2018-07-28期 MapReduce实现对数字排序
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了2018-07-28期 MapReduce实现对数字排序相关的知识,希望对你有一定的参考价值。
package cn.sjq.mr.sort.number;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Test;
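/**
 * Number-sorting demo for MapReduce. The mappers, reducer, custom comparator and the
 * JUnit driver methods used by this demo are all implemented inside this single class.
 *
 * @author songjq
 */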
public class NumberSort {
/**
 * Mapper that parses every input line as a {@code long} and emits it as the map output key (K2)
 * with a {@link NullWritable} value.
 *
 * <p>Author's note: MapReduce sorts on K2 only. When the keys are passed through a reducer that
 * writes each key once, repeated numbers are collapsed; this mapper is used by
 * {@code NumberSortNoReducerMapperJob}, which sets no reducer class, so repeated numbers are kept.
 *
 * <p>Expected input format, one number per line:
 * <pre>
 * 89
 * 12
 * 32
 * 34
 * 8
 * 32
 * 21
 * 98
 * 78
 * 12
 * 34
 * 21
 * 34
 * 32
 * </pre>
 *
 * @author songjq
 */
public static class NumberSortNoReducerMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {
    /**
     * Converts one line of text into a numeric key.
     *
     * @param k1 input key supplied by the input format (not used here)
     * @param v1 one line of input text, expected to hold a single integer
     * @param context sink for the emitted (number, null) pair
     * @throws IOException if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     * @throws NumberFormatException if a non-blank line is not a valid {@code long}
     */
    @Override
    protected void map(LongWritable k1, Text v1, Context context) throws IOException, InterruptedException {
        // Trim first so leading/trailing whitespace does not make parseLong fail.
        String line = v1.toString().trim();
        if (line.isEmpty()) {
            // A blank line (for example a trailing empty line in the data file) holds no number.
            return;
        }
        long number = Long.parseLong(line);
        context.write(new LongWritable(number), NullWritable.get());
    }
}
/**
* 只有Mapper类,无Reducer提交job
* NumberSortNoReducerMapperJob
* @author songjq
* 执行结果:
* 执行结果:
8
12
12
21
21
32
32
32
34
34
34
78
89
98
* 从结果可以看出,通过MR处理后,对数字进行了升序排序,且没有去重
* @throws Exception
*/
@Test
public void NumberSortNoReducerMapperJob() throws Exception {
Job job = Job.getInstance(new Configuration());
job.setJarByClass(NumberSort.class);
job.setMapperClass(NumberSortNoReducerMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(NullWritable.class);
// 默认采用MR比较器,默认比较器是按K2进行升序排序
// job.setSortComparatorClass(cls);
FileInputFormat.setInputPaths(job, new Path("D:\test\tmp\sort\numbers.data"));
FileOutputFormat.setOutputPath(job, new Path("D:\test\tmp\sort\out1"));
job.waitForCompletion(true);
}
/**
 * Mapper for the jobs that also configure a reducer: reads one number per line and emits it as
 * the map output key (K2) with a {@link NullWritable} value.
 *
 * <p>Author's note: MapReduce sorts on K2 only. This mapper is paired with a reducer in order
 * to observe whether the sorted output is de-duplicated.
 *
 * <p>Expected input format, one number per line:
 * <pre>
 * 89
 * 12
 * 32
 * 34
 * 8
 * 32
 * 21
 * 98
 * 78
 * 12
 * 34
 * 21
 * 34
 * 32
 * </pre>
 *
 * @author songjq
 */
public static class NumberSortMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {
    /**
     * Parses the line held in {@code v1} and writes it out as a {@link LongWritable} key.
     *
     * @param k1 input key supplied by the input format (not used here)
     * @param v1 one line of input text, expected to hold a single integer
     * @param context sink for the emitted (number, null) pair
     * @throws IOException if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     * @throws NumberFormatException if a non-blank line is not a valid {@code long}
     */
    @Override
    protected void map(LongWritable k1, Text v1, Context context) throws IOException, InterruptedException {
        // Strip surrounding whitespace; Long.parseLong rejects it.
        String line = v1.toString().trim();
        if (line.isEmpty()) {
            // Nothing to parse on an empty line; emitting nothing keeps the job running.
            return;
        }
        long number = Long.parseLong(line);
        context.write(new LongWritable(number), NullWritable.get());
    }
}
/**
 * Reducer that goes with {@link NumberSortMapper}.
 *
 * <p>Each call writes the incoming key once and never iterates the values, so a key produces a
 * single output record per {@code reduce} invocation.
 *
 * @author songjq
 */
public static class NumberSortReducer extends Reducer<LongWritable, NullWritable, LongWritable, NullWritable> {
    /**
     * Emits the key with an empty value.
     *
     * @param k3 the number being reduced
     * @param v3s values grouped under {@code k3}; ignored
     * @param ctx sink for the emitted (number, null) pair
     * @throws IOException if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(LongWritable k3, Iterable<NullWritable> v3s, Context ctx)
            throws IOException, InterruptedException {
        final NullWritable noValue = NullWritable.get();
        ctx.write(k3, noValue);
    }
}
/**
 * Custom sort comparator that reverses the ordering of {@link LongWritable.Comparator}.
 *
 * <p>It extends {@code LongWritable.Comparator} and overrides the byte-level {@code compare}
 * method only.
 *
 * @author songjq
 */
public static class MyLongWritableComparator extends LongWritable.Comparator {
    /**
     * Compares two serialized keys in reverse order by handing the operands to the parent
     * comparator with left and right swapped.
     *
     * @param b1 buffer holding the first key
     * @param s1 start offset of the first key in {@code b1}
     * @param l1 length of the first key
     * @param b2 buffer holding the second key
     * @param s2 start offset of the second key in {@code b2}
     * @param l2 length of the second key
     * @return the parent's comparison result for (second, first)
     */
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Swapping the operands yields the descending order.
        return super.compare(b2, s2, l2, b1, s1, l1);
    }
}
/**
* Mapper类有对于的Reducer类
* NumberSortHasReducerJob
* 执行结果:
* 8
12
21
32
34
78
89
98
如上通过Reduce可以进行去重处理
* @author songjq
*/
@Test
public void NumberSortHasReducerJob() throws Exception {
Job job = Job.getInstance(new Configuration());
job.setJarByClass(NumberSort.class);
job.setMapperClass(NumberSortMapper.class);
job.setReducerClass(NumberSortReducer.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(NullWritable.class);
// 默认采用MR比较器,默认比较器是按K2进行升序排序
// job.setSortComparatorClass(cls);
FileInputFormat.setInputPaths(job, new Path("D:\test\tmp\sort\numbers.data"));
FileOutputFormat.setOutputPath(job, new Path("D:\test\tmp\sort\out2"));
job.waitForCompletion(true);
}
/**
* 使用自定义比较器MyLongWritableComparator实现降序排序
* 执行结果:
* 98
89
78
34
32
21
12
8
* @throws Exception
*/
@Test
public void NumberSortUseMyComparatorJob() throws Exception {
Job job = Job.getInstance(new Configuration());
job.setJarByClass(NumberSort.class);
job.setMapperClass(NumberSortMapper.class);
job.setReducerClass(NumberSortReducer.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(NullWritable.class);
// 默认采用MR比较器,默认比较器是按K2进行升序排序,这里使用自定义MyLongWritableComparator比较器实现降序
job.setSortComparatorClass(MyLongWritableComparator.class);
FileInputFormat.setInputPaths(job, new Path("D:\test\tmp\sort\numbers.data"));
FileOutputFormat.setOutputPath(job, new Path("D:\test\tmp\sort\out3"));
job.waitForCompletion(true);
}
}
以上是关于2018-07-28期 MapReduce实现对数字排序的主要内容,如果未能解决你的问题,请参考以下文章
2018-08-01 期 MapReduce实现多表查询等值连接
2018-08-02 期 MapReduce实现多表查询自连接
2018-07-29期 MapReduce实现对字符串进行排序
2018-08-05 期 MapReduce实现每个单词在每个文件中坐标信息统计