2018-07-28期 MapReduce实现对数字排序

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了2018-07-28期 MapReduce实现对数字排序相关的知识,希望对你有一定的参考价值。

package cn.sjq.mr.sort.number;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.junit.Test;

/**

* 本测试验证统一在NumberSort类中实现

* @author songjq

*

*/

public class NumberSort {

/**
 * Mapper for the number-sorting job that is run without a custom Reducer.
 *
 * <p>MapReduce sorts on the map output key (K2) only, so each parsed number is
 * emitted as the key and the value is a {@link NullWritable} placeholder.
 * A Reducer that writes each key once (see {@link NumberSortReducer}) would
 * collapse repeated numbers; this mapper is meant for the job that does not
 * configure one.
 *
 * <p>Expected input: one integer per line, for example
 * <pre>
 * 89
 * 12
 * 32
 * 34
 * 8
 * 32
 * 21
 * 98
 * 78
 * 12
 * 34
 * 21
 * 34
 * 32
 * </pre>
 *
 * @author songjq
 */
public static class NumberSortNoReducerMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {

    /**
     * Parses one input line as a {@code long} and emits it as the output key.
     *
     * @param k1      input key supplied by the input format (not used here)
     * @param v1      one line of the input file
     * @param context sink for the (number, null) output pair
     * @throws NumberFormatException if a non-blank line is not a valid {@code long}
     */
    @Override
    protected void map(LongWritable k1, Text v1, Context context) throws IOException, InterruptedException {
        // Long.parseLong rejects surrounding whitespace, so strip it first.
        String line = v1.toString().trim();

        // A blank line carries no number; skip it instead of failing the whole task.
        if (line.isEmpty()) {
            return;
        }

        long number = Long.parseLong(line);
        context.write(new LongWritable(number), NullWritable.get());
    }
}

/**

* 只有Mapper类,无Reducer提交job

*  NumberSortNoReducerMapperJob

* @author songjq

* 执行结果:

* 执行结果:

  8

12

12

21

21

32

32

32

34

34

34

78

89

98

* 从结果可以看出,通过MR处理后,对数字进行了升序排序,且没有去重

* @throws Exception

*/

@Test

public void NumberSortNoReducerMapperJob() throws Exception {

Job job = Job.getInstance(new Configuration());

job.setJarByClass(NumberSort.class);

job.setMapperClass(NumberSortNoReducerMapper.class);

job.setMapOutputKeyClass(LongWritable.class);

job.setMapOutputValueClass(NullWritable.class);

job.setOutputKeyClass(LongWritable.class);

job.setOutputValueClass(NullWritable.class);

// 默认采用MR比较器,默认比较器是按K2进行升序排序

// job.setSortComparatorClass(cls);

FileInputFormat.setInputPaths(job, new Path("D:\test\tmp\sort\numbers.data"));

FileOutputFormat.setOutputPath(job, new Path("D:\test\tmp\sort\out1"));

job.waitForCompletion(true);

}

/**
 * Mapper for the number-sorting jobs that also configure a Reducer.
 *
 * <p>MapReduce sorts on the map output key (K2) only, so each parsed number is
 * emitted as the key and the value is a {@link NullWritable} placeholder.
 * It is paired with {@link NumberSortReducer} to observe whether the sorted
 * output is de-duplicated.
 *
 * <p>Expected input: one integer per line, for example
 * <pre>
 * 89
 * 12
 * 32
 * 34
 * 8
 * 32
 * 21
 * 98
 * 78
 * 12
 * 34
 * 21
 * 34
 * 32
 * </pre>
 *
 * @author songjq
 */
public static class NumberSortMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {

    /**
     * Parses one input line as a {@code long} and emits it as the output key.
     *
     * @param k1      input key supplied by the input format (not used here)
     * @param v1      one line of the input file
     * @param context sink for the (number, null) output pair
     * @throws NumberFormatException if a non-blank line is not a valid {@code long}
     */
    @Override
    protected void map(LongWritable k1, Text v1, Context context) throws IOException, InterruptedException {
        // Long.parseLong rejects surrounding whitespace, so strip it first.
        String line = v1.toString().trim();

        // A blank line carries no number; skip it instead of failing the whole task.
        if (line.isEmpty()) {
            return;
        }

        long number = Long.parseLong(line);
        context.write(new LongWritable(number), NullWritable.get());
    }
}

/**
 * Reducer that goes with {@link NumberSortMapper}.
 *
 * <p>Each call writes the incoming key exactly once and never looks at the
 * values, so however many times a number was emitted by the mapper it
 * produces a single output record per {@code reduce} call.
 *
 * @author songjq
 */
public static class NumberSortReducer extends Reducer<LongWritable, NullWritable, LongWritable, NullWritable> {

    /**
     * Emits the key once with a {@link NullWritable} value.
     *
     * @param k3  the number being reduced
     * @param v3s placeholder values for this key (ignored)
     * @param ctx sink for the (number, null) output pair
     */
    @Override
    protected void reduce(LongWritable k3, Iterable<NullWritable> v3s, Context ctx)
            throws IOException, InterruptedException {
        final NullWritable placeholder = NullWritable.get();
        ctx.write(k3, placeholder);
    }
}

/**
 * Custom sort comparator that reverses the ordering of
 * {@link LongWritable.Comparator}.
 *
 * <p>It extends the stock comparator and overrides only the raw-bytes
 * {@code compare} method, flipping the sign of the parent's result so that
 * keys sort in descending order.
 *
 * @author songjq
 */
public static class MyLongWritableComparator extends LongWritable.Comparator {

    /**
     * Compares two serialized keys in the opposite order to the parent class.
     *
     * @return the negated result of {@code super.compare} for the same arguments
     */
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Result of the parent comparison for the same byte ranges.
        final int parentOrder = super.compare(b1, s1, l1, b2, s2, l2);
        // Flipping the sign turns that ordering into its reverse.
        return -parentOrder;
    }
}

/**

* Mapper类有对于的Reducer类

*  NumberSortHasReducerJob

*  执行结果:

*  8

12

21

32

34

78

89

98

如上通过Reduce可以进行去重处理

* @author songjq

*/

@Test

public void NumberSortHasReducerJob() throws Exception {

Job job = Job.getInstance(new Configuration());

job.setJarByClass(NumberSort.class);

job.setMapperClass(NumberSortMapper.class);

job.setReducerClass(NumberSortReducer.class);

job.setMapOutputKeyClass(LongWritable.class);

job.setMapOutputValueClass(NullWritable.class);

job.setOutputKeyClass(LongWritable.class);

job.setOutputValueClass(NullWritable.class);

// 默认采用MR比较器,默认比较器是按K2进行升序排序

// job.setSortComparatorClass(cls);

FileInputFormat.setInputPaths(job, new Path("D:\test\tmp\sort\numbers.data"));

FileOutputFormat.setOutputPath(job, new Path("D:\test\tmp\sort\out2"));

job.waitForCompletion(true);

}

/**

* 使用自定义比较器MyLongWritableComparator实现降序排序

* 执行结果:

*  98

89

78

34

32

21

12

8

* @throws Exception

*/

@Test

public void NumberSortUseMyComparatorJob() throws Exception {

Job job = Job.getInstance(new Configuration());

job.setJarByClass(NumberSort.class);

job.setMapperClass(NumberSortMapper.class);

job.setReducerClass(NumberSortReducer.class);

job.setMapOutputKeyClass(LongWritable.class);

job.setMapOutputValueClass(NullWritable.class);

job.setOutputKeyClass(LongWritable.class);

job.setOutputValueClass(NullWritable.class);

// 默认采用MR比较器,默认比较器是按K2进行升序排序,这里使用自定义MyLongWritableComparator比较器实现降序

job.setSortComparatorClass(MyLongWritableComparator.class);

FileInputFormat.setInputPaths(job, new Path("D:\test\tmp\sort\numbers.data"));

FileOutputFormat.setOutputPath(job, new Path("D:\test\tmp\sort\out3"));

job.waitForCompletion(true);

}

}


以上是关于2018-07-28期 MapReduce实现对数字排序的主要内容,如果未能解决你的问题,请参考以下文章

2018-08-01 期 MapReduce实现多表查询等值连接

2018-08-02 期 MapReduce实现多表查询自连接

2018-07-29期 MapReduce实现对字符串进行排序

2018-08-05 期 MapReduce实现每个单词在每个文件中坐标信息统计

2018-08-10期 MapReduce实现双色球近10年每个号码中奖次数统计

2018-08-09期 MapReduce实现对单个用户支付金额最大的前N个商品排名