2018-07-29期 MapReduce实现对字符串进行排序

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了2018-07-29期 MapReduce实现对字符串进行排序相关的知识,希望对你有一定的参考价值。

package cn.sjq.mr.sort.number;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.junit.Test;

/**

* MapReduce字符串排序,字符串排序是按照数据字典顺序进行排序

* 这里所有的类采用匿名内部类实现

* @author songjq

*

*/

public class StringSort {

/**

* 对字符串进行排序Mapper类

* @author songjq

*

*/

static class StringSortMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

private Text tkey = new Text();

@Override

protected void map(LongWritable k1, Text v1, Context context)

throws IOException, InterruptedException {

String line = v1.toString();

//分词 格式:Wait Events Statistics

String[] split = StringUtils.split(line," ");

for(int i=0;i<split.length;i++) {

tkey.set(split[i]);

context.write(tkey, NullWritable.get());

}

}

}

/**

* MapReduce提交job类

* 这里采用MapReduce默认的比较器进行字符串的升序排序

* 执行结果:

* Activity

Advisory

Buffer

Cache

Cache

Dictionary

Events

IO

Instance

...

* @throws Exception

*/

@Test

public void StringSortJob() throws Exception {

Job job = Job.getInstance(new Configuration());

job.setJarByClass(StringSort.class);

job.setMapperClass(StringSortMapper.class);

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(NullWritable.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(NullWritable.class);

//采用MapReduce默认排序规则

//job.setSortComparatorClass(cls);

FileInputFormat.setInputPaths(job, new Path("D:\test\tmp\sort\Strings.data"));

FileOutputFormat.setOutputPath(job, new Path("D:\test\tmp\sort\out5"));

job.waitForCompletion(true);

}

/**

* 自定义StringSortMyComparator比较器,继承Text.Comparator重写compare方法,实现对字符串降序排序

* @author songjq

*

*/

static class StringSortMyComparator extends Text.Comparator{

//只需要在super前面加 - 即可实现降序排序

@Override

public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {

return -  super.compare(b1, s1, l1, b2, s2, l2);

}

}

/**

* MapReduce提交job类

* 这里采用自定义比较器StringSortMyComparator实现对字符串的降序排序

* 执行结果:

* Latch

Instance

IO

Events

Dictionary

Cache

Cache

Buffer

Advisory

Activity

...

* @throws Exception

*/

@Test

public void StringSortUseMyComparatorJob() throws Exception {

Job job = Job.getInstance(new Configuration());

job.setJarByClass(StringSort.class);

job.setMapperClass(StringSortMapper.class);

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(NullWritable.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(NullWritable.class);

//采用自定义比较器实现对字符串降序排序

job.setSortComparatorClass(StringSortMyComparator.class);

FileInputFormat.setInputPaths(job, new Path("D:\test\tmp\sort\Strings.data"));

FileOutputFormat.setOutputPath(job, new Path("D:\test\tmp\sort\out6"));

job.waitForCompletion(true);

}

}


以上是关于“2018-07-29期 MapReduce实现对字符串进行排序”的主要内容。如果未能解决你的问题,请参考以下文章:

2018-08-07 期 MapReduce模拟实现热销商品排行

2018-08-01 期 MapReduce实现多表查询等值连接

2018-08-02 期 MapReduce实现多表查询自连接

2018-08-05 期 MapReduce实现每个单词在每个文件中坐标信息统计

2018-08-10期 MapReduce实现双色球近10年每个号码中奖次数统计

2018-08-09期 MapReduce实现对单个用户支付金额最大的前N个商品排名