mapreduce 自定义数据类型的简单的应用

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了mapreduce 自定义数据类型的简单的应用相关的知识,希望对你有一定的参考价值。

本文以手机流量统计为例:

日志中包含下面字段

技术分享

现在需要统计手机的上行数据包,下行数据包,上行总流量,下行总流量。

分析:可以以手机号为 key,以上 4 个字段为 value 来传递数据。

这样则需要自己定义一个数据类型,用于封装要统计的4个字段,在map 与reduce之间传递和shuffle

注:作为 key 的自定义类型需要实现 WritableComparable 接口(除 write、readFields 外,还要实现 compareTo 方法,供排序使用);

     作为 value 的自定义类型则只需实现 Writable 接口中的 write 和 readFields 方法。

自定义代码如下(注:下面贴出的实际上是 MapReduce 作业类,自定义类型 MobileDataWritable 的定义未在本文中给出):

/**
 * MapReduce job that aggregates mobile traffic statistics per phone number.
 *
 * <p>Each input line is a tab-separated log record. The mapper emits field 1 of the
 * record as the key and fields 6-9 wrapped in a {@code MobileDataWritable} as the value;
 * the reducer (also registered as the combiner) sums the four values for each key.
 *
 * @author nele
 */
public class MobileDataMapReduce extends Configured implements Tool {

    /** Input path used by {@link #main(String[])} when no arguments are supplied. */
    private static final String DEFAULT_INPUT =
            "hdfs://bigdata5:8020/user/nele/data/input/HTTP_20130313143750.data";

    /** Output path used by {@link #main(String[])} when no arguments are supplied. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://bigdata5:8020/user/nele/data/output/output6";

    /**
     * Splits one log line on tabs and emits (field 1, counters built from fields 6-9).
     *
     * <p>Lines with fewer than ten fields or with non-numeric counter fields are skipped
     * and counted under the {@code MobileData} counter group instead of failing the task.
     *
     * @author nele
     */
    public static class MobileDataMapper extends
            Mapper<LongWritable, Text, Text, MobileDataWritable> {

        /** Counter group under which skipped records are reported. */
        private static final String COUNTER_GROUP = "MobileData";

        /** Index of the field used as the output key. */
        private static final int KEY_FIELD = 1;

        /** Index of the first of the four consecutive numeric fields. */
        private static final int FIRST_VALUE_FIELD = 6;

        /** Minimum number of fields a line must have for indices 1 and 6-9 to exist. */
        private static final int MIN_FIELDS = FIRST_VALUE_FIELD + 4;

        // Reused across map() calls to avoid allocating two objects per record.
        private final Text outPutKey = new Text();
        private final MobileDataWritable outPutValue = new MobileDataWritable();

        /**
         * Parses one input line and writes a single key/value pair for it.
         *
         * @param key     the input key supplied by the input format (not used)
         * @param value   one tab-separated log line
         * @param context task context used to emit output and update counters
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] arr = value.toString().split("\t");
            if (arr.length < MIN_FIELDS) {
                context.getCounter(COUNTER_GROUP, "TOO_FEW_FIELDS").increment(1);
                return;
            }

            long first;
            long second;
            long third;
            long fourth;
            try {
                first = Long.parseLong(arr[FIRST_VALUE_FIELD]);
                second = Long.parseLong(arr[FIRST_VALUE_FIELD + 1]);
                third = Long.parseLong(arr[FIRST_VALUE_FIELD + 2]);
                fourth = Long.parseLong(arr[FIRST_VALUE_FIELD + 3]);
            } catch (NumberFormatException e) {
                context.getCounter(COUNTER_GROUP, "NON_NUMERIC_FIELD").increment(1);
                return;
            }

            outPutKey.set(arr[KEY_FIELD]);
            // Argument order presumably is (upPackNum, downPackNum, upPayLoad, downPayLoad),
            // matching the reducer's call -- confirm against MobileDataWritable.set.
            outPutValue.set(first, second, third, fourth);
            context.write(outPutKey, outPutValue);
        }

    }

    /**
     * Sums the four counters of every value that shares a key and emits one total per key.
     *
     * <p>Input and output types are identical, which is what allows {@link #run(String[])}
     * to register this class as the combiner as well.
     *
     * @author nele
     */
    public static class MobileDataReducer extends
            Reducer<Text, MobileDataWritable, Text, MobileDataWritable> {

        // Reused across reduce() calls to avoid allocating one object per key.
        private final MobileDataWritable outPutValue = new MobileDataWritable();

        /**
         * Adds up the counters of all values for {@code key} and writes the totals.
         *
         * @param key     the grouping key emitted by the mapper
         * @param values  all values grouped under {@code key}
         * @param context task context used to emit output
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<MobileDataWritable> values,
                Context context) throws IOException, InterruptedException {
            long upPackNum = 0;
            long downPackNum = 0;
            long upPayLoad = 0;
            long downPayLoad = 0;
            for (MobileDataWritable val : values) {
                upPackNum += val.getUpPackNum();
                downPackNum += val.getDownPackNum();
                upPayLoad += val.getUpPayLoad();
                downPayLoad += val.getDownPayLoad();
            }
            outPutValue.set(upPackNum, downPackNum, upPayLoad, downPayLoad);
            context.write(key, outPutValue);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeds, 1 if it fails, 2 if the arguments are missing
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: " + getClass().getSimpleName()
                    + " <input path> <output path>");
            return 2;
        }

        Configuration conf = super.getConf();

        // create job
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        // set input path
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);

        // map
        job.setMapperClass(MobileDataMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MobileDataWritable.class);

        // combiner: the reducer is reused to pre-aggregate map output
        job.setCombinerClass(MobileDataReducer.class);

        // reduce
        job.setReducerClass(MobileDataReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(MobileDataWritable.class);

        // output
        Path outPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outPath);

        // submit
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point.
     *
     * <p>Arguments given on the command line are passed through to {@link ToolRunner};
     * when none are given, the built-in default input and output paths are used.
     *
     * @param args optional generic options followed by the input and output paths
     * @throws Exception if the job fails to run
     */
    public static void main(String[] args) throws Exception {
        String[] jobArgs = (args == null || args.length == 0)
                ? new String[] { DEFAULT_INPUT, DEFAULT_OUTPUT }
                : args;

        Configuration conf = new Configuration();

        int status = ToolRunner.run(conf, new MobileDataMapReduce(), jobArgs);

        System.exit(status);
    }

}

 

现在就可以使用自定义的类型进行手机流量的统计,代码如下:

/**
 * MapReduce job that aggregates mobile traffic statistics per phone number.
 *
 * <p>Each input line is a tab-separated log record. The mapper emits field 1 of the
 * record as the key and fields 6-9 wrapped in a {@code MobileDataWritable} as the value;
 * the reducer (also registered as the combiner) sums the four values for each key.
 *
 * @author nele
 */
public class MobileDataMapReduce extends Configured implements Tool {

    /** Input path used by {@link #main(String[])} when no arguments are supplied. */
    private static final String DEFAULT_INPUT =
            "hdfs://bigdata5:8020/user/nele/data/input/HTTP_20130313143750.data";

    /** Output path used by {@link #main(String[])} when no arguments are supplied. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://bigdata5:8020/user/nele/data/output/output6";

    /**
     * Splits one log line on tabs and emits (field 1, counters built from fields 6-9).
     *
     * <p>Lines with fewer than ten fields or with non-numeric counter fields are skipped
     * and counted under the {@code MobileData} counter group instead of failing the task.
     *
     * @author nele
     */
    public static class MobileDataMapper extends
            Mapper<LongWritable, Text, Text, MobileDataWritable> {

        /** Counter group under which skipped records are reported. */
        private static final String COUNTER_GROUP = "MobileData";

        /** Index of the field used as the output key. */
        private static final int KEY_FIELD = 1;

        /** Index of the first of the four consecutive numeric fields. */
        private static final int FIRST_VALUE_FIELD = 6;

        /** Minimum number of fields a line must have for indices 1 and 6-9 to exist. */
        private static final int MIN_FIELDS = FIRST_VALUE_FIELD + 4;

        // Reused across map() calls to avoid allocating two objects per record.
        private final Text outPutKey = new Text();
        private final MobileDataWritable outPutValue = new MobileDataWritable();

        /**
         * Parses one input line and writes a single key/value pair for it.
         *
         * @param key     the input key supplied by the input format (not used)
         * @param value   one tab-separated log line
         * @param context task context used to emit output and update counters
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] arr = value.toString().split("\t");
            if (arr.length < MIN_FIELDS) {
                context.getCounter(COUNTER_GROUP, "TOO_FEW_FIELDS").increment(1);
                return;
            }

            long first;
            long second;
            long third;
            long fourth;
            try {
                first = Long.parseLong(arr[FIRST_VALUE_FIELD]);
                second = Long.parseLong(arr[FIRST_VALUE_FIELD + 1]);
                third = Long.parseLong(arr[FIRST_VALUE_FIELD + 2]);
                fourth = Long.parseLong(arr[FIRST_VALUE_FIELD + 3]);
            } catch (NumberFormatException e) {
                context.getCounter(COUNTER_GROUP, "NON_NUMERIC_FIELD").increment(1);
                return;
            }

            outPutKey.set(arr[KEY_FIELD]);
            // Argument order presumably is (upPackNum, downPackNum, upPayLoad, downPayLoad),
            // matching the reducer's call -- confirm against MobileDataWritable.set.
            outPutValue.set(first, second, third, fourth);
            context.write(outPutKey, outPutValue);
        }

    }

    /**
     * Sums the four counters of every value that shares a key and emits one total per key.
     *
     * <p>Input and output types are identical, which is what allows {@link #run(String[])}
     * to register this class as the combiner as well.
     *
     * @author nele
     */
    public static class MobileDataReducer extends
            Reducer<Text, MobileDataWritable, Text, MobileDataWritable> {

        // Reused across reduce() calls to avoid allocating one object per key.
        private final MobileDataWritable outPutValue = new MobileDataWritable();

        /**
         * Adds up the counters of all values for {@code key} and writes the totals.
         *
         * @param key     the grouping key emitted by the mapper
         * @param values  all values grouped under {@code key}
         * @param context task context used to emit output
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<MobileDataWritable> values,
                Context context) throws IOException, InterruptedException {
            long upPackNum = 0;
            long downPackNum = 0;
            long upPayLoad = 0;
            long downPayLoad = 0;
            for (MobileDataWritable val : values) {
                upPackNum += val.getUpPackNum();
                downPackNum += val.getDownPackNum();
                upPayLoad += val.getUpPayLoad();
                downPayLoad += val.getDownPayLoad();
            }
            outPutValue.set(upPackNum, downPackNum, upPayLoad, downPayLoad);
            context.write(key, outPutValue);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeds, 1 if it fails, 2 if the arguments are missing
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: " + getClass().getSimpleName()
                    + " <input path> <output path>");
            return 2;
        }

        Configuration conf = super.getConf();

        // create job
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        // set input path
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);

        // map
        job.setMapperClass(MobileDataMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MobileDataWritable.class);

        // combiner: the reducer is reused to pre-aggregate map output
        job.setCombinerClass(MobileDataReducer.class);

        // reduce
        job.setReducerClass(MobileDataReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(MobileDataWritable.class);

        // output
        Path outPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outPath);

        // submit
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point.
     *
     * <p>Arguments given on the command line are passed through to {@link ToolRunner};
     * when none are given, the built-in default input and output paths are used.
     *
     * @param args optional generic options followed by the input and output paths
     * @throws Exception if the job fails to run
     */
    public static void main(String[] args) throws Exception {
        String[] jobArgs = (args == null || args.length == 0)
                ? new String[] { DEFAULT_INPUT, DEFAULT_OUTPUT }
                : args;

        Configuration conf = new Configuration();

        int status = ToolRunner.run(conf, new MobileDataMapReduce(), jobArgs);

        System.exit(status);
    }

}

 

这样就可以统计出给定日志数据中每个手机号的各项流量数据。

以上是关于mapreduce 自定义数据类型的简单的应用的主要内容,如果未能解决你的问题,请参考以下文章

mapreduce 的二次排序

自定义MapReduce的类型

MapReduce之自定义InputFormat

MapReduce分析流量汇总

与JAVA类型相比较,MapReduce中定义的数据类型都有哪些特点?

Hadoop学习之路MapReduce自定义排序