MR作业编程案例-流量统计

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了MR作业编程案例-流量统计相关的知识,希望对你有一定的参考价值。

流量统计(统计每个用户的上行流量和下行流量及其流量总和)


源数据:

1363157985066     13726230503    00-FD-07-A4-72-B8:CMCC    120.196.100.82    i02.c.aliimg.com        24    27    2481    24681    200

1363157995052     13826544101    5C-0E-8B-C7-F1-E0:CMCC    120.197.40.4            4    0    264    0    200

1363157991076     13926435656    20-10-7A-28-CC-0A:CMCC    120.196.100.99            2    4    132    1512    200

1363154400022     13926251106    5C-0E-8B-8B-B1-50:CMCC    120.197.40.4            4    0    240    0    200

1363157993044     18211575961    94-71-AC-CD-E6-18:CMCC-EASY    120.196.100.99    iface.qiyi.com    视频网站    15    12    1527    2106    200

1363157995074     84138413    5C-0E-8B-8C-E8-20:7DaysInn    120.197.40.4    122.72.52.12        20    16    4116    1432    200

1363157993055     13560439658    C4-17-FE-BA-DE-D9:CMCC    120.196.100.99            18    15    1116    954    200

1363157995033     15920133257    5C-0E-8B-C7-BA-20:CMCC    120.197.40.4    sug.so.360.cn    信息安全    20    20    3156    2936    200

1363157983019     13719199419    68-A1-B7-03-07-B1:CMCC-EASY    120.196.100.82            4    0    240    0    200

1363157984041     13660577991    5C-0E-8B-92-5C-20:CMCC-EASY    120.197.40.4    s19.cnzz.com    站点统计    24    9    6960    690    200

1363157973098     15013685858    5C-0E-8B-C7-F7-90:CMCC    120.197.40.4    rank.ie.sogou.com    搜索引擎    28    27    3659    3538    200

1363157986029     15989002119    E8-99-C4-4E-93-E0:CMCC-EASY    120.196.100.99    www.umeng.com    站点统计    3    3    1938    180    200

1363157992093     13560439658    C4-17-FE-BA-DE-D9:CMCC    120.196.100.99            15    9    918    4938    200

1363157986041     13480253104    5C-0E-8B-C7-FC-80:CMCC-EASY    120.197.40.4            3    3    180    180    200

1363157984040     13602846565    5C-0E-8B-8B-B6-00:CMCC    120.197.40.4    2052.flash2-http.qq.com    综合门户    15    12    1938    2910    200

1363157995093     13922314466    00-FD-07-A2-EC-BA:CMCC    120.196.100.82    img.qfc.cn        12    12    3008    3720    200

1363157982040     13502468823    5C-0A-5B-6A-0B-D4:CMCC-EASY    120.196.100.99    y0.ifengimg.com    综合门户    57    102    7335    110349    200

1363157986072     18320173382    84-25-DB-4F-10-1A:CMCC-EASY    120.196.100.99    input.shouji.sogou.com    搜索引擎    21    18    9531    2412    200

1363157990043     13925057413    00-1F-64-E1-E6-9A:CMCC    120.196.100.55    t3.baidu.com    搜索引擎    69    63    11058    48243    200

1363157988072     13760778710    00-FD-07-A4-7B-08:CMCC    120.196.100.82            2    2    120    120    200

1363157985066     13726238888    00-FD-07-A4-72-B8:CMCC    120.196.100.82    i02.c.aliimg.com        24    27    2481    24681    200

1363157993055     13560436666    C4-17-FE-BA-DE-D9:CMCC    120.196.100.99            18    15    1116    954    200

1、第一次作业:

①封装FlowBean

package com.it18zhang.flowdemo;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {

         private long upFlow;

         private long downFlow;

         private long sumFlow;

         public FlowBean() {

         }

         public FlowBean(long upFlow, long downFlow) {

                  this.upFlow = upFlow;

                  this.downFlow = downFlow;

                  this.sumFlow = this.upFlow + this.downFlow;

         }

         public long getUpFlow() {

                  return upFlow;

         }

         public void setUpFlow(long upFlow) {

                  this.upFlow = upFlow;

         }

         public long getDownFlow() {

                  return downFlow;

         }

         public void setDownFlow(long downFlow) {

                  this.downFlow = downFlow;

         }

         public long getSumFlow() {

                  return sumFlow;

         }

         @Override

         public String toString() {

                  return upFlow + "\t" + downFlow + "\t" + sumFlow;

         }

         public void write(DataOutput out) throws IOException {

                  out.writeLong(upFlow);

                  out.writeLong(downFlow);

                  out.writeLong(sumFlow);

         }

         public void readFields(DataInput in) throws IOException {

                  upFlow = in.readLong();

                  downFlow = in.readLong();

                  sumFlow = in.readLong();

         }

         public int compareTo(FlowBean o) {

                  return this.sumFlow - o.getSumFlow() > 0 ? -1 : 1;

         }

}

②Mapper

package com.it18zhang.flowdemo;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

         @Override

         protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, FlowBean>.Context context)

                           throws IOException, InterruptedException {

                  String[] splits = value.toString().split("\t");

                  String tel = splits[1];

                  long upFlow = Integer.parseInt(splits[splits.length - 2]);

                  long downFlow = Integer.parseInt(splits[splits.length - 3]);

                  FlowBean fb = new FlowBean(upFlow, downFlow);

                  context.write(new Text(tel), fb);

         }

}

③Reducer

package com.it18zhang.flowdemo;

import java.io.IOException;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

public class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

         @Override

         protected void reduce(Text key, Iterable<FlowBean> values, Context context)

                           throws IOException, InterruptedException {

                  long upFlow = 0;

                  long downFlow = 0;

                  for(FlowBean value : values){

                           upFlow = value.getUpFlow();

                           downFlow = value.getDownFlow();

                  }

                  FlowBean fb = new FlowBean(upFlow,downFlow);

                  context.write(key, fb);

         }

}

④App

package com.it18zhang.flowdemo;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FlowCountApp {

         public static void main(String[] args) throws Exception {

                  //新建Job

                  Configuration conf = new Configuration();

                  Job job = Job.getInstance(conf);

                  job.setJobName("FlowCountApp");

                  job.setJarByClass(FlowCountApp.class);

                  

                  //设置Mapper信息

                  job.setMapperClass(FlowCountMapper.class);

                  job.setMapOutputKeyClass(Text.class);

                  job.setMapOutputValueClass(FlowBean.class);

                  

                  //设置Reducer信息

                  job.setReducerClass(FlowCountReducer.class);

                  job.setOutputKeyClass(Text.class);

                  job.setOutputValueClass(FlowBean.class);

                  

                  //设置输入输出路径

                  FileInputFormat.setInputPaths(job, new Path(args[0]));

                  FileOutputFormat.setOutputPath(job, new Path(args[1]));

                  

                  //提交作业

                  System.out.println(job.waitForCompletion(true) ? 0 : 1);

                  System.out.println("Job Finished");

         }

}

结果

13480253104    180    180    360

13502468823    110349    7335    117684

13560436666    954    1116    2070

13560439658    5892    2034    7926

13602846565    2910    1938    4848

13660577991    690    6960    7650

13719199419    0    240    240

13726230503    24681    2481    27162

13726238888    24681    2481    27162

13760778710    120    120    240

13826544101    0    264    264

13922314466    3720    3008    6728

13925057413    48243    11058    59301

13926251106    0    240    240

13926435656    1512    132    1644

15013685858    3538    3659    7197

15920133257    2936    3156    6092

15989002119    180    1938    2118

18211575961    2106    1527    3633

18320173382    2412    9531    11943

84138413    1432    4116    5548

2、第二次作业:

①Mapper

package com.it18zhang.flowdemo;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

public class SortMapper extends Mapper<LongWritable, Text, FlowBean, Text> {

         @Override

         protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

                  String[] splits = value.toString().split("\t");

                  String tel = splits[0];

                  long upFlow = Long.parseLong(splits[1]);

                  long downFlow = Long.parseLong(splits[2]);

                  FlowBean fb = new FlowBean(upFlow,downFlow);

                  context.write(fb, new Text(tel));

         }

}

②Reducer

package com.it18zhang.flowdemo;

import java.io.IOException;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

public class SortReducer extends Reducer<FlowBean, Text, Text, FlowBean> {

         @Override

         protected void reduce(FlowBean key, Iterable<Text> values, Context context)

                           throws IOException, InterruptedException {

                           context.write(values.iterator().next(), key);

         }

}

③App

package com.it18zhang.flowdemo;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SortApp {

         public static void main(String[] args) throws Exception {

                  // 新建Job

                  Configuration conf = new Configuration();

                  Job job = Job.getInstance(conf);

                  job.setJobName("SortApp");

                  job.setJarByClass(SortApp.class);

                  // 设置Mapper信息

                  job.setMapperClass(SortMapper.class);

                  job.setMapOutputKeyClass(FlowBean.class);

                  job.setMapOutputValueClass(Text.class);

                  // 设置Reducer信息

                  job.setReducerClass(SortReducer.class);

                  job.setOutputKeyClass(Text.class);

                  job.setOutputValueClass(FlowBean.class);

                  // 设置输入输出路径

                  FileInputFormat.setInputPaths(job, new Path(args[0]));

                  FileOutputFormat.setOutputPath(job, new Path(args[1]));

                  // 提交作业

                  System.out.println(job.waitForCompletion(true) ? 0 : 1);

                  System.out.println("Job Finished");

         }

}

结果(注:下面 13560439658 一行的 4938 / 918 / 5856 只对应该号码两条原始记录中的一条,与第一次作业结果中的累加值 5892 / 2034 / 7926 不一致)

13502468823    110349    7335    117684

13925057413    48243    11058    59301

13726238888    24681    2481    27162

13726230503    24681    2481    27162

18320173382    2412    9531    11943

13660577991    690    6960    7650

15013685858    3538    3659    7197

13922314466    3720    3008    6728

15920133257    2936    3156    6092

13560439658    4938    918    5856

84138413    1432    4116    5548

13602846565    2910    1938    4848

18211575961    2106    1527    3633

15989002119    180    1938    2118

13560436666    954    1116    2070

13926435656    1512    132    1644

13480253104    180    180    360

13826544101    0    264    264

13926251106    0    240    240

13760778710    120    120    240

13719199419    0    240    240


以上是关于MR作业编程案例-流量统计的主要内容,如果未能解决你的问题,请参考以下文章

MapReduce的典型编程场景1

Hadoop Mapreduce 案例 统计手机流量使用情况

大数据技术之流量汇总案例

SQL笛卡尔积结合前后行数据的统计案例

SQL笛卡尔积结合前后行数据的统计案例

SQL笛卡尔积结合前后行数据的统计案例