Computing Averages with MapReduce
Posted by qwangxiao
Goal: use MapReduce to compute the average opening price and average closing price for each stock.
The figure below shows the collected stock data, 1,416 stocks in total.
Because Linux uses UTF-8 as its default encoding while .txt files on Windows default to ANSI (GBK for Chinese text), the files need to be converted once they are on Linux:
Recursive conversion (including subdirectories):
find default -type d -exec mkdir -p utf/{} \;
find default -type f -exec iconv -f GBK -t UTF-8 {} -o utf/{} \;
These two commands convert the files under the default directory from GBK to UTF-8. The directory structure is preserved, and the converted files are written under utf/default.
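The crawled file itself only appears as a screenshot, but the layout the mapper below assumes can be inferred from its parsing logic: each stock starts with a 4-field line whose second field is the company name, followed by 7-field quote lines whose second field is the opening price and whose fifth field is the closing price. A purely illustrative fragment (every name and number here is made up) would look something like:

1  CompanyA  xx  xx
1  10.52  xx  xx  10.71  xx  xx
2  10.49  xx  xx  10.66  xx  xx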
package economic;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class ScoreAvgTest {

    /**
     * @author hadoop
     * KEYIN:    the key passed to map(), the byte offset of the start of each line (0, 11, ...)
     * VALUEIN:  the value passed to map(), the text of one line
     * KEYOUT:   the output key (the company name)
     * VALUEOUT: the output value (the "open close" price pair)
     */
    public static class MapperClass extends Mapper<Object, Text, Text, Text> {

        private Text companyName = new Text();
        private Text data = new Text();
        private int n = 0;

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            System.out.println(this.n);
            n++;
            String lineText = value.toString();
            String[] args = lineText.split("\\s+");
            // A 4-field line carries the company name in its second field.
            if (args.length == 4) {
                this.companyName.set(args[1]);
            }
            // A 7-field line is a daily quote: args[1] is the opening price,
            // args[4] is the closing price.
            if (args.length == 7) {
                try {
                    System.out.println("Before Reducer: " + companyName + "," + args[1]);
                    data.set(args[1] + " " + args[4]);
                    context.write(this.companyName, data);
                } catch (IOException e) {
                    e.printStackTrace();
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }
    }
    /**
     * @author hadoop
     * KEYIN:    the company name
     * VALUEIN:  the "open close" price pair emitted by the mapper
     * KEYOUT:   the company name
     * VALUEOUT: the formatted average opening and closing prices
     */
    public static class ReducerClass extends Reducer<Text, Text, Text, Text> {

        private Text text = new Text();

        @Override
        protected void reduce(Text companyName, Iterable<Text> kaipan,
                Context context) throws IOException, InterruptedException {
            double sumOpen = 0.0;
            double sumClose = 0.0;
            int num = 0;
            Iterator<Text> $it = kaipan.iterator();
            while ($it.hasNext()) {
                String record = $it.next().toString();
                String[] getData = record.split(" ");
                System.out.println(num);
                System.out.println("Raw record: " + record);
                num++;
                System.out.println("Iteration " + num);
                // Prices are scaled by 100 (to cents) before summing,
                // and scaled back below when the averages are taken.
                sumOpen += (Double.valueOf(getData[0]) * 100);
                sumClose += (Double.valueOf(getData[1]) * 100);
            }
            double openPrice = sumOpen / (100 * num);
            double closePrice = sumClose / (100 * num);
            System.out.println("openPrice1: " + openPrice);
            System.out.println("closePrice1: " + closePrice);
            // Round both averages to two decimal places.
            openPrice = (double) Math.round(openPrice * 100) / 100;
            closePrice = (double) Math.round(closePrice * 100) / 100;
            System.out.println("sumOpen: " + sumOpen + " sumClose: " + sumClose);
            System.out.println("openPrice2: " + openPrice);
            System.out.println("closePrice2: " + closePrice);
            String result = "Average opening price: " + Double.toString(openPrice)
                    + ", average closing price: " + Double.toString(closePrice);
            text.set(result);
            try {
                context.write(companyName, text);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:9000");
        // "export" is the HDFS input path, "output" is the output path.
        String[] otherArgs = new String[] { "export", "output" };
        if (otherArgs.length < 2) {
            System.err.println("Usage: ScoreAvgTest <in> [<in>...] <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "arg");
        job.setJarByClass(ScoreAvgTest.class);
        job.setMapperClass(MapperClass.class);
        // job.setCombinerClass(ReducerClass.class);
        System.out.println("Mapper over");
        job.setReducerClass(ReducerClass.class);
        System.out.println("Reducer over");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
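One detail worth noting in the driver is that job.setCombinerClass(ReducerClass.class) is commented out, and it has to stay that way: the reducer emits a formatted result string and drops the record count, so running it as a combiner would feed the real reducer values it cannot parse and would turn the final result into an average of partial averages. A combiner for this job would have to keep the sums and the count, roughly like the hypothetical sketch below (CombinerClass is not part of the original program, and the reducer would then need to be adapted to read its "sumOpen sumClose count" output):

    // Hypothetical combiner sketch (not in the original code): pre-aggregates the
    // "open close" pairs emitted by the mapper into one "sumOpen sumClose count"
    // triple per company, so exact averages can still be computed on the reduce side.
    public static class CombinerClass extends Reducer<Text, Text, Text, Text> {

        private final Text out = new Text();

        @Override
        protected void reduce(Text companyName, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            double sumOpen = 0.0;
            double sumClose = 0.0;
            int count = 0;
            for (Text v : values) {
                String[] pair = v.toString().split(" ");
                sumOpen += Double.parseDouble(pair[0]);   // opening price
                sumClose += Double.parseDouble(pair[1]);  // closing price
                count++;
            }
            out.set(sumOpen + " " + sumClose + " " + count);
            context.write(companyName, out);
        }
    }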
The files generated in the output directory after the job has run:
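Since the reducer writes a Text key and a Text value and the job uses the default TextOutputFormat, each line of the part-r-00000 file is the company name, a tab, and the formatted result string. With the made-up company from the earlier input fragment, a line would look roughly like:

CompanyA	Average opening price: 10.5, average closing price: 10.68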