Collaborative Filtering (Tianchi Competition Problem)
I. Types of recommendation algorithms

1. By the data they use:

- Collaborative filtering: UserCF, ItemCF, ModelCF
- Content-based recommendation: user content attributes and item content attributes
- Social filtering: based on the user's social network

2. Case study: the Tianchi big data competition
The following data fields are made available:

| Field | Description | Extraction notes |
| --- | --- | --- |
| user_id | User identifier | Sampled & field-encrypted |
| Time | Time of the action | Day-level precision, year hidden |
| action_type | The user's action on a brand | One of four actions: click, purchase, add-to-cart, favorite |
| brand_id | Numeric brand ID | Sampled & field-encrypted |
The dataset covers on the order of ten million Tmall users and tens of thousands of Tmall brands, with four months of behavior records.

The training data lives in the Tianchi cluster table t_alibaba_bigdata_user_brand_total_1, with fields user_id, brand_id, type, and visit_datetime.
3. The four user action types (type) are encoded as:

Click: 0; Purchase: 1; Favorite: 2; Add-to-cart: 3
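Note that the jobs below match on the string labels click / collect / cart / alipay rather than these numeric codes, and weight them 1.0–4.0 when building the rating matrix in step 3. A minimal sketch of the assumed correspondence (this enum is illustrative only and not part of the original code):

```java
// Assumed correspondence between the competition's numeric codes,
// the string labels matched in the MapReduce jobs below, and the
// rating weights applied in Step02 (click=1.0, collect=2.0, cart=3.0, alipay=4.0).
enum Action {
    CLICK(0, "click", 1.0),
    ALIPAY(1, "alipay", 4.0),   // purchase
    COLLECT(2, "collect", 2.0), // favorite
    CART(3, "cart", 3.0);       // add-to-cart

    final int code;
    final String label;
    final double weight;

    Action(int code, String label, double weight) {
        this.code = code;
        this.label = label;
        this.weight = weight;
    }
}
```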
II. Implementation approach and code

1. Deduplicate the raw data (Step01)
```java
package com.oracle.www.TianChi_compition;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/*
 * Deduplicate the raw data and drop the header line.
 */
public class Step01 {
    static class MyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // byte offset 0 is the header line of the file; skip it
            if (key.get() > 0) {
                context.write(value, NullWritable.get());
            }
        }
    }

    static class MyReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<NullWritable> value,
                Reducer<Text, NullWritable, Text, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // identical lines are grouped under one key, so writing the key once deduplicates
            context.write(key, NullWritable.get());
        }
    }

    public static void main(String[] args)
            throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(Step01.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // delete the output directory if it already exists
        Path outPath = new Path("hdfs://192.168.9.13:8020/deweight");
        FileSystem fs = outPath.getFileSystem(conf);
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.9.13:8020/TianmaoData"));
        FileOutputFormat.setOutputPath(job, outPath);
        job.waitForCompletion(true);
    }
}
```
2. Build the item co-occurrence matrix (Step03)
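Before the full job, here is a toy, in-memory illustration of the counting idea (not part of the original post): for each user, every ordered item pair from that user's list, including an item paired with itself, contributes 1, and the totals across all users form the co-occurrence matrix.

```java
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Toy, in-memory version of what MyMapper and MyReducer below compute together:
// how often two items appear in the same user's interaction list.
public class CooccurrenceToy {
    public static void main(String[] args) {
        List<List<String>> itemsPerUser = Arrays.asList(
                Arrays.asList("A", "B", "C"),   // user 1
                Arrays.asList("A", "B"));       // user 2

        Map<String, Integer> cooccurrence = new HashMap<>();
        for (List<String> items : itemsPerUser) {
            for (String i : items) {
                for (String j : items) {          // includes i == j, exactly as in Step03
                    cooccurrence.merge(i + "-" + j, 1, Integer::sum);
                }
            }
        }
        System.out.println(cooccurrence); // e.g. A-B=2, A-C=1, A-A=2, ...
    }
}
```

The MapReduce job itself reads the rating-matrix output from step 3 and does the same counting at scale: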
```java
package com.oracle.www.TianChi_compition;

import java.io.IOException;
import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/*
 * Build the co-occurrence matrix.
 * Map:    emit <item1-item2, 1> for every item pair in one user's list
 * Reduce: sum the 1s per pair -> <item1-item2, count>
 */
public class Step03 {

    static class MyMapper extends Mapper<Text, Text, Text, IntWritable> {
        Text k = new Text();
        IntWritable v = new IntWritable();

        @Override
        protected void map(Text key, Text value, Mapper<Text, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            ArrayList<String> itemList = new ArrayList<>();
            String line = value.toString();
            String[] datas = line.split("\t");
            for (String data : datas) { // collect every item this user has interacted with
                String[] item_mark = data.split(":");
                itemList.add(item_mark[0]);
            }

            // every ordered pair (including an item with itself) contributes 1
            for (int i = 0; i < itemList.size(); i++) {
                for (int j = 0; j < itemList.size(); j++) {
                    k.set(itemList.get(i) + "-" + itemList.get(j));
                    v.set(1);
                    context.write(k, v);
                }
            }
        }
    }

    static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        Text k = new Text();
        IntWritable v = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> value,
                Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : value) {
                sum += val.get();
            }
            k.set(key.toString());
            v.set(sum);
            context.write(k, v);
        }
    }

    public static void main(String[] args) throws ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf);

            job.setJarByClass(Step03.class);
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            job.setInputFormatClass(KeyValueTextInputFormat.class);

            // delete the output directory if it already exists
            Path outPath = new Path("hdfs://192.168.9.13:8020/implyCount");
            FileSystem fs = outPath.getFileSystem(conf);
            if (fs.exists(outPath)) {
                fs.delete(outPath, true); // true: delete recursively even if the directory is not empty
            }
            FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.9.13:8020/gradeMarking"));
            FileOutputFormat.setOutputPath(job, outPath);
            job.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
```
3. Rating matrix: scores derived from each user's actions on an item (Step02)
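As a quick sanity check of the formula used in the reducer below (the numbers are made up): if a user clicked a brand 3 times and added it to the cart once, then counter = 2 distinct action types and the score is 3/2 × 1.0 + 1/2 × 3.0 = 3.0. The Step02 job builds this matrix from the deduplicated data: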
```java
package com.oracle.www.TianChi_compition;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/*
 * Build the rating matrix.
 * Map:    emit <user, item + ":" + action>
 * Reduce: aggregate into <user, item1:score \t item2:score \t ...>
 */
public class Step02 {

    static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
        Text userId = new Text();
        Text shopping_operate = new Text();

        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // columns as used here: datas[0] = brand, datas[1] = user, datas[2] = action
            String line = value.toString();
            String[] datas = line.split("\t");
            userId.set(datas[1]);
            shopping_operate.set(datas[0] + ":" + datas[2]);
            context.write(userId, shopping_operate);
        }
    }

    static class MyReducer extends Reducer<Text, Text, Text, Text> {
        Text v = new Text();
        double click = 0;
        double collect = 0;
        double cart = 0;
        double alipay = 0;

        @Override
        protected void reduce(Text key, Iterable<Text> value, Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // shoppingOperate_counter: <item, <action, action count>>
            HashMap<String, HashMap<String, Integer>> shoppingOperate_counter = new HashMap<>();
            String[] temp_str = null;
            String shoppingName = null;
            String shoppingOperate = null;
            HashMap<String, Integer> operate_counter = null; // inner map: action -> count for one item
            for (Text val : value) {
                temp_str = val.toString().split(":");
                shoppingName = temp_str[0];
                shoppingOperate = temp_str[1];
                if (!shoppingOperate_counter.containsKey(shoppingName)) { // first time we see this item
                    operate_counter = new HashMap<>();
                    operate_counter.put(shoppingOperate, 1);
                    shoppingOperate_counter.put(shoppingName, operate_counter);
                } else { // item already present
                    operate_counter = shoppingOperate_counter.get(shoppingName);
                    if (!operate_counter.containsKey(shoppingOperate)) { // item present, new action
                        operate_counter.put(shoppingOperate, 1);
                    } else {
                        operate_counter.put(shoppingOperate, operate_counter.get(shoppingOperate) + 1);
                    }
                }
            }

            // walk shoppingOperate_counter and compute the score per item
            Iterator<String> iter = shoppingOperate_counter.keySet().iterator();
            StringBuffer shopping_marking = new StringBuffer();
            while (iter.hasNext()) {
                click = 0;
                collect = 0;
                cart = 0;
                alipay = 0;
                shoppingName = iter.next();
                operate_counter = shoppingOperate_counter.get(shoppingName);
                Iterator<String> operateIter = operate_counter.keySet().iterator();
                int counter = 0; // number of distinct action types on this item
                while (operateIter.hasNext()) {
                    counter++;
                    shoppingOperate = operateIter.next();
                    if ("click".equals(shoppingOperate)) {
                        click = operate_counter.get(shoppingOperate);
                    } else if ("collect".equals(shoppingOperate)) {
                        collect = operate_counter.get(shoppingOperate);
                    } else if ("cart".equals(shoppingOperate)) {
                        cart = operate_counter.get(shoppingOperate);
                    } else {
                        alipay = operate_counter.get(shoppingOperate);
                    }
                }
                // weight the action counts: click x1, collect x2, cart x3, alipay x4
                double sum = click / counter * 1.0 + collect / counter * 2.0
                        + cart / counter * 3.0 + alipay / counter * 4.0;
                shopping_marking.append(shoppingName + ":" + sum + "\t");
            }
            v.set(shopping_marking.toString());
            context.write(key, v);
        }
    }

    public static void main(String[] args) throws ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf);
            job.setJarByClass(Step02.class);
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            // output goes to /gradeMarking, which Step03 and Step04 read;
            // the original post wrote back to /deweight, which would have clobbered the input
            Path outPath = new Path("hdfs://192.168.9.13:8020/gradeMarking");
            FileSystem fs = outPath.getFileSystem(conf);
            if (fs.exists(outPath)) {
                fs.delete(outPath, true);
            }
            FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.9.13:8020/deweight"));
            FileOutputFormat.setOutputPath(job, outPath);
            job.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
```
4. Multiply the two matrices to get the three-dimensional matrix of partial products (Step04)
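For intuition (the numbers are made up): suppose user u1's rating for brand B is 3.0 and the co-occurrence count for the pair A-B is 2. The reducer handling key B then emits <u1:A, 6.0>, one partial product toward u1's recommendation value for A. Because Step03 emits both orderings of every pair, the co-occurrence matrix is symmetric, and summing all such products over every brand the user has rated is exactly the matrix multiplication; the summation itself is deferred to step 5. The join-and-multiply job follows: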
```java
package com.oracle.www.TianChi_compition;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/*
 * Multiply the rating matrix by the co-occurrence matrix.
 * The mapper re-keys both inputs by item; the reducer joins them and
 * emits one partial product <user:item, rating * co-occurrence count>.
 */
public class Step04 {
    static class MyMapper extends Mapper<Text, Text, Text, Text> {
        String parentName = null;
        Text k = new Text();
        Text v = new Text();

        @Override
        protected void setup(Mapper<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
            // remember which input directory this split came from
            FileSplit fs = (FileSplit) context.getInputSplit();
            parentName = fs.getPath().getParent().getName();
        }

        @Override
        protected void map(Text key, Text value, Mapper<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] datas = null;
            if (parentName.equals("gradeMarking")) { // rating matrix: user \t item:score ...
                datas = line.split("\t");
                for (String data : datas) {
                    String[] item_mark = data.split(":");
                    k.set(item_mark[0]);
                    v.set(key.toString() + ":" + item_mark[1]); // <item, user:score>
                    context.write(k, v);
                }
            } else { // co-occurrence matrix: item1-item2 \t count
                datas = key.toString().split("-");
                k.set(datas[1]);
                v.set(datas[0] + ":" + line); // <item2, item1:count>
                context.write(k, v);
            }
        }
    }

    static class MyReducer extends Reducer<Text, Text, Text, DoubleWritable> {
        Text k = new Text();
        DoubleWritable v = new DoubleWritable();
        // input per key: <item x, [user1:score1, user2:score2, ..., item1:count1, item2:count2, ...]>
        // where count is how often the two items appeared together

        @Override
        protected void reduce(Text key, Iterable<Text> value, Reducer<Text, Text, Text, DoubleWritable>.Context context)
                throws IOException, InterruptedException {
            HashMap<String, Double> user_mark = new HashMap<>();
            HashMap<String, Double> item_counter = new HashMap<>();
            // split the values into user:score and item:count maps
            // (user ids are assumed to start with "u", item ids not)
            String[] datas = null;
            for (Text val : value) {
                datas = val.toString().split(":");
                if (datas[0].startsWith("u")) {
                    user_mark.put(datas[0], Double.parseDouble(datas[1]));
                } else {
                    item_counter.put(datas[0], Double.parseDouble(datas[1]));
                }
            }

            // cross product: every user's score times every co-occurrence count
            String userName = null;
            double userMark = 0.0;
            String itemName = null;
            double iterCounter = 0;
            for (Entry<String, Double> entry1 : user_mark.entrySet()) {
                userName = entry1.getKey();
                userMark = entry1.getValue();
                for (Entry<String, Double> entry2 : item_counter.entrySet()) {
                    itemName = entry2.getKey();
                    iterCounter = entry2.getValue();
                    k.set(userName + ":" + itemName);
                    v.set(userMark * iterCounter);
                    context.write(k, v);
                }
            }
        }
    }

    public static void main(String[] args) throws ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf);

            job.setJarByClass(Step04.class);
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(DoubleWritable.class);

            job.setInputFormatClass(KeyValueTextInputFormat.class);

            // delete the output directory if it already exists
            Path outPath = new Path("hdfs://192.168.9.13:8020/mark&implyCount_multiply");
            FileSystem fs = outPath.getFileSystem(conf);
            if (fs.exists(outPath)) {
                fs.delete(outPath, true);
            }
            FileInputFormat.setInputPaths(job, new Path[] { new Path("hdfs://192.168.9.13:8020/gradeMarking"),
                    new Path("hdfs://192.168.9.13:8020/implyCount") });
            FileOutputFormat.setOutputPath(job, outPath);
            job.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
```
5. Sum the partial products to get every user's recommendation value for every item (two-dimensional matrix, Step05)
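Continuing the made-up example: if u1:A accumulates partial products 6.0 (via B) and 2.5 (via C), the reducer outputs u1 → A:8.5, unless u1 has already purchased A, in which case the pair is dropped in the map phase. The Step05 job below does the filtering in the mapper (using the purchase records from the dedup output shipped through the distributed cache) and the summing in the reducer: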
```java
package com.oracle.www.TianChi_compition;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/*
 * Filter out items the user has already purchased, then sum the partial products.
 */
public class Step05 {
    static class MyMapper extends Mapper<Text, Text, Text, Text> {
        // boughtSet holds "user:item" pairs the user has already purchased;
        // a set is enough because we only need membership tests
        HashSet<String> boughtSet = new HashSet<>();
        BufferedReader br = null;

        // setup() loads the purchased pairs from the cached dedup output
        @Override
        protected void setup(Mapper<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
            br = new BufferedReader(new FileReader("part-r-00000"));
            String line = null;
            String[] datas = null;
            while ((line = br.readLine()) != null) {
                datas = line.split("\t");
                if ("alipay".equals(datas[2])) {
                    boughtSet.add(datas[1] + ":" + datas[0]); // user:item
                }
            }
            br.close();
        }

        // drop pairs the user already bought so they are never recommended
        @Override
        protected void map(Text key, Text value, Mapper<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            if (!boughtSet.contains(key.toString())) {
                context.write(key, value);
            }
        }
    }

    static class MyReducer extends Reducer<Text, Text, Text, Text> {
        Text k = new Text();
        Text v = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> value, Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // sum all partial products for this user:item pair
            double rank = 0.0;
            for (Text val : value) {
                rank += Double.parseDouble(val.toString());
            }
            k.set(key.toString().split(":")[0]);              // user
            v.set(key.toString().split(":")[1] + ":" + rank); // item:recommendation value
            context.write(k, v);
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(Step05.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(KeyValueTextInputFormat.class);

        // distribute the dedup output so every mapper can read the purchase records locally
        job.addCacheFile(new URI("hdfs://192.168.9.13:8020/deweight/part-r-00000"));

        Path outPath = new Path("hdfs://192.168.9.13:8020/shoppingRecommend");
        FileSystem fs = outPath.getFileSystem(conf);
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.9.13:8020/mark&implyCount_multiply"));
        FileOutputFormat.setOutputPath(job, outPath);

        job.waitForCompletion(true);
    }
}
```
6. Sort by recommendation value in descending order and keep the top ten items per user (Step06)
```java
package com.oracle.www.TianChi_compition;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collections;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/*
 * Sort each user's candidate items and keep the top ten recommendations.
 */
public class Step06 {
    // split each line and send <user, Sort(item, rank)> to the reducer
    static class MyMapper extends Mapper<Text, Text, Text, Sort> {
        Sort sort = null;

        @Override
        protected void map(Text key, Text value, Mapper<Text, Text, Text, Sort>.Context context)
                throws IOException, InterruptedException {
            sort = new Sort(value.toString().split(":")[0], Double.parseDouble(value.toString().split(":")[1]));
            context.write(key, sort);
        }
    }

    // sort one user's recommended items by weight and concatenate the top ten
    static class MyReducer extends Reducer<Text, Sort, Text, Text> {
        ArrayList<Sort> list = new ArrayList<>();
        Text v = new Text();

        @Override
        protected void reduce(Text key, Iterable<Sort> value, Reducer<Text, Sort, Text, Text>.Context context)
                throws IOException, InterruptedException {
            StringBuffer sb = new StringBuffer();
            list.clear();
            // Hadoop reuses the value object while iterating over custom writables,
            // so each element must be copied (here via BeanUtils.copyProperties) before it is stored
            for (Sort sort : value) {
                Sort tempSort = new Sort();
                try {
                    BeanUtils.copyProperties(tempSort, sort);
                    list.add(tempSort);
                } catch (IllegalAccessException | InvocationTargetException e) {
                    e.printStackTrace();
                }
            }

            Collections.sort(list);
            for (int i = 0; i < list.size() && i < 10; i++) {
                sb.append(list.get(i));
            }
            v.set(sb.toString());
            context.write(key, v);
        }
    }

    static public class Sort implements WritableComparable<Sort> {
        private String shoppingName;
        private double shoppingRank;

        public Sort() {
        }

        public Sort(String shoppingName, double shoppingRank) {
            this.shoppingName = shoppingName;
            this.shoppingRank = shoppingRank;
        }

        public String getShoppingName() {
            return shoppingName;
        }

        public void setShoppingName(String shoppingName) {
            this.shoppingName = shoppingName;
        }

        public double getShoppingRank() {
            return shoppingRank;
        }

        public void setShoppingRank(double shoppingRank) {
            this.shoppingRank = shoppingRank;
        }

        @Override
        public String toString() {
            return shoppingName + ":" + shoppingRank + "\t";
        }

        @Override
        public void write(DataOutput out) throws IOException {
            out.writeDouble(shoppingRank);
            out.writeUTF(shoppingName);
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            this.shoppingRank = in.readDouble();
            this.shoppingName = in.readUTF();
        }

        @Override
        public int compareTo(Sort o) {
            // descending by recommendation value
            return Double.compare(o.getShoppingRank(), this.getShoppingRank());
        }
    }

    public static void main(String[] args) throws ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf);

            job.setJarByClass(Step06.class);
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Sort.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            job.setInputFormatClass(KeyValueTextInputFormat.class);

            Path outPath = new Path("hdfs://192.168.9.13:8020/ShoppingRecommend_Sort");
            FileSystem fs = outPath.getFileSystem(conf);
            if (fs.exists(outPath)) {
                fs.delete(outPath, true);
            }

            FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.9.13:8020/shoppingRecommend"));
            FileOutputFormat.setOutputPath(job, outPath);

            job.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
```
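The six jobs must run in order because each reads the previous job's output directory. A minimal driver sketch (the RecommendDriver class name is mine, not from the original post; it simply invokes each step's blocking main in sequence and does not check per-job success):

```java
package com.oracle.www.TianChi_compition;

// Hypothetical driver: runs the six jobs in order. Each Step's main()
// blocks on waitForCompletion, so sequential invocation preserves the
// data dependencies between steps.
public class RecommendDriver {
    public static void main(String[] args) throws Exception {
        Step01.main(args); // dedup raw data             -> /deweight
        Step02.main(args); // rating matrix              -> /gradeMarking
        Step03.main(args); // co-occurrence matrix       -> /implyCount
        Step04.main(args); // multiply the two matrices  -> /mark&implyCount_multiply
        Step05.main(args); // sum + drop bought items    -> /shoppingRecommend
        Step06.main(args); // sort, keep top ten         -> /ShoppingRecommend_Sort
    }
}
```

Packaged into a jar, it could be launched with something like `hadoop jar tianchi-cf.jar com.oracle.www.TianChi_compition.RecommendDriver` (jar name assumed).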