java和python实现一个加权SlopeOne推荐算法

Posted 石头木

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了java和python实现一个加权SlopeOne推荐算法相关的知识,希望对你有一定的参考价值。

一.加权SlopeOne算法公式:

(1).求得所有item之间的评分偏差


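$$dev_{j,i}=\frac{\sum_{u\in S_{j,i}(X)}(u_j-u_i)}{card(S_{j,i}(X))}$$

上式中 $S_{j,i}(X)$ 表示同时对项目j与项目i评分的用户集合,$u_j$、$u_i$ 分别为用户u对项目j、项目i的评分;分子部分为项目j与项目i的评分偏差之和,分母部分为同时对项目j与项目i评分的用户数。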

(2).加权预测评分

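$$P^{wS1}(u)_j=\frac{\sum_{i\in S(u)-\{j\}}(dev_{j,i}+u_i)\,c_{j,i}}{\sum_{i\in S(u)-\{j\}}c_{j,i}}$$

上式中 $P^{wS1}(u)_j$ 表示用户u对项目j的评分预测,$S(u)$ 为用户u已评分的项目集合,$dev_{j,i}$ 为项目j对项目i的平均评分偏差;分子为项目j对项目i的偏差加上用户u对项目i的评分,再乘以权重 $c_{j,i}$ 后对所有i求和;$c_{j,i}$ 表示同时对项目j与项目i评分的用户数,分母为这些权重之和。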

二.python实现

 1 #!/usr/bin/python
 2 # -*- coding: utf-8 -*-
 3 
 4 user_data = {"小明": {"张学友": 4, "周杰伦": 3, "刘德华": 4},
 5           "小海": {"张学友": 5, "周杰伦": 2},
 6           "李梅": {"周杰伦": 3.5, "刘德华": 4},
 7           "李磊": {"张学友": 5, "刘德华": 3}}
 8 
 9 class recommender:
10 
11     def __init__(self,data):
12         self.frequency={}
13         self.deviation={}
14         self.data=data
15 
16     #计算所有item之间评分偏差
17     def computeDeviation(self):
18         for ratings in self.data.values():
19             for item,rating in ratings.items():
20                 self.frequency.setdefault(item,{})
21                 self.deviation.setdefault(item,{})
22                 for item2,rating2 in ratings.items():
23                     if item!=item2:
24                         self.frequency[item].setdefault(item2,0)
25                         self.deviation[item].setdefault(item2,0.0)
26                         self.frequency[item][item2]+=1#两个项目的用户数
27                         self.deviation[item][item2]+=(rating-rating2)#累加两个评分差值
28         for item,ratings in self.deviation.items():
29             for item2 in ratings:
30                 ratings[item2]/=self.frequency[item][item2]
31 
32     #评分预测
33     def predictRating(self,userRatings,k):
34         recommendations={}
35         frequencies={}
36         for item,rating in userRatings.items():
37             for diffItem,diffRating in self.deviation.items():
38                 if diffItem not in userRatings and item in self.deviation[diffItem]:
39                     fre=self.frequency[diffItem][item]
40                     recommendations.setdefault(diffItem,0.0)
41                     frequencies.setdefault(diffItem,0)
42                     #分子部分
43                     recommendations[diffItem]+=(diffRating[item]+rating)*fre
44                     #分母部分
45                     frequencies[diffItem]+=fre
46         recommendations=[(k,v/frequencies[k]) for (k,v) in recommendations.items()]
47         #排序返回前k个
48         recommendations.sort(key=lambda a_tuple:a_tuple[1],reverse=True)
49         return recommendations[:k]
50 
if __name__ == '__main__':
    # Demo: build the deviation tables from the sample data, then print the
    # predicted ratings for the user 李磊 (at most 5 entries).
    r = recommender(user_data)
    r.computeDeviation()
    u = user_data['李磊']
    print(r.predictRating(u, 5))

三.java实现

  1 import java.util.HashMap;
  2 import java.util.Map;
  3 import java.util.List;
  4 import java.util.ArrayList;
  5 import java.util.Comparator;
  6 import java.util.Collections;
  7 
  8 /**
  9  * Created by  on 2016/12/8.ShiYan
 10  * 一.计算所有物品对的偏差
 11  * 二.利用偏差进行预测
 12  */
 13 public class SlopeOne {
 14     Map<String,Map<String,Integer>> frequency=null;
 15     Map<String,Map<String,Double>> deviation=null;
 16     Map<String,Map<String,Integer>> user_rating=null;
 17 
 18     public SlopeOne( Map<String,Map<String,Integer>> user_rating){
 19         frequency=new HashMap<String,Map<String,Integer>>();
 20         deviation=new HashMap<String,Map<String,Double>>();
 21         this.user_rating=user_rating;
 22     }
 23 
 24     /**
 25      * 所有有item间的评分偏差
 26      */
 27     public void computeDeviation(){
 28         for(Map.Entry<String,Map<String,Integer>> ratingsEntry:user_rating.entrySet()){
 29             for(Map.Entry<String,Integer> ratingEntry:ratingsEntry.getValue().entrySet()){
 30                 String item=ratingEntry.getKey();
 31                 int rating=ratingEntry.getValue();
 32                 Map<String,Integer> itemFrequency=null;
 33                 if(!frequency.containsKey(item)){
 34                     itemFrequency=new HashMap<String, Integer>();
 35                     frequency.put(item,itemFrequency);
 36                 }else{
 37                     itemFrequency=frequency.get(item);
 38                 }
 39 
 40                 Map<String,Double> itemDeviation=null;
 41                 if(!deviation.containsKey(item)){
 42                     itemDeviation=new HashMap<String, Double>();
 43                     deviation.put(item,itemDeviation);
 44                 }else{
 45                     itemDeviation=deviation.get(item);
 46                 }
 47 
 48                 for(Map.Entry<String,Integer> ratingEntry2:ratingsEntry.getValue().entrySet()){
 49                     String item2=ratingEntry2.getKey();
 50                     int rating2=ratingEntry2.getValue();
 51                     if(!item.equals(item2)){
 52                         //两个项目的用户数
 53                         itemFrequency.put(item2,itemFrequency.containsKey(item2)?itemFrequency.get(item2)+1:0);
 54                         //两个项目的评分偏差,累加
 55                         itemDeviation.put(item2,itemDeviation.containsKey(item2)?itemDeviation.get(item2)+(rating-rating2):0.0);
 56                     }
 57                 }
 58             }
 59         }
 60 
 61         for(Map.Entry<String,Map<String,Double>> itemsDeviation:deviation.entrySet()){
 62             String item=itemsDeviation.getKey();
 63             Map<String,Double> itemDev=itemsDeviation.getValue();
 64             Map<String,Integer> itemFre=frequency.get(item);
 65             for(String itemName:itemDev.keySet()){
 66                 itemDev.put(itemName,itemDev.get(itemName)/itemFre.get(itemName));
 67             }
 68         }
 69     }
 70 
 71     /**
 72      * 评分预测
 73      * @param userRating 目标用户的评分
 74      * @param k 返回前k个
 75      * @return
 76      */
 77     public  List<Map.Entry<String,Double>> predictRating(Map<String,Integer> userRating,int k){
 78         Map<String,Double> recommendations=new HashMap<String,Double>();
 79         Map<String,Integer> frequencies=new HashMap<String, Integer>();
 80         for(Map.Entry<String,Integer> userEntry:userRating.entrySet()){
 81             String userItem=userEntry.getKey();
 82             double rating=userEntry.getValue();
 83             for(Map.Entry<String,Map<String,Double>> deviationEntry:deviation.entrySet()){
 84                 String item=deviationEntry.getKey();
 85                 Map<String,Double> itemDeviation=deviationEntry.getValue();
 86                 Map<String,Integer> itemFrequency=frequency.get(item);
 87                 if(!userRating.containsKey(item) && itemDeviation.containsKey(userItem)){
 88                     int fre=itemFrequency.get(userItem);
 89                     if(!recommendations.containsKey(item))
 90                         recommendations.put(item,0.0);
 91                     if(!frequencies.containsKey(item))
 92                         frequencies.put(item,0);
 93                     //分子部分
 94                     recommendations.put(item,recommendations.get(item)+(itemDeviation.get(userItem)+rating)*fre);
 95                     //分母部分
 96                     frequencies.put(item,frequencies.get(item)+fre);
 97                 }
 98             }
 99         }
100         for(Map.Entry<String,Double> recoEntry:recommendations.entrySet()){
101             String key=recoEntry.getKey();
102             double value=recoEntry.getValue()/frequencies.get(key);
103             recommendations.put(key,value);
104         }
105         //排序,这里还可以使用优先队列返回top_k
106         List<Map.Entry<String,Double>> list_map=new ArrayList<Map.Entry<String,Double>>(recommendations.entrySet());
107         Collections.sort(list_map,new Comparator<Map.Entry<String,Double>>(){
108                     @Override
109                     public int compare(Map.Entry<String, Double> o1, Map.Entry<String, Double> o2) {
110                         if(o2.getValue()>o1.getValue())
111                             return 1;
112                         else if(o2.getValue()<o1.getValue())
113                             return -1;
114                         else
115                             return 0;
116                     }
117                 }
118         );
119         List<Map.Entry<String,Double>> top_k=new ArrayList<Map.Entry<String, Double>>();
120         if(list_map.size()<k) k=list_map.size();
121         for(int i=0;i<k;i++){
122             top_k.add(list_map.get(i));
123         }
124         return top_k;
125     }
126 
127     public static void main(String[] args){
128         Map<String,Map<String,Integer>> userRatings=new HashMap<String, Map<String, Integer>>();
129         Map<String,Integer> xiMingRating=new HashMap<String, Integer>();
130         xiMingRating.put("张学友",4);
131         xiMingRating.put("周杰伦",3);
132         xiMingRating.put("刘德华",4);
133         Map<String,Integer> xiHaiRating=new HashMap<String, Integer>();
134         xiHaiRating.put("张学友",5);
135         xiHaiRating.put("周杰伦",2);
136         Map<String,Integer> liMeiRating=new HashMap<String, Integer>();
137         liMeiRating.put("周杰伦",3);
138         liMeiRating.put( "刘德华",4);
139         Map<String,Integer> liLeiRating=new HashMap<String, Integer>();
140         liLeiRating.put("张学友",5);
141         liLeiRating.put("刘德华",3);
142         userRatings.put("xiMing",xiMingRating);
143         userRatings.put("xiHai",xiHaiRating);
144         userRatings.put("liMei", liMeiRating);
145         userRatings.put("liLei",liLeiRating);
146 
147         SlopeOne slopOne=new SlopeOne(userRatings);
148         slopOne.computeDeviation();
149         List<Map.Entry<String,Double>> top_k=slopOne.predictRating(userRatings.get("liLei"),5);
150         for(Map.Entry<String,Double> item:top_k){
151             System.out.println(item.getKey()+"   "+item.getValue());
152         }
153     }
154 }

 

以上是关于java和python实现一个加权SlopeOne推荐算法的主要内容,如果未能解决你的问题,请参考以下文章

java 用于在未加权的二分图中找到最大匹配的匈牙利算法的Java实现

Java加权负载均衡策略

一文速学-时间序列分析算法之加权移动平均法详解+Python代码实现

加权斜率一算法? (从 Python 移植到 R)

`python`中的加权高斯核密度估计

我们如何构建具有加权边缘的树