好友推薦—基於關係的java和spark程式碼實現
阿新 • • 發佈:2019-01-03
本文主要實現基於二度好友的推薦。
測試資料為自己隨手畫的關係圖
把圖片整理成文字資訊如下:
a b c d e f y
b c a f g
c a b d
d c a e h q r
e f h d a
f e a b g
g h f b
h e g i d
i j m n q h
j i k l
k j
l j
m i
n i o
o n p
p o q
q i d
r d s t w v
s r t u
t r s u
u s t
v r w x
w r v x
x w v
y a z
z y
每行的第一個為使用者,之後的為其好友。
在計算使用者之間的推薦分上參考上面部落格中的兩個數學公式分為兩個推薦分。
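一個公式為:$\mathrm{score}(A,B)=\dfrac{|F(A)\cap F(B)|}{|F(A)\cup F(B)|}$,其中 $F(X)$ 表示使用者 X 的好友集合(對應下方程式碼中的 Intersection/Union)。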
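上面是每個共同好友預設的提供1分,但是在生活中如果此共同好友的好友數比較少,則說明此好友可能更加重要,所以有了下面對每個共同好友加權重的公式(下方程式碼實際採用的是對好友數開方的形式):$\mathrm{wscore}(A,B)=\dfrac{\sum_{c\in F(A)\cap F(B)} 1/\sqrt{|F(c)|}}{|F(A)\cup F(B)|}$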
如果好友數差距過大,要對好友數進行開方或對數之類的,這就是要看產品方面對我們的要求了。
說的比較簡單,詳細理論可以看上面連結的部落格。
下面是單機版java的程式碼:
先建立一個用來描述好友之間關係的類
import java.util.List;

/**
 * Describes the relationship between two users, A and B, for friend
 * recommendation: the size of the union and intersection of their friend
 * lists, the list of common friends, and two recommendation scores.
 *
 * <p>The unweighted score is {@code Intersection / Union}. It is recomputed
 * every time either value is set, and is 0 while {@code Union} is 0.
 * The weighted score is supplied by the caller through {@link #setWscore}.
 */
public class Score {
    // Total number of distinct friends of A and B (size of the union).
    private int union;
    // Number of friends A and B have in common (size of the intersection).
    private int intersection;
    // The common friends of A and B.
    private List<Character> commonFriends;
    // The two users this object relates.
    private char userA;
    private char userB;
    // Unweighted score: intersection / union.
    private float score;
    // Score in which every common friend contributes an individual weight.
    private double wscore;

    /** Returns the weighted score. */
    public double getWscore() {
        return wscore;
    }

    /** Sets the weighted score. */
    public void setWscore(double wscore) {
        this.wscore = wscore;
    }

    /** Returns the unweighted score (intersection / union, or 0 when union is 0). */
    public float getScore() {
        return score;
    }

    public char getA() {
        return userA;
    }

    public void setA(char a) {
        userA = a;
    }

    public char getB() {
        return userB;
    }

    public void setB(char b) {
        userB = b;
    }

    public int getUnion() {
        return union;
    }

    /** Sets the union size and refreshes the unweighted score. */
    public void setUnion(int union) {
        this.union = union;
        refreshScore();
    }

    public int getIntersection() {
        return intersection;
    }

    /** Sets the intersection size and refreshes the unweighted score. */
    public void setIntersection(int intersection) {
        this.intersection = intersection;
        refreshScore();
    }

    /** Returns the list of common friends (the same list instance passed to {@link #setL}). */
    public List<Character> getL() {
        return commonFriends;
    }

    /** Sets the list of common friends; the list is stored as-is, not copied. */
    public void setL(List<Character> l) {
        this.commonFriends = l;
    }

    // Recomputes the unweighted score. With union == 0 the ratio is undefined,
    // so the score is reset to 0 instead of keeping a value from earlier inputs.
    private void refreshScore() {
        score = (union != 0) ? (float) intersection / union : 0f;
    }

    @Override
    public String toString() {
        return "Score{"
                + "Union=" + union
                + ", Intersection=" + intersection
                + ", l=" + commonFriends
                + ", A=" + userA
                + ", B=" + userB
                + ", score=" + score
                + ", wscore=" + wscore
                + '}';
    }
}
然後寫程式碼對資料進行操作:
import java.io.BufferedReader; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; public class fof { public static void main(String[] args) throws Exception { BufferedReader bufferedReader=new BufferedReader(new InputStreamReader(new FileInputStream("F:\\friends.txt"))); List<Score> scores=new ArrayList<Score>(); String s=null; List<char[]> l=new ArrayList<char[]>(); Map<Character,Integer> map=new HashMap<Character, Integer>(); while((s=bufferedReader.readLine())!=null){ char[] c = s.replace(" ", "").toCharArray(); l.add(c); map.put(c[0],c.length-1); } for(int i=0;i<l.size();i++){ for(int j=0;j<l.size();j++){ if(i!=j){ Score score = aaa(l.get(i), l.get(j)); if(score!=null){ List<Character> l1 = score.getL(); double wscore=0.0; for(char c:l1){ double sqrt = Math.sqrt(map.get(c)); double v = 1 / sqrt; wscore+=v; } double v = wscore / score.getUnion();//得出加權的分數 score.setWscore(v); scores.add(score); } } } } for(Score score:scores){ System.out.println(score); } } //此方法得出兩個使用者之間的關係類 public static Score aaa(char[] a,char[] b){ char x=a[0]; //直接好友返回null for(char y:b){ if(x==y){ return null; } } List<Character> l=new ArrayList<Character>(); for(int i=1;i<a.length;i++){ for(int j=1;j<b.length;j++){ if(a[i]==b[j]){ l.add(a[i]); } } } if(l.size()==0){ return null;//沒有共同好友返回null } Score score = new Score(); score.setL(l); score.setA(a[0]); score.setB(b[0]); score.setIntersection(l.size()); score.setUnion(a.length+b.length-l.size()-2); return score; } }
在執行後得出來的資料:
Score{Union=7, Intersection=2, l=[b, f], A=a, B=g, score=0.2857143, wscore=0.14285714285714285}
Score{Union=8, Intersection=2, l=[d, e], A=a, B=h, score=0.25, wscore=0.11353103630798289}
Score{Union=7, Intersection=1, l=[d], A=a, B=q, score=0.14285715, wscore=0.058321184351980436}
Score{Union=10, Intersection=1, l=[d], A=a, B=r, score=0.1, wscore=0.040824829046386304}
Score{Union=6, Intersection=1, l=[y], A=a, B=z, score=0.16666667, wscore=0.1178511301977579}
Score{Union=8, Intersection=2, l=[c, a], A=b, B=d, score=0.25, wscore=0.12319981995668611}
Score{Union=6, Intersection=2, l=[a, f], A=b, B=e, score=0.33333334, wscore=0.1513747150773105}
Score{Union=7, Intersection=1, l=[g], A=b, B=h, score=0.14285715, wscore=0.08247860988423227}
Score{Union=5, Intersection=1, l=[a], A=b, B=y, score=0.2, wscore=0.08164965809277261}
Score{Union=5, Intersection=2, l=[a, d], A=c, B=e, score=0.4, wscore=0.16329931618554522}
Score{Union=5, Intersection=2, l=[a, b], A=c, B=f, score=0.4, wscore=0.18164965809277261}
Score{Union=5, Intersection=1, l=[b], A=c, B=g, score=0.2, wscore=0.1}
Score{Union=6, Intersection=1, l=[d], A=c, B=h, score=0.16666667, wscore=0.06804138174397718}
Score{Union=4, Intersection=1, l=[d], A=c, B=q, score=0.25, wscore=0.10206207261596577}
Score{Union=7, Intersection=1, l=[d], A=c, B=r, score=0.14285715, wscore=0.058321184351980436}
Score{Union=4, Intersection=1, l=[a], A=c, B=y, score=0.25, wscore=0.10206207261596577}
Score{Union=8, Intersection=2, l=[c, a], A=d, B=b, score=0.25, wscore=0.12319981995668611}
Score{Union=8, Intersection=2, l=[a, e], A=d, B=f, score=0.25, wscore=0.11353103630798289}
Score{Union=8, Intersection=1, l=[h], A=d, B=g, score=0.125, wscore=0.0625}
Score{Union=9, Intersection=2, l=[h, q], A=d, B=i, score=0.22222222, wscore=0.13412297568739417}
Score{Union=7, Intersection=1, l=[q], A=d, B=p, score=0.14285715, wscore=0.10101525445522107}
Score{Union=8, Intersection=1, l=[r], A=d, B=s, score=0.125, wscore=0.05590169943749474}
Score{Union=8, Intersection=1, l=[r], A=d, B=t, score=0.125, wscore=0.05590169943749474}
Score{Union=8, Intersection=1, l=[r], A=d, B=v, score=0.125, wscore=0.05590169943749474}
Score{Union=8, Intersection=1, l=[r], A=d, B=w, score=0.125, wscore=0.05590169943749474}
Score{Union=7, Intersection=1, l=[a], A=d, B=y, score=0.14285715, wscore=0.058321184351980436}
Score{Union=6, Intersection=2, l=[f, a], A=e, B=b, score=0.33333334, wscore=0.1513747150773105}
Score{Union=5, Intersection=2, l=[d, a], A=e, B=c, score=0.4, wscore=0.16329931618554522}
Score{Union=5, Intersection=2, l=[f, h], A=e, B=g, score=0.4, wscore=0.2}
Score{Union=8, Intersection=1, l=[h], A=e, B=i, score=0.125, wscore=0.0625}
Score{Union=5, Intersection=1, l=[d], A=e, B=q, score=0.2, wscore=0.08164965809277261}
Score{Union=8, Intersection=1, l=[d], A=e, B=r, score=0.125, wscore=0.051031036307982884}
Score{Union=5, Intersection=1, l=[a], A=e, B=y, score=0.2, wscore=0.08164965809277261}
Score{Union=5, Intersection=2, l=[a, b], A=f, B=c, score=0.4, wscore=0.18164965809277261}
Score{Union=8, Intersection=2, l=[e, a], A=f, B=d, score=0.25, wscore=0.11353103630798289}
Score{Union=6, Intersection=2, l=[e, g], A=f, B=h, score=0.33333334, wscore=0.17955837819827095}
Score{Union=5, Intersection=1, l=[a], A=f, B=y, score=0.2, wscore=0.08164965809277261}
Score{Union=7, Intersection=2, l=[f, b], A=g, B=a, score=0.2857143, wscore=0.14285714285714285}
Score{Union=5, Intersection=1, l=[b], A=g, B=c, score=0.2, wscore=0.1}
Score{Union=8, Intersection=1, l=[h], A=g, B=d, score=0.125, wscore=0.0625}
Score{Union=5, Intersection=2, l=[h, f], A=g, B=e, score=0.4, wscore=0.2}
Score{Union=7, Intersection=1, l=[h], A=g, B=i, score=0.14285715, wscore=0.07142857142857142}
Score{Union=8, Intersection=2, l=[e, d], A=h, B=a, score=0.25, wscore=0.11353103630798289}
Score{Union=7, Intersection=1, l=[g], A=h, B=b, score=0.14285715, wscore=0.08247860988423227}
Score{Union=6, Intersection=1, l=[d], A=h, B=c, score=0.16666667, wscore=0.06804138174397718}
Score{Union=6, Intersection=2, l=[e, g], A=h, B=f, score=0.33333334, wscore=0.17955837819827095}
Score{Union=6, Intersection=1, l=[i], A=h, B=j, score=0.16666667, wscore=0.07453559924999299}
Score{Union=4, Intersection=1, l=[i], A=h, B=m, score=0.25, wscore=0.11180339887498948}
Score{Union=5, Intersection=1, l=[i], A=h, B=n, score=0.2, wscore=0.08944271909999159}
Score{Union=4, Intersection=2, l=[i, d], A=h, B=q, score=0.5, wscore=0.21386547149095525}
Score{Union=8, Intersection=1, l=[d], A=h, B=r, score=0.125, wscore=0.051031036307982884}
Score{Union=9, Intersection=2, l=[q, h], A=i, B=d, score=0.22222222, wscore=0.13412297568739417}
Score{Union=8, Intersection=1, l=[h], A=i, B=e, score=0.125, wscore=0.0625}
Score{Union=7, Intersection=1, l=[h], A=i, B=g, score=0.14285715, wscore=0.07142857142857142}
Score{Union=5, Intersection=1, l=[j], A=i, B=k, score=0.2, wscore=0.11547005383792516}
Score{Union=5, Intersection=1, l=[j], A=i, B=l, score=0.2, wscore=0.11547005383792516}
Score{Union=6, Intersection=1, l=[n], A=i, B=o, score=0.16666667, wscore=0.1178511301977579}
Score{Union=6, Intersection=1, l=[q], A=i, B=p, score=0.16666667, wscore=0.1178511301977579}
Score{Union=6, Intersection=1, l=[i], A=j, B=h, score=0.16666667, wscore=0.07453559924999299}
Score{Union=3, Intersection=1, l=[i], A=j, B=m, score=0.33333334, wscore=0.14907119849998599}
Score{Union=4, Intersection=1, l=[i], A=j, B=n, score=0.25, wscore=0.11180339887498948}
Score{Union=4, Intersection=1, l=[i], A=j, B=q, score=0.25, wscore=0.11180339887498948}
Score{Union=5, Intersection=1, l=[j], A=k, B=i, score=0.2, wscore=0.11547005383792516}
Score{Union=1, Intersection=1, l=[j], A=k, B=l, score=1.0, wscore=0.5773502691896258}
Score{Union=5, Intersection=1, l=[j], A=l, B=i, score=0.2, wscore=0.11547005383792516}
Score{Union=1, Intersection=1, l=[j], A=l, B=k, score=1.0, wscore=0.5773502691896258}
Score{Union=4, Intersection=1, l=[i], A=m, B=h, score=0.25, wscore=0.11180339887498948}
Score{Union=3, Intersection=1, l=[i], A=m, B=j, score=0.33333334, wscore=0.14907119849998599}
Score{Union=2, Intersection=1, l=[i], A=m, B=n, score=0.5, wscore=0.22360679774997896}
Score{Union=2, Intersection=1, l=[i], A=m, B=q, score=0.5, wscore=0.22360679774997896}
Score{Union=5, Intersection=1, l=[i], A=n, B=h, score=0.2, wscore=0.08944271909999159}
Score{Union=4, Intersection=1, l=[i], A=n, B=j, score=0.25, wscore=0.11180339887498948}
Score{Union=2, Intersection=1, l=[i], A=n, B=m, score=0.5, wscore=0.22360679774997896}
Score{Union=3, Intersection=1, l=[o], A=n, B=p, score=0.33333334, wscore=0.2357022603955158}
Score{Union=3, Intersection=1, l=[i], A=n, B=q, score=0.33333334, wscore=0.14907119849998599}
Score{Union=6, Intersection=1, l=[n], A=o, B=i, score=0.16666667, wscore=0.1178511301977579}
Score{Union=7, Intersection=1, l=[q], A=p, B=d, score=0.14285715, wscore=0.10101525445522107}
Score{Union=6, Intersection=1, l=[q], A=p, B=i, score=0.16666667, wscore=0.1178511301977579}
Score{Union=3, Intersection=1, l=[o], A=p, B=n, score=0.33333334, wscore=0.2357022603955158}
Score{Union=7, Intersection=1, l=[d], A=q, B=a, score=0.14285715, wscore=0.058321184351980436}
Score{Union=4, Intersection=1, l=[d], A=q, B=c, score=0.25, wscore=0.10206207261596577}
Score{Union=5, Intersection=1, l=[d], A=q, B=e, score=0.2, wscore=0.08164965809277261}
Score{Union=4, Intersection=2, l=[i, d], A=q, B=h, score=0.5, wscore=0.21386547149095525}
Score{Union=4, Intersection=1, l=[i], A=q, B=j, score=0.25, wscore=0.11180339887498948}
Score{Union=2, Intersection=1, l=[i], A=q, B=m, score=0.5, wscore=0.22360679774997896}
Score{Union=3, Intersection=1, l=[i], A=q, B=n, score=0.33333334, wscore=0.14907119849998599}
Score{Union=6, Intersection=1, l=[d], A=q, B=r, score=0.16666667, wscore=0.06804138174397718}
Score{Union=10, Intersection=1, l=[d], A=r, B=a, score=0.1, wscore=0.040824829046386304}
Score{Union=7, Intersection=1, l=[d], A=r, B=c, score=0.14285715, wscore=0.058321184351980436}
Score{Union=8, Intersection=1, l=[d], A=r, B=e, score=0.125, wscore=0.051031036307982884}
Score{Union=8, Intersection=1, l=[d], A=r, B=h, score=0.125, wscore=0.051031036307982884}
Score{Union=6, Intersection=1, l=[d], A=r, B=q, score=0.16666667, wscore=0.06804138174397718}
Score{Union=5, Intersection=2, l=[s, t], A=r, B=u, score=0.4, wscore=0.23094010767585033}
Score{Union=5, Intersection=2, l=[w, v], A=r, B=x, score=0.4, wscore=0.23094010767585033}
Score{Union=8, Intersection=1, l=[r], A=s, B=d, score=0.125, wscore=0.05590169943749474}
Score{Union=5, Intersection=1, l=[r], A=s, B=v, score=0.2, wscore=0.08944271909999159}
Score{Union=5, Intersection=1, l=[r], A=s, B=w, score=0.2, wscore=0.08944271909999159}
Score{Union=8, Intersection=1, l=[r], A=t, B=d, score=0.125, wscore=0.05590169943749474}
Score{Union=5, Intersection=1, l=[r], A=t, B=v, score=0.2, wscore=0.08944271909999159}
Score{Union=5, Intersection=1, l=[r], A=t, B=w, score=0.2, wscore=0.08944271909999159}
Score{Union=5, Intersection=2, l=[s, t], A=u, B=r, score=0.4, wscore=0.23094010767585033}
Score{Union=8, Intersection=1, l=[r], A=v, B=d, score=0.125, wscore=0.05590169943749474}
Score{Union=5, Intersection=1, l=[r], A=v, B=s, score=0.2, wscore=0.08944271909999159}
Score{Union=5, Intersection=1, l=[r], A=v, B=t, score=0.2, wscore=0.08944271909999159}
Score{Union=8, Intersection=1, l=[r], A=w, B=d, score=0.125, wscore=0.05590169943749474}
Score{Union=5, Intersection=1, l=[r], A=w, B=s, score=0.2, wscore=0.08944271909999159}
Score{Union=5, Intersection=1, l=[r], A=w, B=t, score=0.2, wscore=0.08944271909999159}
Score{Union=5, Intersection=2, l=[w, v], A=x, B=r, score=0.4, wscore=0.23094010767585033}
Score{Union=5, Intersection=1, l=[a], A=y, B=b, score=0.2, wscore=0.08164965809277261}
Score{Union=4, Intersection=1, l=[a], A=y, B=c, score=0.25, wscore=0.10206207261596577}
Score{Union=7, Intersection=1, l=[a], A=y, B=d, score=0.14285715, wscore=0.058321184351980436}
Score{Union=5, Intersection=1, l=[a], A=y, B=e, score=0.2, wscore=0.08164965809277261}
Score{Union=5, Intersection=1, l=[a], A=y, B=f, score=0.2, wscore=0.08164965809277261}
Score{Union=6, Intersection=1, l=[y], A=z, B=a, score=0.16666667, wscore=0.1178511301977579}
根據個人需求再根據得分進行排序就好了。
下面是在spark中的程式碼。
spark沒有計算加權的分數(懶得寫了),用的推薦分類還是上面的同一個Score類。
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import scala.Tuple2;
import java.util.LinkedList;
/**
 * Spark version of the friend-of-friend recommendation. Only the unweighted
 * score is computed; pair scoring is delegated to {@code fof.aaa}.
 */
public class Sparkfof {

    /**
     * Reads the friend lists from the path in {@code args[0]}, scores every pair
     * of lines from the cartesian product, and writes the resulting
     * {@code Score} objects as text to {@code /tmp/spark-tmp/fof}.
     *
     * @param args {@code args[0]} is the input path
     * @throws IllegalArgumentException if no input path is given
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            throw new IllegalArgumentException("Usage: Sparkfof <input-path>");
        }
        JavaSparkContext sparkContext = new JavaSparkContext(new SparkConf().setAppName("fof"));
        try {
            JavaRDD<String> lines = sparkContext.textFile(args[0]);

            // One char[] per line: the user first, then that user's friends.
            JavaRDD<char[]> rows = lines.map(new Function<String, char[]>() {
                @Override
                public char[] call(String s) throws Exception {
                    return s.replace(" ", "").toCharArray();
                }
            });

            // Every ordered pair of rows, including a row paired with itself.
            JavaPairRDD<char[], char[]> pairs = rows.cartesian(rows);

            JavaRDD<Score> scores = pairs.flatMap(new FlatMapFunction<Tuple2<char[], char[]>, Score>() {
                @Override
                public Iterable<Score> call(Tuple2<char[], char[]> tuple2) throws Exception {
                    LinkedList<Score> result = new LinkedList<>();
                    Score score = fof.aaa(tuple2._1, tuple2._2);
                    // fof.aaa returns null for pairs that yield no recommendation;
                    // emit nothing for those, so the RDD never holds null elements
                    // and no separate filter pass is needed.
                    if (score != null) {
                        result.add(score);
                    }
                    return result;
                }
            });

            scores.saveAsTextFile("/tmp/spark-tmp/fof");
        } finally {
            // Release the Spark context even when the job fails.
            sparkContext.stop();
        }
    }
}
然後把程式打包放到spark機器上執行。
去hdfs中讀取跑完的資料。
hadoop fs -cat /tmp/spark-tmp/fof/part-00000
得出資料如下和單機java資料一樣。