基於社交網路的使用者與基於物品的協同過濾推薦演算法-java
阿新 • • 發佈:2019-02-02
完整工程+資料來源:https://github.com/scnuxiaotao/recom_sys
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;

/**
 * Item-based collaborative filtering recommender.
 *
 * <p>Pipeline (see {@link #_Run()}): read the user/item counts, build a binary
 * item x user interaction matrix from the review file, compute the cosine
 * similarity between every pair of items, rank each item's neighbours, score
 * the items a user has not interacted with, and write the top {@link #N}
 * items per user to a text file.
 *
 * <p>User and item numbers in the data files are 1-based; the matrices are
 * 0-based. All state is kept in static fields, so the class is not reentrant.
 */
public class itemcf {

    /*
     * Entry point, disabled in the original source:
     * public static void main(String[] args) throws IOException { _Run(); }
     */

    /** Number of users; overwritten by {@link #get_user_hotel_num()}. */
    static int usersum = 20836;
    /** Number of items; overwritten by {@link #get_user_hotel_num()}. */
    static int itemsum = 200;
    /** Number of items recommended to each user. */
    static int N = 3;
    /** Training matrix, indexed [item][user]; 1 means the user reviewed the item. */
    static int[][] train;
    /** Test matrix, indexed [item][user]; allocated but never filled (hold-out split is disabled). */
    static int[][] test;
    /** Predicted interest, indexed [user][item]. */
    static double[][] trainuseritem;
    /** Recommended item numbers (1-based, 0 = empty slot), indexed [user][rank]. */
    static int[][] recommend;
    /** Per-item neighbour lists sorted by descending similarity. */
    static simi[][] simiItem;
    /** Unsorted item-item similarity matrix. */
    static double[][] itemsim;
    /** Review data; each line is user::item::rating. */
    static String road = "data/6 總評論情感分析結果/酒店-評論(已轉化).txt";
    /** Hotel list; each line is name::number. */
    static String road2 = "data/10 推薦/(已轉化)天河酒店ID.txt";
    /** Output file for the recommendations. */
    static String road3 = "data/10 推薦/物品推薦.txt";
    /** User list; one user per line. */
    public static String road4 = "data/10 推薦/(已轉化)使用者ID.txt";

    /** One entry of a neighbour list: a similarity value and the item it refers to. */
    public static class simi {
        /** Similarity value. */
        double value;
        /** 0-based index of the similar item. */
        int num;
    }

    /**
     * Runs the whole pipeline and writes the result to {@link #road3}.
     *
     * @throws IOException if the count files or the output file cannot be read/written
     */
    public static void _Run() throws IOException {
        get_user_hotel_num();
        System.out.println("usersum: " + usersum);
        System.out.println("itemsum: " + itemsum);

        // Java zero-initialises arrays, so no explicit clearing is needed (the
        // original wrote to element [0][0], which fails when a dimension is 0).
        train = new int[itemsum][usersum];
        test = new int[itemsum][usersum];
        trainuseritem = new double[usersum][itemsum];
        recommend = new int[usersum][N];
        simiItem = new simi[itemsum][itemsum];
        itemsim = new double[itemsum][itemsum];

        // Number of most-similar neighbours consulted when scoring an item.
        final int k = 8;

        for (int i = 0; i < itemsum; i++) {
            for (int j = 0; j < itemsum; j++) {
                simiItem[i][j] = new simi();
            }
        }

        System.out.println("1.訓練集");
        SplitData(8, 1);

        System.out.println("2.計算物品之間相似性,得到相似性矩陣");
        for (int i = 0; i < itemsum; i++) {
            // An item must never be its own neighbour.
            itemsim[i][i] = 0;
            // The measure is symmetric, so each pair is computed once and mirrored.
            for (int j = i + 1; j < itemsum; j++) {
                double s = Simility(train[i], train[j]);
                itemsim[i][j] = s;
                itemsim[j][i] = s;
            }
        }

        System.out.println("3.物品相似度由高到低排序");
        sort();

        System.out.println("4.得到使用者對物品興趣程度的矩陣");
        for (int i = 0; i < usersum; i++) {
            for (int j = 0; j < itemsum; j++) {
                // Only predict interest for items the user has not interacted with.
                // getUserLikeItem accumulates directly into trainuseritem[i][j].
                if (train[j][i] == 0) {
                    getUserLikeItem(i, j, k);
                }
            }
        }

        System.out.println("5.通過物品興趣程度,推薦前N個");
        getRecommend();

        // Console preview of the first users; capped so small data sets do not overrun.
        int shown = Math.min(200, usersum);
        for (int i = 0; i < shown; i++) {
            System.out.println("user" + (i + 1));
            for (int j = 0; j < N; j++) {
                if (recommend[i][j] != 0) {
                    System.out.print(recommend[i][j] + " ");
                }
            }
            System.out.println();
        }

        System.out.println("6.輸出到txt");
        out_txt(road2, road3);
    }

    /**
     * Sets {@link #itemsum} to the number of lines in {@link #road2} and
     * {@link #usersum} to the number of lines in {@link #road4}.
     *
     * @throws IOException if either file cannot be read
     */
    public static void get_user_hotel_num() throws IOException {
        itemsum = countLines(road2);
        usersum = countLines(road4);
    }

    /** Counts the lines of a text file, closing it even when reading fails. */
    private static int countLines(String path) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            int count = 0;
            while (reader.readLine() != null) {
                count++;
            }
            return count;
        }
    }

    /**
     * Writes one line per user to {@code r2}: {@code user<n>} followed by
     * {@code ::<hotel name>} for every non-empty recommendation slot.
     *
     * @param r1 hotel list file, each line {@code name::number}
     * @param r2 output file
     * @throws IOException if {@code r1} cannot be read or {@code r2} cannot be written
     * @throws NumberFormatException if a hotel number in {@code r1} is not an integer
     */
    public static void out_txt(String r1, String r2) throws IOException {
        // A map instead of a fixed String[201]: the number of hotels comes from
        // the data, so any hard-coded capacity can overflow.
        HashMap<Integer, String> hotelNames = new HashMap<Integer, String>();
        try (BufferedReader reader = new BufferedReader(new FileReader(r1))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 2) {
                    continue; // blank or malformed line
                }
                hotelNames.put(Integer.parseInt(fields[1]), fields[0]);
            }
        }

        try (FileWriter fw = new FileWriter(r2)) {
            for (int i = 0; i < usersum; i++) {
                fw.write("user" + (i + 1));
                for (int j = 0; j < N; j++) {
                    if (recommend[i][j] != 0) {
                        // An unknown number is written as "null", as the array lookup did.
                        fw.write("::" + hotelNames.get(recommend[i][j]));
                    }
                }
                fw.write("\r\n");
            }
        }
    }

    /**
     * Loads the review file {@link #road} into {@link #train}: every
     * {@code user::item::...} line marks {@code train[item-1][user-1] = 1}.
     *
     * <p>The hold-out split that would fill {@link #test} is disabled, so
     * {@code m} and {@code k} are currently unused. Lines that are malformed or
     * whose numbers fall outside {@code 1..usersum} / {@code 1..itemsum} are
     * skipped. I/O errors are reported on stderr and do not propagate.
     *
     * @param m split denominator (unused)
     * @param k split selector (unused)
     * @return always 1
     */
    public static int SplitData(int m, int k) {
        try (BufferedReader reader = new BufferedReader(new FileReader(road))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 2) {
                    continue;
                }
                int usernum;
                int itemnum;
                try {
                    usernum = Integer.parseInt(fields[0].trim());
                    itemnum = Integer.parseInt(fields[1].trim());
                } catch (NumberFormatException e) {
                    continue; // not a data row
                }
                // Lower bounds matter too: a 0 would index element -1.
                if (usernum >= 1 && usernum <= usersum && itemnum >= 1 && itemnum <= itemsum) {
                    // Only the presence of a review counts, not the rating itself.
                    train[itemnum - 1][usernum - 1] = 1;
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return 1;
    }

    /**
     * Cosine similarity of two binary item vectors:
     * {@code |A and B| / sqrt(|A| * |B|)}, where {@code |X|} is the number of
     * users with a positive entry.
     *
     * @param ItemA interaction vector of the first item, indexed by user
     * @param ItemB interaction vector of the second item, indexed by user
     * @return the similarity, or 0 when either item has no interactions
     */
    public static double Simility(int[] ItemA, int[] ItemB) {
        int comUser = 0; // users who reviewed both items
        int countIa = 0; // users who reviewed ItemA
        int countIb = 0; // users who reviewed ItemB
        int limit = Math.min(usersum, Math.min(ItemA.length, ItemB.length));
        for (int i = 0; i < limit; i++) {
            boolean a = ItemA[i] > 0;
            boolean b = ItemB[i] > 0;
            if (a && b) {
                comUser++;
            }
            if (a) {
                countIa++;
            }
            if (b) {
                countIb++;
            }
        }
        // Multiply as double: the int product overflows once both counts pass
        // ~46340, which turned the square root into NaN.
        double tem = Math.sqrt((double) countIa * countIb);
        if (tem == 0) {
            return 0;
        }
        return comUser / tem;
    }

    /**
     * Sorts {@code simiItem[x][start..end]} in place by descending similarity
     * (quicksort, first element as pivot).
     *
     * @param x     row (item) to sort
     * @param start first index of the range, inclusive
     * @param end   last index of the range, inclusive
     */
    public static void quickSort(int x, int start, int end) {
        if (start >= end) {
            return;
        }
        simi[] row = simiItem[x];
        double base = row[start].value;
        int i = start;
        int j = end;
        do {
            while (row[i].value > base && i < end) {
                i++;
            }
            while (row[j].value < base && j > start) {
                j--;
            }
            if (i <= j) {
                simi tmp = row[i];
                row[i] = row[j];
                row[j] = tmp;
                i++;
                j--;
            }
        } while (i <= j);
        if (start < j) {
            quickSort(x, start, j);
        }
        if (end > i) {
            quickSort(x, i, end);
        }
    }

    /**
     * Fills {@link #simiItem} from {@link #itemsim} and sorts every row by
     * descending similarity.
     *
     * @return always 1
     */
    public static int sort() {
        for (int i = 0; i < itemsum; i++) {
            for (int j = 0; j < itemsum; j++) {
                simiItem[i][j].num = j;
                simiItem[i][j].value = itemsim[i][j];
            }
            quickSort(i, 0, itemsum - 1);
        }
        return 1;
    }

    /**
     * Adds to {@code trainuseritem[i][j]} the similarity of each of item
     * {@code j}'s {@code k} nearest neighbours that user {@code i} has
     * interacted with. The value accumulates, so call it once per (user, item).
     *
     * @param i 0-based user index
     * @param j 0-based item index
     * @param k number of neighbours to consult; capped at the neighbour-list length
     * @return the updated {@code trainuseritem[i][j]}
     */
    public static double getUserLikeItem(int i, int j, int k) {
        simi[] neighbours = simiItem[j];
        int limit = Math.min(k, neighbours.length);
        for (int x = 0; x < limit; x++) {
            if (train[neighbours[x].num][i] > 0) {
                trainuseritem[i][j] += neighbours[x].value;
            }
        }
        return trainuseritem[i][j];
    }

    /**
     * Fills {@link #recommend}: for every user, the up-to-{@link #N} items with
     * the highest non-zero predicted interest, stored as 1-based item numbers
     * in descending order of interest.
     *
     * @return always 1
     */
    public static int getRecommend() {
        for (int i = 0; i < usersum; i++) {
            boolean[] taken = new boolean[itemsum];
            for (int x = 0; x < N; x++) {
                // Start from the first item not picked yet.
                int maxnum = 0;
                while (maxnum < itemsum && taken[maxnum]) {
                    maxnum++;
                }
                if (maxnum >= itemsum) {
                    break; // fewer items than slots: nothing left to pick
                }
                for (int j = 0; j < itemsum; j++) {
                    if (!taken[j] && trainuseritem[i][maxnum] < trainuseritem[i][j]) {
                        maxnum = j;
                    }
                }
                taken[maxnum] = true;
                if (trainuseritem[i][maxnum] != 0) {
                    recommend[i][x] = maxnum + 1; // stored 1-based; 0 means empty
                }
            }
        }
        return 1;
    }
}
package WjPack;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * User-based recommender driven by the social (follow) graph.
 *
 * <p>For every user that has ratings, each followed user whose ratings on the
 * commonly rated hotels are identical (Euclidean distance 0) contributes the
 * hotels that the followed user rated and the current user did not. The result
 * is written to {@link #road4}, one line per user that received suggestions.
 *
 * <p>All state is kept in static fields, so the class is not reentrant.
 */
public class new_ojld_dis {

    /*
     * Entry point, disabled in the original source:
     * public static void main(String[] args) throws IOException { run(); }
     */

    static String road_main = "data";
    /** Review data; each line is user::item::rating. */
    static String road = road_main + "/6 總評論情感分析結果/酒店-評論(已轉化).txt";
    /** Follow graph; each line is user::followed user. */
    static String road2 = road_main + "/10 推薦/(已轉化)使用者-關注.txt";
    /** Hotel list; each line is name::number. */
    static String road3 = road_main + "/10 推薦/(已轉化)天河酒店ID.txt";
    /** Output file for the recommendations. */
    static String road4 = road_main + "/10 推薦/使用者推薦.txt";
    /** User list; one user per line. */
    static String road5 = road_main + "/10 推薦/(已轉化)使用者ID.txt";

    /** Number of users; overwritten by {@link #get_user_hotel_num()}. */
    static int usersum = 20836;
    /** Number of items; overwritten by {@link #get_user_hotel_num()}. */
    static int itemsum = 200;

    /** Ratings per user: user -> (item -> rating). */
    static Map<String,HashMap<String,Integer>> score = new HashMap<String,HashMap<String,Integer>>();
    static Set<String> userSet = new HashSet<String>();
    static Set<String> filmSet = new HashSet<String>();
    /** Writer for {@link #road4}; only open while {@link #run()} is executing. */
    static FileWriter txtw;
    /** Recommendations accumulated for the user currently being processed ("::name" per hotel). */
    static String tjhotel = "";
    /** Users that have at least one rating, in order of first appearance. */
    static ArrayList<String> arr;
    /** Hotel names indexed by hotel number; grown on demand by {@link #get_hotelid(String)}. */
    static String []hotel = new String[201];

    /** Distance reported when two users have no commonly rated item. */
    private static final double NO_COMMON_ITEMS = 100;
    /** Follow graph loaded from {@link #followCachePath}; null until first use. */
    private static Map<String, Set<String>> followCache;
    /** Path the cached follow graph was loaded from. */
    private static String followCachePath;

    static {
        arr = new ArrayList<String>();
        try {
            score = get_score_from_road();
        } catch (IOException e) {
            // Best effort: keep the class usable with empty ratings, but say why.
            System.err.println("new_ojld_dis: could not load ratings from " + road + ": " + e);
        }
    }

    /**
     * Computes recommendations for every user in {@link #arr} and writes them
     * to {@link #road4} as {@code user<id>::<hotel>::<hotel>...} lines.
     *
     * @throws IOException if an input file cannot be read or the output cannot be written
     */
    public static void run() throws IOException {
        txtw = new FileWriter(road4);
        try {
            get_hotelid(road3);
            // Iterate over the users actually loaded rather than a fixed count.
            for (int m = 0; m < arr.size(); ++m) {
                String user = arr.get(m);
                tjhotel = "";
                outNearbyUserList(user);
                if (tjhotel.length() > 1) {
                    txtw.write("user" + user + tjhotel + "\r\n");
                }
            }
        } finally {
            txtw.close();
        }
    }

    /** Reads the user and item counts and prints them; I/O errors are reported on stderr. */
    public static void init() {
        try {
            get_user_hotel_num();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
        System.out.println(usersum);
        System.out.println(itemsum);
    }

    /**
     * Sets {@link #itemsum} to the number of lines in {@link #road3} and
     * {@link #usersum} to the number of lines in {@link #road5}.
     *
     * @throws IOException if either file cannot be read
     */
    public static void get_user_hotel_num() throws IOException {
        itemsum = countLines(road3);
        usersum = countLines(road5);
    }

    /** Counts the lines of a text file, closing it even when reading fails. */
    private static int countLines(String path) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            int count = 0;
            while (reader.readLine() != null) {
                count++;
            }
            return count;
        }
    }

    /**
     * Loads the review file {@link #road} and returns the ratings grouped by
     * user. Every user seen for the first time is also appended to
     * {@link #arr}. Rows of one user need not be contiguous. Lines with fewer
     * than three {@code ::}-separated fields are skipped.
     *
     * @return user -> (item -> rating)
     * @throws IOException if the review file cannot be read
     * @throws NumberFormatException if a rating is not an integer
     */
    public static Map<String,HashMap<String,Integer>> get_score_from_road() throws IOException {
        init();
        Map<String,HashMap<String,Integer>> loaded = new HashMap<String,HashMap<String,Integer>>();
        try (BufferedReader reader = new BufferedReader(new FileReader(road))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 3) {
                    continue; // blank or malformed line
                }
                String username = fields[0];
                String filmname = fields[1];
                Integer rating = Integer.valueOf(fields[2]);

                HashMap<String,Integer> ratings = loaded.get(username);
                if (ratings == null) {
                    ratings = new HashMap<String,Integer>();
                    loaded.put(username, ratings);
                    arr.add(username); // each user is listed exactly once
                }
                ratings.put(filmname, rating);
            }
        }
        return loaded;
    }

    /**
     * Compares {@code user} with every rated user it follows; matching users
     * append their extra hotels to {@link #tjhotel} (see {@link #getOSScore}).
     *
     * @param user the user to produce recommendations for
     * @throws IOException if the follow file cannot be read
     */
    public static void outNearbyUserList(String user) throws IOException {
        Set<String> followed = loadFollows().get(user);
        if (followed == null) {
            return; // user follows nobody
        }
        for (int m = 0; m < arr.size(); ++m) {
            String tempUser = arr.get(m);
            if (tempUser.equals(user) || !followed.contains(tempUser)) {
                continue;
            }
            // Called for its side effect on tjhotel; the distance itself is not needed here.
            getOSScore(user, tempUser);
        }
    }

    /**
     * Returns the follow graph (user -> followed users) from {@link #road2},
     * reading the file only once per path instead of once per user.
     */
    private static Map<String, Set<String>> loadFollows() throws IOException {
        if (followCache != null && road2.equals(followCachePath)) {
            return followCache;
        }
        Map<String, Set<String>> follows = new HashMap<String, Set<String>>();
        try (BufferedReader reader = new BufferedReader(new FileReader(road2))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 2) {
                    continue;
                }
                Set<String> targets = follows.get(fields[0]);
                if (targets == null) {
                    targets = new HashSet<String>();
                    follows.put(fields[0], targets);
                }
                targets.add(fields[1]);
            }
        }
        followCache = follows;
        followCachePath = road2;
        return follows;
    }

    /**
     * Euclidean distance between the ratings of two users over the items both
     * have rated. When the distance is 0, every item rated by {@code user2}
     * but not by {@code user1} is appended to {@link #tjhotel} as
     * {@code ::<hotel name>}.
     *
     * @param user1 the user receiving recommendations
     * @param user2 the followed user being compared
     * @return the distance, or 100 when the users share no rated item or
     *         either user has no ratings
     * @throws NumberFormatException if an item id of {@code user2} is not an integer
     */
    private static Double getOSScore(String user1, String user2) throws NumberFormatException, IOException {
        HashMap<String,Integer> user1Score = score.get(user1);
        HashMap<String,Integer> user2Score = score.get(user2);
        if (user1Score == null || user2Score == null) {
            return NO_COMMON_ITEMS;
        }

        // Accumulate over ALL common items. The original kept only the value of
        // the last item visited and used |a^2 - b^2|, which made the result
        // depend on hash order and treated ratings r and -r as equal.
        double squaredSum = 0;
        boolean hasCommon = false;
        for (Map.Entry<String,Integer> entry : user1Score.entrySet()) {
            Integer other = user2Score.get(entry.getKey());
            if (other == null) {
                continue;
            }
            hasCommon = true;
            double diff = entry.getValue() - other;
            squaredSum += diff * diff;
        }
        double totalscore = hasCommon ? Math.sqrt(squaredSum) : NO_COMMON_ITEMS;

        if (totalscore == 0) {
            for (String film : user2Score.keySet()) {
                if (user1Score.get(film) == null) {
                    tjhotel += "::" + hotelName(Integer.parseInt(film));
                }
            }
        }
        return totalscore;
    }

    /** Name of the hotel with the given number, or null when it is unknown. */
    private static String hotelName(int id) {
        return (id >= 0 && id < hotel.length) ? hotel[id] : null;
    }

    /**
     * Loads hotel names from {@code r1} (each line {@code name::number}) into
     * {@link #hotel}, enlarging the table when a number does not fit.
     *
     * @param r1 hotel list file
     * @throws IOException if the file cannot be read
     * @throws NumberFormatException if a hotel number is not an integer
     */
    public static void get_hotelid(String r1) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(r1))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 2) {
                    continue;
                }
                int id = Integer.parseInt(fields[1]);
                if (id < 0) {
                    continue;
                }
                if (id >= hotel.length) {
                    String[] grown = new String[Math.max(id + 1, hotel.length * 2)];
                    System.arraycopy(hotel, 0, grown, 0, hotel.length);
                    hotel = grown;
                }
                hotel[id] = fields[0];
            }
        }
    }
}
這是課程設計時寫的程式碼,可以正常使用。雖然沒寫什麼註釋,但並不難看懂,先了解一下原理再看程式碼就差不多了~
PS:因為抓到的使用者ID和酒店ID都是類似434132這麼長的編號,為了方便用陣列儲存,我事先把它們全部轉化為從1開始的編號。比如有兩個ID 4654654和32131321,我就把它們轉成1和2,推薦完成後再把1、2轉回4654654、32131321。