Coursera《Introduction to Recommender Systems》Programming Assignment 3 使用者相似性計算
阿新 • • 發佈:2019-01-05
# -*- coding: utf-8 -*-
"""User-user similarity helpers and driver script for PA3."""
__author__ = 'LiFeiteng'

import numpy as np


class UserUserRec:
    """Hold a dense user x movie rating matrix and compute user similarities.

    Attributes are filled by ``GetRatingData`` and ``GetMovieTitles``:
    ``user_dict`` / ``movie_dict`` map the raw id strings read from the
    ratings file to row / column indices of ``user_ratings``; a cell value
    of 0 means "not rated".
    """

    def __init__(self):
        self.U = 0  # number of distinct users seen so far
        self.M = 0  # number of distinct movies seen so far
        self.user_dict = {}  # user id string -> row index
        self.movie_dict = {}  # movie id string -> column index
        self.movie_title = {}  # movie id string -> title
        self.user_ratings = np.matrix([])

    def GetRatingData(self, ratings_file):
        """Load ``user,movie,rating`` lines from *ratings_file*.

        Assigns a row index to every new user and a column index to every
        new movie (in order of first appearance), prints the resulting
        counts, and rebuilds ``self.user_ratings`` as a ``U x M`` matrix.

        Raises:
            ValueError: if a non-blank line does not have exactly three
                comma-separated fields or the rating is not numeric.
        """
        triples = []
        with open(ratings_file) as handle:
            for line in handle:
                line = line.rstrip("\r\n")
                if not line:
                    continue  # tolerate blank / trailing empty lines
                user, movie, rating = line.split(",")
                if user not in self.user_dict:
                    self.user_dict[user] = self.U
                    self.U += 1
                if movie not in self.movie_dict:
                    self.movie_dict[movie] = self.M
                    self.M += 1
                triples.append((user, movie, float(rating)))

        # Same text as the former ``print self.U, self.M`` on Python 2 and 3.
        print("%d %d" % (self.U, self.M))

        # Fill the matrix from the rows just parsed; the earlier version
        # re-opened a hard-coded "ratings.csv" here instead of ratings_file.
        self.user_ratings = np.matrix(np.zeros([self.U, self.M]))
        for user, movie, rating in triples:
            row = self.user_dict[user]
            col = self.movie_dict[movie]
            self.user_ratings[row, col] = rating

    def GetMovieTitles(self, movie_titles_file):
        """Load ``movie,title`` lines from *movie_titles_file*.

        Only the first comma separates the id from the title, so titles that
        themselves contain commas are kept whole.
        """
        with open(movie_titles_file) as handle:
            for line in handle:
                line = line.rstrip("\r\n")  # drop the line terminator only
                if not line:
                    continue
                movie, title = line.split(",", 1)
                self.movie_title[movie] = title

    def _mean_centered(self, user):
        """Return row *user* as a 1-D array, mean-centered over rated items.

        Unrated items (value <= 0) stay at 0. A row with no rated items
        yields all zeros.
        """
        row = np.asarray(self.user_ratings[user, :], dtype=float).ravel()
        rated = row > 0
        centered = np.zeros_like(row)
        if rated.any():
            centered[rated] = row[rated] - row[rated].mean()
        return centered

    def CosineUserSim(self, user1, user2):
        """Return the cosine similarity of two users' mean-centered ratings.

        *user1* and *user2* are row indices into ``self.user_ratings``.
        Returns 0.0 when either centered vector has zero norm.
        """
        # Author's note: "I think the formula used here is not right."
        u1 = self._mean_centered(user1)
        u2 = self._mean_centered(user2)
        denom = np.linalg.norm(u1) * np.linalg.norm(u2)
        if denom == 0:
            sim = 0.0
        else:
            # Author's note: the issue is the norm here -- each norm also
            # counts items that user1 and user2 did not both rate.
            sim = np.dot(u1, u2) / denom
        return np.double(sim)

    def MovieScore4User(self, user, movie):
        """Return the predicted score of *movie* for *user*.

        The driver below passes both ids as strings and formats the result
        with ``".4f"``.
        """
        # NOTE(review): the published listing omits the body of this method
        # ("the following N lines are omitted"). Only the final return
        # survived, so `score4movie` is undefined until that code is restored.
        return score4movie  # noqa: F821
# end of class UserUserRec


#### PA3
if __name__ == "__main__":
    user_user_rec = UserUserRec()
    user_user_rec.GetRatingData("ratings.csv")
    user_user_rec.GetMovieTitles("movie-titles.csv")
    user_user_rec.MovieScore4User('1024', '77')

    with open("outfile.txt", "w") as outfile, open("input.txt") as infile:
        for line in infile:
            if not line.strip():
                continue  # skip blank lines in the query file
            user, movie = line.split(":")
            movie = str(int(movie))  # normalizes the id, drops the newline
            score = user_user_rec.MovieScore4User(user, movie)
            str1 = ",".join([user, movie, format(score, ".4f"),
                             user_user_rec.movie_title[movie]])
            # One record per line; titles are stored without a terminator.
            outfile.write(str1 + "\n")