1. 程式人生 > Coursera《Introduction to Recommender Systems》Programming Assignment 3:使用者相似性計算

Coursera《Introduction to Recommender Systems》Programming Assignment 3:使用者相似性計算

__author__ = 'LiFeiteng'
# -*- coding: utf-8 -*-
import numpy as np

class UserUserRec:
    """User-user recommender state: id maps, movie titles and a rating matrix.

    Attributes are filled in by ``GetRatingData`` and ``GetMovieTitles``:

    * ``U`` / ``M`` -- number of distinct users / movies seen so far.
    * ``user_dict`` / ``movie_dict`` -- map the id strings read from the
      ratings file to row / column indices of ``user_ratings``.
    * ``movie_title`` -- maps a movie id string to its title.
    * ``user_ratings`` -- ``U x M`` ``np.matrix``; cells without a rating
      stay 0.0.
    """

    def __init__(self):
        self.U = 0
        self.M = 0
        self.user_dict = {}
        self.movie_dict = {}
        self.movie_title = {}
        self.user_ratings = np.matrix([])

    def GetRatingData(self, ratings_file):
        """Load ``user,movie,rating`` lines from *ratings_file*.

        Assigns a row index to every new user id and a column index to every
        new movie id (in order of first appearance), prints the resulting
        user and movie counts, and rebuilds ``self.user_ratings``.

        Raises:
            ValueError: if a non-blank line does not have exactly three
                comma-separated fields or the rating is not a number.
        """
        # Parse once and keep the triples: the matrix can only be allocated
        # after all ids are known, and this avoids re-opening the file.
        entries = []
        with open(ratings_file) as ratings:
            for line in ratings:
                if not line.strip():
                    continue  # tolerate blank / trailing empty lines
                user, movie, rating = line.split(",")
                if user not in self.user_dict:
                    self.user_dict[user] = self.U
                    self.U += 1
                if movie not in self.movie_dict:
                    self.movie_dict[movie] = self.M
                    self.M += 1
                entries.append(
                    (self.user_dict[user], self.movie_dict[movie], float(rating))
                )

        # Single pre-formatted argument so the output is the same under
        # Python 2 (print statement) and Python 3 (print function).
        print("%d %d" % (self.U, self.M))

        self.user_ratings = np.matrix(np.zeros([self.U, self.M]))
        for row, col, value in entries:
            self.user_ratings[row, col] = value

    def GetMovieTitles(self, movie_titles_file):
        """Load ``movie,title`` lines from *movie_titles_file* into ``movie_title``.

        Only the first comma separates the id from the title, so titles that
        themselves contain commas are kept whole.
        """
        with open(movie_titles_file) as titles:
            for line in titles:
                if not line.strip():
                    continue
                movie, title = line.split(",", 1)
                # Strip only the line terminator; slicing off the last
                # character would eat a real one on an unterminated last line.
                self.movie_title[movie] = title.rstrip("\r\n")

    def _centered_ratings(self, user):
        """Return row *user* as a 1-D array, mean-centered over rated items.

        Items with a rating <= 0 are treated as unrated and map to 0.0. A row
        with no rated items yields an all-zero vector.
        """
        row = np.asarray(self.user_ratings[user, :], dtype=float).ravel()
        rated = row > 0
        if not rated.any():
            # Avoid the mean of an empty selection, which would be NaN.
            return np.zeros_like(row)
        return np.where(rated, row - row[rated].mean(), 0.0)

    def CosineUserSim(self, user1, user2):
        """Return the cosine similarity of two users' mean-centered ratings.

        *user1* and *user2* are row indices into ``self.user_ratings``.
        Returns 0.0 when either centered vector has zero norm (including a
        user with no ratings at all).

        NOTE(review): the original author doubted this formula -- each norm
        runs over everything that user rated, not only the items both users
        rated. The computation is kept as written; confirm which definition
        is wanted before changing it.
        """
        u1 = self._centered_ratings(user1)
        u2 = self._centered_ratings(user2)

        denom = np.linalg.norm(u1) * np.linalg.norm(u2)
        if denom == 0:
            sim = 0.0
        else:
            sim = np.dot(u1, u2) / denom
        return np.double(sim)

    def MovieScore4User(self, user, movie):
        """Predict the score of *movie* for *user*.

        The body of this method was omitted from this copy of the file (the
        original carried only an "N lines omitted" placeholder followed by a
        return of an undefined name), so it cannot produce a score here.

        Raises:
            NotImplementedError: always, until the omitted body is restored.
        """
        raise NotImplementedError(
            "MovieScore4User: the implementation was omitted from this source"
        )
# end of class UserUserRec


#### PA3
# Driver: load the ratings and movie titles, then score every "user:movie"
# pair listed in input.txt and write one "user,movie,score,title" record per
# line to outfile.txt.
user_user_rec = UserUserRec()
user_user_rec.GetRatingData("ratings.csv")
user_user_rec.GetMovieTitles("movie-titles.csv")

# The result of this call is discarded.
# NOTE(review): looks like a leftover smoke test -- confirm before removing.
user_user_rec.MovieScore4User('1024', '77')

# The output file is opened first, as before, so it is created (and
# truncated) even when input.txt cannot be opened.
with open("outfile.txt", "w") as outfile, open("input.txt") as infile:
    for line in infile:
        if not line.strip():
            continue  # ignore blank lines instead of failing the unpack
        user, movie = line.split(":")
        # Normalise the movie id ("077\n" -> "77") so it matches the keys
        # used by movie_title.
        movie = str(int(movie))
        score = user_user_rec.MovieScore4User(user, movie)
        record = ",".join(
            [user, movie, format(score, ".4f"), user_user_rec.movie_title[movie]]
        )
        # Terminate each record; without this all records run together.
        outfile.write(record + "\n")