資料探勘之推薦分析--python實現

阿新 • • 發佈：2019-01-04

#
# FILTERINGDATA.py
#
# Code file for the book Programmer's Guide to Data Mining
# http://guidetodatamining.com
# Ron Zacharski
#
# -*- coding: utf-8 -*-
from math import sqrt

# Sample ratings data: maps each user name to a dictionary of
# {band name: rating} for the bands that user has rated.
users = {
    "Angelica": {
        "Blues Traveler": 3.5,
        "Broken Bells": 2.0,
        "Norah Jones": 4.5,
        "Phoenix": 5.0,
        "Slightly Stoopid": 1.5,
        "The Strokes": 2.5,
        "Vampire Weekend": 2.0,
    },
    "Bill": {
        "Blues Traveler": 2.0,
        "Broken Bells": 3.5,
        "Deadmau5": 4.0,
        "Phoenix": 2.0,
        "Slightly Stoopid": 3.5,
        "Vampire Weekend": 3.0,
    },
    "Chan": {
        "Blues Traveler": 5.0,
        "Broken Bells": 1.0,
        "Deadmau5": 1.0,
        "Norah Jones": 3.0,
        "Phoenix": 5,
        "Slightly Stoopid": 1.0,
    },
    "Dan": {
        "Blues Traveler": 3.0,
        "Broken Bells": 4.0,
        "Deadmau5": 4.5,
        "Phoenix": 3.0,
        "Slightly Stoopid": 4.5,
        "The Strokes": 4.0,
        "Vampire Weekend": 2.0,
    },
    "Hailey": {
        "Broken Bells": 4.0,
        "Deadmau5": 1.0,
        "Norah Jones": 4.0,
        "The Strokes": 4.0,
        "Vampire Weekend": 1.0,
    },
    "Jordyn": {
        "Broken Bells": 4.5,
        "Deadmau5": 4.0,
        "Norah Jones": 5.0,
        "Phoenix": 5.0,
        "Slightly Stoopid": 4.5,
        "The Strokes": 4.0,
        "Vampire Weekend": 4.0,
    },
    "Sam": {
        "Blues Traveler": 5.0,
        "Broken Bells": 2.0,
        "Norah Jones": 3.0,
        "Phoenix": 5.0,
        "Slightly Stoopid": 4.0,
        "The Strokes": 5.0,
    },
    "Veronica": {
        "Blues Traveler": 3.0,
        "Norah Jones": 5.0,
        "Phoenix": 4.0,
        "Slightly Stoopid": 2.5,
        "The Strokes": 3.0,
    },
}

def manhattan(rating1, rating2):
    """Return the mean absolute rating difference over co-rated items.

    Both rating1 and rating2 are dictionaries of the form
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}.

    The Manhattan distance (sum of absolute differences) is taken over the
    keys present in both dictionaries and then divided by the number of
    such keys, so the result is an average per shared item rather than the
    raw Manhattan distance.

    Returns -1 when the two dictionaries have no keys in common.
    """
    differences = [abs(rating1[key] - rating2[key])
                   for key in rating1 if key in rating2]
    if not differences:
        return -1  # Indicates no ratings in common
    # float() forces true division, so all-integer ratings are not
    # floor-divided when this runs under Python 2.
    return sum(differences) / float(len(differences))

def pearson(rating1, rating2):
    """Return the Pearson correlation of two users over co-rated items.

    Both rating1 and rating2 are dictionaries mapping item name to a
    numeric rating. Only keys present in both dictionaries contribute.
    The single-pass computational formula is used:

        r = (sum_xy - sum_x * sum_y / n)
            / (sqrt(sum_x2 - sum_x**2 / n) * sqrt(sum_y2 - sum_y**2 / n))

    Returns 0 when there are no items in common or when either user's
    shared ratings have zero variance (the denominator would be 0).
    """
    # Float accumulators guarantee true division below even when every
    # rating is an int and the code runs under Python 2.
    sum_xy = sum_x = sum_y = sum_x2 = sum_y2 = 0.0
    n = 0
    for key in rating1:
        if key in rating2:
            n += 1
            x = rating1[key]
            y = rating2[key]
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += x ** 2
            sum_y2 += y ** 2
    if n == 0:
        # No shared items: dividing by n would raise ZeroDivisionError.
        return 0
    # Floating-point rounding can push these sums of squares a hair below
    # zero, which would make sqrt() raise ValueError; clamp at 0.
    spread_x = max(sum_x2 - sum_x ** 2 / n, 0.0)
    spread_y = max(sum_y2 - sum_y ** 2 / n, 0.0)
    denominator = sqrt(spread_x) * sqrt(spread_y)
    if denominator == 0:
        return 0
    return (sum_xy - (sum_x * sum_y) / n) / denominator

def cosSimilarity(rating1, rating2):
    """Return the cosine similarity of two users over co-rated items.

    rating1 and rating2 are dictionaries mapping item name to rating.
    The dot product and both vector magnitudes are accumulated only over
    keys that appear in both dictionaries.

    Returns -1 when the product of the two magnitudes is zero, which
    includes the case of no shared keys.
    """
    shared = [key for key in rating1 if key in rating2]
    dot_product = sum(rating1[key] * rating2[key] for key in shared)
    squares_1 = sum(rating1[key] ** 2 for key in shared)
    squares_2 = sum(rating2[key] ** 2 for key in shared)
    magnitude = sqrt(squares_1) * sqrt(squares_2)
    if not magnitude:
        return -1
    return dot_product / magnitude

def computeNearestNeighbor(username, users):
    """Return (similarity, user) tuples for every user other than username.

    users maps user name to that user's {item: rating} dictionary. Each
    other user is scored against username with cosSimilarity, and the
    resulting list is sorted in ascending order of score.

    The sorted list is also printed before it is returned.
    """
    distances = [(cosSimilarity(users[user], users[username]), user)
                 for user in users if user != username]
    # Ascending sort. With cosine similarity the most similar user is the
    # LAST element; with a true distance measure (e.g. Minkowski) the
    # nearest user would be the first element instead.
    distances.sort()
    # Call form of print works under both Python 2 and Python 3.
    print(distances)
    return distances

def recommend(username, users):
    """Give list of recommendations for username.

    Finds the user most similar to username (the last entry of the
    ascending list from computeNearestNeighbor), prints that user's name,
    and collects every artist the neighbor rated that username has not.

    Returns a list of (artist, rating) tuples using the neighbor's
    ratings, sorted by rating from highest to lowest. Returns an empty
    list when there is no other user to compare against.
    """
    # first find nearest neighbor
    neighbors = computeNearestNeighbor(username, users)
    if not neighbors:
        # Nobody else in users: indexing [-1] would raise IndexError.
        return []
    nearest = neighbors[-1][1]  # most similar user is the last element
    # Call form of print works under both Python 2 and Python 3.
    print(nearest)
    # now find bands neighbor rated that user didn't
    neighborRatings = users[nearest]
    userRatings = users[username]
    recommendations = [(artist, neighborRatings[artist])
                       for artist in neighborRatings
                       if artist not in userRatings]
    # using the fn sorted for variety - sort is more efficient
    return sorted(recommendations,
                  key=lambda artistTuple: artistTuple[1],
                  reverse=True)
# Demo run: print the recommendations for Veronica. The call form of
# print behaves the same under Python 2 and Python 3.
print(recommend('Veronica', users))

資料探勘之推薦分析--python實現

資料探勘之推薦分析--python實現

轉載：資料探勘之_SVD的python實現和分析

資料探勘之_SVD的python實現和分析pin

資料探勘之關聯分析一（基本概念）

大資料的的超級應用—資料探勘之推薦系統

資料探勘之關聯分析二（頻繁項集的產生）

R語言學習系列(資料探勘之決策樹演算法實現--ID3程式碼篇)

資料探勘之關聯分析五（序列模式）

資料探勘之曼哈頓距離、歐幾裡距離、明氏距離、皮爾遜相關係數、餘弦相似度Python實現程式碼

資料探勘之售房資料分析1

資料探勘之FP_Tree演算法實現

資料探勘之方差分析實驗

基於.NET實現資料探勘--聚類分析演算法

資料探勘之關聯規則挖掘之Apriori演算法實現

Python資料探勘實戰——相關分析

使用Orange進行資料探勘之聚類分析(2)------K-means

資料探勘之利用Python畫相關性矩陣圖

資料探勘之鳶尾花資料集分析

python資料探勘商品推薦演算法

HAWQ + MADlib 玩轉資料探勘之（六）——主成分分析與主成分投影

資料探勘之推薦分析--python實現

相關推薦