svd 做協同過濾
阿新 • • 發佈:2019-01-10
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 20 21:02:58 2018
@author: luogan
"""
#coding=UTF-8
from numpy import *
from numpy import linalg as la
def loadExData():
    """Return a small hard-coded 7x5 example matrix as a list of lists.

    Presumably rows are users and columns are items, with 0 meaning
    "not rated" (that is how the estimators in this file read a matrix).
    """
    return [
        [0, 0, 0, 2, 2],
        [0, 0, 0, 3, 3],
        [0, 0, 0, 1, 1],
        [1, 1, 1, 0, 0],
        [2, 2, 2, 0, 0],
        [5, 5, 5, 0, 0],
        [1, 1, 1, 0, 0],
    ]
def loadExData2():
    """Return a hard-coded 11x11 example matrix as a list of lists.

    Presumably rows are users and columns are items, with 0 meaning
    "not rated" (that is how the estimators in this file read a matrix).
    """
    return [
        [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
        [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
        [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
        [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
        [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
        [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
        [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
        [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
        [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
        [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
        [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0],
    ]
def ecludSim(inA, inB):
    """Return a similarity in (0, 1] derived from the Euclidean distance.

    The distance ``la.norm(inA - inB)`` is mapped through ``1 / (1 + d)``,
    so identical inputs score 1.0 and the score shrinks as they move apart.

    Args:
        inA, inB: NumPy vectors/matrices of the same shape.
    """
    distance = la.norm(inA - inB)
    return 1.0 / (1.0 + distance)
def pearsSim(inA, inB):
    """Return the Pearson correlation of two vectors rescaled to [0, 1].

    Args:
        inA, inB: column vectors (n x 1) holding paired observations;
            ``corrcoef`` is called with ``rowvar=0`` so each column is
            treated as one variable.

    Returns:
        ``0.5 + 0.5 * r`` where ``r`` is the correlation coefficient.
        Inputs with fewer than 3 observations return 1.0. When the
        correlation is undefined (one input is constant, so its standard
        deviation is zero) the neutral value 0.5 is returned instead of NaN.
    """
    if len(inA) < 3:
        return 1.0
    # A constant input makes corrcoef divide 0 by 0; silence the warning and
    # handle the resulting NaN explicitly so it cannot poison later sums.
    with errstate(divide='ignore', invalid='ignore'):
        corr = corrcoef(inA, inB, rowvar=0)[0][1]
    if isnan(corr):
        return 0.5
    return 0.5 + 0.5 * corr
def cosSim(inA, inB):
    """Return the cosine similarity of two vectors rescaled to [0, 1].

    Args:
        inA, inB: vectors with the same number of elements (column
            matrices, row matrices or 1-D arrays); they are flattened
            before use.

    Returns:
        ``0.5 + 0.5 * cos(angle)``. If either vector has zero norm the
        cosine is undefined, so the neutral value 0.5 is returned instead
        of dividing by zero.
    """
    # Flatten to 1-D so the dot product is a true scalar; calling float() on
    # a 1x1 matrix product is a deprecated conversion in recent NumPy.
    vecA = asarray(inA, dtype=float).ravel()
    vecB = asarray(inB, dtype=float).ravel()
    denom = la.norm(vecA) * la.norm(vecB)
    if denom == 0:
        return 0.5
    num = float(dot(vecA, vecB))
    return 0.5 + 0.5 * (num / denom)
def standEst(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating of ``item`` from item-to-item similarity.

    For every column ``j`` the user has a non-zero rating for, the
    similarity between column ``item`` and column ``j`` is computed over
    the rows where both columns are > 0. The estimate is the
    similarity-weighted average of the user's ratings.

    Args:
        dataMat: NumPy matrix (uses ``.A``) indexed as [user, item];
            0 is treated as "not rated".
        user: row index of the user.
        simMeas: callable ``(colA, colB) -> similarity``.
        item: column index of the item to estimate.

    Returns:
        The weighted-average rating, or 0 when the total similarity is 0.
        Each similarity is also printed to stdout.
    """
    n = shape(dataMat)[1]
    # Loop-invariant: which rows have a positive entry for the target item.
    itemRated = dataMat[:, item].A > 0
    simTotal = 0.0
    ratSimTotal = 0.0
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0:
            continue
        # Rows that have positive entries in both columns.
        overLap = nonzero(logical_and(itemRated, dataMat[:, j].A > 0))[0]
        if len(overLap) == 0:
            similarity = 0
        else:
            similarity = simMeas(dataMat[overLap, item], dataMat[overLap, j])
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal
def svdEst(dataMat, user, simMeas, item, numSV=4):
    """Estimate ``user``'s rating of ``item`` in an SVD-reduced item space.

    The data matrix is decomposed with ``la.svd``; every item (column) is
    projected onto the leading left singular vectors and scaled by the
    inverse singular values. Similarities are then measured between these
    reduced item vectors and combined into a similarity-weighted average of
    the user's existing ratings.

    Args:
        dataMat: 2-D matrix/array indexed as [user, item]; 0 is treated as
            "not rated".
        user: row index of the user.
        simMeas: callable ``(colVecA, colVecB) -> similarity``; it receives
            column matrices.
        item: column index of the item to estimate.
        numSV: number of singular values to keep (default 4, the value that
            used to be hard-coded). Clamped to the number available.

    Returns:
        The weighted-average rating, or 0 when the total similarity is 0.
        Each similarity is also printed to stdout.

    Raises:
        ValueError: if ``numSV`` is less than 1.
    """
    if numSV < 1:
        raise ValueError('numSV must be at least 1')
    # asmatrix rather than mat: the `mat` alias was removed in NumPy 2.0.
    dataMat = asmatrix(dataMat)
    n = shape(dataMat)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    U, Sigma, VT = la.svd(dataMat)
    # Never ask for more singular values than the decomposition produced
    # (there are only min(rows, cols) of them).
    k = min(numSV, len(Sigma))
    sigK = asmatrix(diag(Sigma[:k]))
    # One row per item, k columns: the items expressed in the reduced space.
    xformedItems = dataMat.T * asmatrix(U)[:, :k] * sigK.I
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T, xformedItems[j, :].T)
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal
def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return the top-``N`` estimated ratings for items ``user`` has not rated.

    Args:
        dataMat: NumPy matrix (uses ``.A``) indexed as [user, item];
            0 is treated as "not rated".
        user: row index of the user.
        N: maximum number of recommendations to return.
        simMeas: similarity function forwarded to ``estMethod``.
        estMethod: callable ``(dataMat, user, simMeas, item) -> score``.

    Returns:
        A list of ``(item, estimatedScore)`` tuples sorted by score in
        descending order, at most ``N`` long; or the string
        ``'you rated everything'`` when the user has no zero entries.
        The indices of the unrated items are also printed to stdout.
    """
    # nonzero on the 1 x n boolean array yields (row_indices, col_indices);
    # the column indices are the unrated items.
    unratedIdx = nonzero(dataMat[user, :].A == 0)
    print(unratedIdx)
    unratedItems = unratedIdx[1]
    print(unratedItems)
    if len(unratedItems) == 0:
        return 'you rated everything'
    itemScores = [
        (item, estMethod(dataMat, user, simMeas, item))
        for item in unratedItems
    ]
    return sorted(itemScores, key=lambda jj: jj[1], reverse=True)[:N]
def printMat(inMat, thresh=0.8):
    """Print a 2-D matrix as rows of 1/0 flags.

    Each entry prints as ``1`` when it is greater than ``thresh`` and ``0``
    otherwise; the flags of one matrix row are separated by single spaces
    and printed on one output line.

    Args:
        inMat: 2-D matrix/array of numbers (any size, not just 32x32).
        thresh: threshold an entry must exceed to print as 1.
    """
    # The Python 2 idiom `print x,` no longer suppresses the newline in
    # Python 3, so build each output line explicitly instead.
    flags = asarray(inMat, dtype=float) > thresh
    for row in flags:
        print(' '.join('1' if flag else '0' for flag in row))
def imgCompress(numSV=3, thresh=0.8, filename='0_5.txt'):
    """Reconstruct a digit image from its leading singular values.

    Reads ``filename`` as text, taking the first 32 characters of every
    non-blank line as single-digit integers, prints the resulting matrix
    with ``printMat``, then prints the rank-``numSV`` SVD reconstruction.

    Args:
        numSV: number of singular values used for the reconstruction;
            clamped to the number available.
        thresh: threshold forwarded to ``printMat``.
        filename: path of the image text file (defaults to the previously
            hard-coded ``'0_5.txt'``).
    """
    # `with` guarantees the file is closed; blank lines (e.g. a trailing
    # newline at end of file) are skipped instead of raising IndexError.
    with open(filename) as imgFile:
        myl = [
            [int(ch) for ch in line.strip()[:32]]
            for line in imgFile
            if line.strip()
        ]
    # asmatrix rather than mat: the `mat` alias was removed in NumPy 2.0.
    myMat = asmatrix(myl)
    print("****original matrix******")
    printMat(myMat, thresh)
    U, Sigma, VT = la.svd(myMat)
    numSV = min(numSV, len(Sigma))
    # Diagonal matrix built from the leading singular values.
    SigRecon = asmatrix(diag(Sigma[:numSV]))
    reconMat = asmatrix(U)[:, :numSV] * SigRecon * asmatrix(VT)[:numSV, :]
    print("****reconstructed matrix using %d singular values******" % numSV)
    printMat(reconMat, thresh)
if __name__ == '__main__':
    # Demo: SVD-based recommendations for user 2 on the 11x11 example data.
    print("begin")
    myData = loadExData2()
    # asmatrix rather than mat: the `mat` alias was removed in NumPy 2.0.
    myMat = asmatrix(myData)
    # Show the recommendations instead of discarding the return value.
    print(recommend(myMat, 2, 3, cosSim, svdEst))
begin
(array([0, 0, 0, 0, 0, 0, 0, 0]), array([ 0, 1, 2, 3, 5, 6, 8, 10]))
[ 0 1 2 3 5 6 8 10]
the 0 and 4 similarity is: 0.487100
the 0 and 7 similarity is: 0.996341
the 0 and 9 similarity is: 0.490280
the 1 and 4 similarity is: 0.485583
the 1 and 7 similarity is: 0.995886
the 1 and 9 similarity is: 0.490272
the 2 and 4 similarity is: 0.485739
the 2 and 7 similarity is: 0.995963
the 2 and 9 similarity is: 0.490180
the 3 and 4 similarity is: 0.450495
the 3 and 7 similarity is: 0.482175
the 3 and 9 similarity is: 0.522379
the 5 and 4 similarity is: 0.506795
the 5 and 7 similarity is: 0.494716
the 5 and 9 similarity is: 0.496130
the 6 and 4 similarity is: 0.434401
the 6 and 7 similarity is: 0.479543
the 6 and 9 similarity is: 0.583833
the 8 and 4 similarity is: 0.490037
the 8 and 7 similarity is: 0.997067
the 8 and 9 similarity is: 0.490078
the 10 and 4 similarity is: 0.512896
the 10 and 7 similarity is: 0.524970
the 10 and 9 similarity is: 0.493617