1. 程式人生 > >svd 做協同過濾

svd 做協同過濾

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 20 21:02:58 2018

@author: luogan
"""

#coding=UTF-8
from numpy import *
from numpy import linalg as la

def loadExData():
    """Return a small 7x5 rating table (rows: users, columns: items).

    A value of 0 is treated as "not rated" by the estimators below.
    """
    return [[0, 0, 0, 2, 2],
            [0, 0, 0, 3, 3],
            [0, 0, 0, 1, 1],
            [1, 1, 1, 0, 0],
            [2, 2, 2, 0, 0],
            [5, 5, 5, 0, 0],
            [1, 1, 1, 0, 0]]


def loadExData2():
    """Return a sparser 11x11 rating table (rows: users, columns: items).

    A value of 0 is treated as "not rated" by the estimators below.
    """
    return [[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
            [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
            [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
            [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
            [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
            [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
            [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
            [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
            [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
            [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
            [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0]]


def ecludSim(inA, inB):
    """Return the Euclidean similarity 1 / (1 + ||inA - inB||).

    Identical inputs give 1.0; the value shrinks toward 0 as the distance
    between the two vectors grows.
    """
    return 1.0 / (1.0 + la.norm(inA - inB))


def pearsSim(inA, inB):
    """Return the Pearson correlation of two column vectors rescaled to [0, 1].

    With fewer than three entries the function returns 1.0 without computing
    a correlation.
    """
    if len(inA) < 3:
        return 1.0
    # rowvar=0: each column is a variable, each row an observation.
    return 0.5 + 0.5 * corrcoef(inA, inB, rowvar=0)[0][1]


def cosSim(inA, inB):
    """Return the cosine similarity of two column matrices rescaled to [0, 1].

    Both arguments are expected to be numpy column matrices so that
    ``inA.T * inB`` is a 1x1 inner product.
    """
    # .item() extracts the single element explicitly; calling float() on a
    # 1x1 matrix directly is deprecated in recent NumPy releases.
    num = float((inA.T * inB).item())
    denom = la.norm(inA) * la.norm(inB)
    return 0.5 + 0.5 * (num / denom)


def standEst(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating of ``item`` from item-to-item similarity.

    Args:
        dataMat: numpy matrix of ratings (rows: users, columns: items);
            0 means "not rated".
        user: row index of the user.
        simMeas: similarity function taking two column matrices.
        item: column index of the item to estimate.

    Returns:
        The similarity-weighted average of the user's existing ratings, or 0
        when the total similarity is 0.
    """
    n = shape(dataMat)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0:
            continue
        # Rows (users) that rated both `item` and item `j`.
        overLap = nonzero(logical_and(dataMat[:, item].A > 0,
                                      dataMat[:, j].A > 0))[0]
        if len(overLap) == 0:
            similarity = 0
        else:
            similarity = simMeas(dataMat[overLap, item],
                                 dataMat[overLap, j])
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal


def svdEst(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating of ``item`` in an SVD-reduced item space.

    Items are projected onto at most the four leading singular directions
    of ``dataMat`` and compared there with ``simMeas``.

    Args:
        dataMat: numpy matrix of ratings (rows: users, columns: items);
            0 means "not rated".
        user: row index of the user.
        simMeas: similarity function taking two column matrices.
        item: column index of the item to estimate.

    Returns:
        The similarity-weighted average of the user's existing ratings, or 0
        when the total similarity is 0.
    """
    n = shape(dataMat)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    U, Sigma, VT = la.svd(dataMat)
    # Keep up to four singular values. Slicing (rather than min()) is used
    # on purpose: `from numpy import *` may shadow the builtin min().
    topSigma = Sigma[:4]
    k = len(topSigma)
    sigK = asmatrix(eye(k) * topSigma)  # diagonal matrix of the kept values
    xformedItems = dataMat.T * U[:, :k] * sigK.I  # one row per item
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T,
                             xformedItems[j, :].T)
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal


def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return the top ``N`` (item, estimated rating) pairs for ``user``.

    Args:
        dataMat: numpy matrix of ratings (rows: users, columns: items);
            0 means "not rated".
        user: row index of the user.
        N: maximum number of recommendations to return.
        simMeas: similarity function handed to ``estMethod``.
        estMethod: estimator called as
            ``estMethod(dataMat, user, simMeas, item)``.

    Returns:
        A list of ``(item, score)`` tuples sorted by descending score, or the
        string ``'you rated everything'`` when the user has no unrated items.
    """
    unratedIdx = nonzero(dataMat[user, :].A == 0)
    print(unratedIdx)
    unratedItems = unratedIdx[1]  # column indices of the unrated items
    print(unratedItems)
    if len(unratedItems) == 0:
        return 'you rated everything'
    itemScores = [(item, estMethod(dataMat, user, simMeas, item))
                  for item in unratedItems]
    return sorted(itemScores, key=lambda jj: jj[1], reverse=True)[:N]


def printMat(inMat, thresh=0.8):
    """Print ``inMat`` as rows of 1/0 flags (1 where the entry > ``thresh``).

    Each matrix row is printed on its own line with the flags separated by
    single spaces. The dimensions are taken from ``inMat`` itself.
    """
    rows, cols = shape(inMat)
    for i in range(rows):
        print(' '.join('1' if float(inMat[i, k]) > thresh else '0'
                       for k in range(cols)))


def imgCompress(numSV=3, thresh=0.8):
    """Print the image stored in ``0_5.txt`` and its low-rank reconstruction.

    The first 32 characters of every line of ``0_5.txt`` are read as digits.
    The resulting matrix is printed, rebuilt from its ``numSV`` leading
    singular values, and printed again using ``thresh`` as the cut-off.
    """
    with open('0_5.txt') as fh:
        myl = [[int(line[i]) for i in range(32)] for line in fh]
    myMat = asmatrix(myl)
    print("****original matrix******")
    printMat(myMat, thresh)
    U, Sigma, VT = la.svd(myMat)
    # Diagonal matrix holding the numSV leading singular values.
    SigRecon = asmatrix(eye(numSV) * Sigma[:numSV])
    reconMat = U[:, :numSV] * SigRecon * VT[:numSV, :]
    print("****reconstructed matrix using %d singular values******" % numSV)
    printMat(reconMat, thresh)


if __name__ == '__main__':
    print("begin")
    myData = loadExData2()
    myMat = asmatrix(myData)
    recommend(myMat, 2, 3, cosSim, svdEst)
begin
(array([0, 0, 0, 0, 0, 0, 0, 0]), array([ 0,  1,  2,  3,  5,  6,  8, 10]))
[ 0  1  2  3  5  6  8 10]
the 0 and 4 similarity is: 0.487100
the 0 and 7 similarity is: 0.996341
the 0 and 9 similarity is: 0.490280
the 1 and 4 similarity is: 0.485583
the 1 and 7 similarity is: 0.995886
the 1 and 9 similarity is: 0.490272
the 2 and 4 similarity is: 0.485739
the 2 and 7 similarity is: 0.995963
the 2 and 9 similarity is: 0.490180
the 3 and 4 similarity is: 0.450495
the 3 and 7 similarity is: 0.482175
the 3 and 9 similarity is: 0.522379
the 5 and 4 similarity is: 0.506795
the 5 and 7 similarity is: 0.494716
the 5 and 9 similarity is: 0.496130
the 6 and 4 similarity is: 0.434401
the 6 and 7 similarity is: 0.479543
the 6 and 9 similarity is: 0.583833
the 8 and 4 similarity is: 0.490037
the 8 and 7 similarity is: 0.997067
the 8 and 9 similarity is: 0.490078
the 10 and 4 similarity is: 0.512896
the 10 and 7 similarity is: 0.524970
the 10 and 9 similarity is: 0.493617

原文連結