1. 程式人生 > >PersonalRank學習的一些記錄

PersonalRank學習的一些記錄

想學點推薦的東西,學了點PersonalRank演算法,網上資料挺多,做了些實踐後寫了下面的一些程式碼,只作為以後複習的資料:

不得不提到自己看過的一個部落格,挺好,至少我看懂了在幹嘛。

https://www.cnblogs.com/zhangchaoyang/articles/5470763.html

import numpy as np
import pymysql
class LoadDB():
    """Export the user/goods "like" graph stored in MySQL as an adjacency matrix.

    Vertices are named ``user_<id>`` and ``goods_<id>``; all user vertices come
    first, followed by all goods vertices.
    """

    def __init__(self):
        pass

    def readFromDB(self):
        """
        Read the user -> goods relations from the ``rate_lys`` table, build the
        symmetric 0/1 adjacency matrix of the bipartite graph and export it to
        a local CSV file through ``write2CSV``.

        Errors raised while querying or exporting are printed, not re-raised
        (best-effort behaviour of the original script).

        :return: None
        """
        # PyMySQL >= 1.0 only accepts keyword arguments for connect().
        db = pymysql.connect(host="localhost", user="root", password="admin")
        try:
            cursor = db.cursor()
            cursor.execute("use cherishmall;")
            try:
                # All distinct users.
                cursor.execute("select user_id_lys from rate_lys group by user_id_lys")
                useridList = ['user_' + str(row[0]) for row in cursor.fetchall()]
                # All distinct goods.
                cursor.execute("select goods_id_lys from rate_lys group by goods_id_lys")
                goodsidList = ['goods_' + str(row[0]) for row in cursor.fetchall()]
                # Vertex list: users first, then goods.
                PointList = useridList + goodsidList
                # Name -> matrix index, so every edge is placed in O(1).
                indexOf = {name: idx for idx, name in enumerate(PointList)}
                size = len(PointList)
                M = np.zeros((size, size))
                # A single query for every (user, goods) edge instead of one
                # string-formatted query per user.
                cursor.execute("select user_id_lys, goods_id_lys from rate_lys")
                for userId, goodsId in cursor.fetchall():
                    M[indexOf['user_' + str(userId)], indexOf['goods_' + str(goodsId)]] = 1
                # Mirror the user -> goods edges so the graph is undirected.
                modelM = M + M.T
                # Export the finished model to CSV.
                self.write2CSV(modelM, PointList)
            except Exception as e:
                print("exception", e)
        finally:
            # Always release the connection, even when a query fails.
            db.close()

    def write2CSV(self,modelM,PointList):
        """
        Export the matrix model to ``model.csv`` in the current directory.

        The first line holds the vertex names, the following lines hold the
        matrix rows. Errors raised while writing are printed, not re-raised.

        :param modelM: adjacency matrix (2-D numeric array)
        :param PointList: meaning of each row/column (vertex names)
        :return: None
        """
        # Write the names through savetxt's header instead of stacking them
        # onto the float matrix, which depends on string/number promotion.
        header = ','.join(str(name) for name in PointList)
        try:
            np.savetxt('model.csv', modelM, fmt='%s', delimiter=',',
                       header=header, comments='')
        except Exception as e:
            print(e)
if __name__ == "__main__":
    # Script entry point: dump the rating graph from MySQL into model.csv.
    exporter = LoadDB()
    exporter.readFromDB()
import numpy as np
import json
import datetime
from numpy.linalg import solve
import pymysql
import pickle
import time
import csv
from scipy.sparse.linalg import gmres, lgmres
from scipy.sparse import csr_matrix
from functools import cmp_to_key
class LoadModel():
    """PersonalRank recommender over the user/goods graph stored in a model CSV.

    Attributes set by ``init``:
        vertex: names of the matrix rows/columns (``user_<id>`` / ``goods_<id>``)
        M:      row-normalised transition matrix (``np.matrix``)
        alpha:  probability of continuing the random walk from a vertex
    """

    # Prefix that marks a vertex as a user (goods vertices use ``goods_``).
    USER_PREFIX = 'user_'

    def __init__(self,csvFilePath,alpha):
        """
        Load the model file and remember the walk probability.

        :param csvFilePath: path of the CSV model file
        :param alpha: probability of walking on from a vertex
        """
        self.init(csvFilePath,alpha)

    def init(self,csvFilePath,alpha):
        """
        Read the model file and store the parameters needed for recommending.

        :param csvFilePath: path of the CSV model file
        :param alpha: probability of walking on from a vertex
        :return: None
        """
        vertex, M = self.readFromCSV(csvFilePath)
        # Meaning of each matrix row/column.
        self.vertex = vertex
        # Transition-probability matrix.
        self.M = M
        # Probability of moving on from a vertex; 1 - alpha is the probability
        # of stopping there.
        self.alpha = alpha

    def readFromCSV(self,csvFilePath):
        """
        Read the model from a CSV file whose first line holds the vertex names
        and whose remaining lines hold the 0/1 adjacency matrix.

        :param csvFilePath: path of the CSV model file
        :return: (vertex names, row-normalised probability matrix)
        """
        with open(csvFilePath,'r',newline='') as f:
            # Only the header row is needed here; an empty file yields [].
            vertex = next(csv.reader(f), [])
        # Hand loadtxt the path so it opens and closes the file itself;
        # ndmin=2 keeps a single-row model two-dimensional.
        modelM = np.loadtxt(csvFilePath, dtype='float', delimiter=",", skiprows=1, ndmin=2)
        return (vertex, self.doChangeM(modelM))

    def doChangeM(self, M):
        """
        Normalise every row of the 0/1 model so it becomes a probability model.

        Rows that sum to zero (isolated vertices) are left as all zeros instead
        of producing NaN. The input is not modified.

        :param M: 0/1 model (2-D array-like)
        :return: the converted probability model as ``np.matrix``
        """
        weights = np.array(M, dtype=float, ndmin=2)
        rowSums = weights.sum(axis=1, keepdims=True)
        probs = np.divide(weights, rowSums, out=np.zeros_like(weights), where=rowSums != 0)
        return np.matrix(probs)

    def predict(self, vertex, M, alpha):
        """
        Compute the PersonalRank scores for a walk started at every vertex.

        All start vertices are solved at once by inverting
        ``I - alpha * M.T``; column j of the inverse holds the (unnormalised)
        scores of a walk starting at vertex j.

        :param vertex: names of the matrix rows/columns
        :param M: probability model (``np.matrix`` or 2-D array)
        :param alpha: probability of leaving a vertex on each step
        :return: (``{start vertex: {vertex: score}}`` with scores normalised per
                 start vertex, rounded to 3 decimals and ordered from high to
                 low; seconds spent on the matrix inversion)
        """
        n = M.shape[0]
        # Work on a plain ndarray so both np.matrix and ndarray inputs work.
        A = np.eye(n) - alpha * np.asarray(M, dtype=float).T
        begin = time.perf_counter()
        D = np.linalg.inv(A)
        useTime = time.perf_counter() - begin
        allDict = {}
        for j in range(n):
            score = {vertex[i]: float(D[i, j]) for i in range(n)}
            # Column total, used to normalise the scores of this start vertex.
            total = float(D[:, j].sum())
            ranked = sorted(score.items(), key=lambda item: item[1], reverse=True)
            allDict[vertex[j]] = {
                name: float('%.3f' % (value / total)) for name, value in ranked
            }
        return (allDict,useTime)

    def analysis(self,vertex,allDict,M,N):
        """
        Reduce the raw scores to the final top-N recommendation per user.

        Only walks that start at a user vertex are kept, user vertices are
        removed from each score table, and the N best goods remain. Goods the
        user already likes are not filtered out. Users without any goods score
        are omitted from the result.

        :param vertex:  meaning of the model rows/columns (currently unused)
        :param allDict: ``{start vertex: {vertex: score}}`` as built by ``predict``
        :param M:       probability model (currently unused)
        :param N:       the N of top N
        :return:        ``{user vertex: [(goods vertex, score), ...]}``
        """
        isUser = lambda name: name.startswith(self.USER_PREFIX)
        result = {}
        for startVertex, scores in allDict.items():
            if not isUser(startVertex):
                continue
            goodsScores = [(name, value) for name, value in scores.items() if not isUser(name)]
            if not goodsScores:
                continue
            goodsScores.sort(key=lambda item: item[1], reverse=True)
            # Fewer than N candidates simply yields all of them.
            result[startVertex] = goodsScores[:N]
        return result

    def writeRecomResult(self,recomResult,recommendedResultOBJPath):
        """
        Persist the recommendation result with pickle and also write a
        human-readable copy to ``<path>.txt`` for reference.

        :param recomResult: recommendation result (Python object)
        :param recommendedResultOBJPath: path of the pickle file
        :return: None
        """
        with open(recommendedResultOBJPath,'wb') as p_file:
            pickle.dump(recomResult,p_file)
        with open(recommendedResultOBJPath+'.txt','w',encoding='utf-8') as f:
            for userKey,recomList in recomResult.items():
                f.write(userKey+"::"+str(recomList)+"\n")

    def readRecomResult(self,recommendedResultOBJPath):
        """
        Load a recommendation result previously stored by ``writeRecomResult``.

        :param recommendedResultOBJPath: path of the pickle file
        :return: recommendation result (Python object)
        """
        # NOTE: pickle.load executes arbitrary code from the file; only load
        # files this program wrote itself.
        with open(recommendedResultOBJPath,'rb') as p_file:
            return pickle.load(p_file)

    def upload2DB(self,recomResult):
        """
        Upload the final recommendation result to the ``recommended_lys`` table.

        One row is inserted per user; the recommended goods are stored as a
        JSON list. All rows are committed together, so a failure rolls the
        whole upload back. Errors are printed, not re-raised.

        :param recomResult: ``{user vertex: [(goods vertex, score), ...]}``
        :return: None
        """
        # PyMySQL >= 1.0 only accepts keyword arguments for connect().
        db = pymysql.connect(host="localhost", user="root", password="admin")
        try:
            cursor = db.cursor()
            cursor.execute("use cherishmall;")
            # Let the driver quote the values instead of formatting them in.
            sql = ("insert into recommended_lys(user_id_lys,goods_id_lys,add_time_lys) "
                   "values(%s,%s,%s)")
            try:
                for userKey,recomList in recomResult.items():
                    jsonStr = json.dumps(
                        [{"goodId": goodsId, "num": num} for goodsId, num in recomList]
                    )
                    nowTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    userid = int(userKey.split("_", 1)[1])
                    cursor.execute(sql, (userid, jsonStr, nowTime))
                db.commit()
            except Exception as e:
                print("Exception",e)
                db.rollback()
        finally:
            # Always release the connection, even when the upload fails.
            db.close()

if __name__ == "__main__":
    recommender = LoadModel('model.csv', 0.8)
    # First-run pipeline (kept disabled): compute the scores, keep the top 5
    # goods per user and persist them so the steps below can reuse the file.
    # allDict = recommender.predict(recommender.vertex, recommender.M, recommender.alpha)[0]
    # storedResult = recommender.analysis(recommender.vertex, allDict, recommender.M, 5)
    # recommender.writeRecomResult(storedResult, 'recommendedResultOBJ')
    # Reuse the previously persisted result and push it to the database.
    storedResult = recommender.readRecomResult("recommendedResultOBJ")
    print(storedResult)
    recommender.upload2DB(storedResult)


資料庫

對應的二部圖:

注:A、B、C、D分別為user_1、user_2、user_3、user_4  ;    a、b、c、d分別為goods_1、goods_2、goods_3、goods_4