1. 程式人生 > 其它 >計算物品之間的相似度矩陣

計算物品之間的相似度矩陣

公式一:

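$$w_{ij} = \frac{|N(i) \cap N(j)|}{|N(i)|}$$

其中 $N(i)$ 表示對物品 $i$ 產生過行為(評分大於 0)的使用者集合,$|N(i) \cap N(j)|$ 為同時對物品 $i$ 與物品 $j$ 產生過行為的使用者數。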

  • 構建資料集
import pandas as pd
import numpy as np

# Ratings table built from a nested mapping: outer keys (users) become
# columns and inner keys (items) become the index.
user_score_dict = pd.DataFrame({
    "A": {"a": 3.0, "b": 4.0, "c": 0.0, "d": 3.5, "e": 0.0},
    "B": {"a": 4.0, "b": 0.0, "c": 4.5, "d": 0.0, "e": 3.5},
    "C": {"a": 0.0, "b": 3.5, "c": 0.0, "d": 0.0, "e": 3.0},
    "D": {"a": 0.0, "b": 4.0, "c": 0.0, "d": 3.5, "e": 3.0},
})
# Transpose so that each row is a user and each column is an item.
user_score_dict = user_score_dict.T
# Bare expression: shows the table when run as a notebook/REPL cell.
user_score_dict

在這裡插入圖片描述

  • 構建使用者倒排表、共現矩陣
# Raw ratings keyed by user, then by item.
user_score_dict = {
    "A": {"a": 3.0, "b": 4.0, "c": 0.0, "d": 3.5, "e": 0.0},
    "B": {"a": 4.0, "b": 0.0, "c": 4.5, "d": 0.0, "e": 3.5},
    "C": {"a": 0.0, "b": 3.5, "c": 0.0, "d": 0.0, "e": 3.0},
    "D": {"a": 0.0, "b": 4.0, "c": 0.0, "d": 3.5, "e": 3.0},
}


def ItemSimilarity(user_scores=None):
    """Return item-to-item similarity: co-occurrence(i, j) / users(i).

    An item counts as "used" by a user when that user's score for it is
    greater than 0.0.

    Args:
        user_scores: Mapping ``user -> {item: score}``. When omitted, the
            module-level ``user_score_dict`` is used.

    Returns:
        Nested dict ``sim[i][j]``. The diagonal is 0.0 because an item is
        never counted as co-occurring with itself. Rows for items that no
        user scored above 0.0 are all 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    if user_scores is None:
        user_scores = user_score_dict

    # Number of users with a positive score for each item.
    item_user_count = {}
    # Co-occurrence matrix: users with a positive score for both i and j.
    count = {}
    for ratings in user_scores.values():
        for i, score_i in ratings.items():
            item_user_count.setdefault(i, 0)
            if score_i > 0.0:
                item_user_count[i] += 1
            row = count.setdefault(i, {})
            for j, score_j in ratings.items():
                row.setdefault(j, 0)
                if i != j and score_i > 0.0 and score_j > 0.0:
                    row[j] += 1

    # Co-occurrence matrix -> similarity matrix.
    itemSim = {}
    for i, related_items in count.items():
        users_of_i = item_user_count[i]
        itemSim[i] = {
            # No user touched item i: define the similarity as 0.0.
            j: (cuv / users_of_i if users_of_i else 0.0)
            for j, cuv in related_items.items()
        }
    return itemSim


sim = ItemSimilarity()
# Rows are the source item i, columns the related item j.
pd.DataFrame(sim).T

在這裡插入圖片描述

公式二:

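$$w_{ij} = \frac{|N(i) \cap N(j)|}{\sqrt{|N(i)|\,|N(j)|}}$$

其中 $N(i)$ 表示對物品 $i$ 產生過行為(評分大於 0)的使用者集合;與公式一相比,分母同時考慮物品 $j$ 的使用者數,因此相似度矩陣是對稱的。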

  • 構建資料集
import pandas as pd
import numpy as np

# Build the user x item ratings table directly in its final orientation:
# with orient="index" the outer keys (users) become the rows and the inner
# keys (items) become the columns.
user_score_dict = pd.DataFrame.from_dict(
    {
        "A": {"a": 3.0, "b": 4.0, "c": 0.0, "d": 3.5, "e": 0.0},
        "B": {"a": 4.0, "b": 0.0, "c": 4.5, "d": 0.0, "e": 3.5},
        "C": {"a": 0.0, "b": 3.5, "c": 0.0, "d": 0.0, "e": 3.0},
        "D": {"a": 0.0, "b": 4.0, "c": 0.0, "d": 3.5, "e": 3.0},
    },
    orient="index",
)
# Bare expression: shows the table when run as a notebook/REPL cell.
user_score_dict
  • 構建使用者倒排表、共現矩陣
import math
# Raw ratings keyed by user, then by item. Each tuple lists one user's
# scores for items "a".."e" in order.
user_score_dict = {
    user: dict(zip("abcde", scores))
    for user, scores in (
        ("A", (3.0, 4.0, 0.0, 3.5, 0.0)),
        ("B", (4.0, 0.0, 4.5, 0.0, 3.5)),
        ("C", (0.0, 3.5, 0.0, 0.0, 3.0)),
        ("D", (0.0, 4.0, 0.0, 3.5, 3.0)),
    )
}
def ItemSimilarity(user_scores=None):
    """Return item-to-item similarity: co-occurrence(i, j) / sqrt(users(i) * users(j)).

    An item counts as "used" by a user when that user's score for it is
    greater than 0.0.

    Args:
        user_scores: Mapping ``user -> {item: score}``. When omitted, the
            module-level ``user_score_dict`` is used.

    Returns:
        Nested dict ``sim[i][j]``. The diagonal is 0.0 because an item is
        never counted as co-occurring with itself. Any pair in which either
        item has no user with a positive score gets 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    if user_scores is None:
        user_scores = user_score_dict

    # Number of users with a positive score for each item.
    item_user_count = {}
    # Co-occurrence matrix: users with a positive score for both i and j.
    count = {}
    for ratings in user_scores.values():
        for i, score_i in ratings.items():
            item_user_count.setdefault(i, 0)
            if score_i > 0.0:
                item_user_count[i] += 1
            row = count.setdefault(i, {})
            for j, score_j in ratings.items():
                row.setdefault(j, 0)
                if i != j and score_i > 0.0 and score_j > 0.0:
                    row[j] += 1

    # Co-occurrence matrix -> similarity matrix.
    itemSim = {}
    for i, related_items in count.items():
        sim_row = itemSim.setdefault(i, {})
        for j, cuv in related_items.items():
            denom = math.sqrt(item_user_count[i] * item_user_count[j])
            # A zero denominator means one of the items was never used.
            sim_row[j] = cuv / denom if denom else 0.0
    return itemSim
# Compute the similarity matrix from the module-level ratings.
sim = ItemSimilarity()
# Show it as a table whose rows are the outer keys of `sim`
# (bare expression: displayed when run as a notebook/REPL cell).
pd.DataFrame.from_dict(sim, orient="index")

在這裡插入圖片描述