計算物品之間的相似度矩陣
阿新 • • 發佈:2021-04-27
公式一:$w_{ij} = \dfrac{|N(i) \cap N(j)|}{|N(i)|}$,其中 $N(i)$ 為對物品 $i$ 產生過行為(評分大於 0)的使用者集合。
- 構建資料集
import pandas as pd
import numpy as np
# Ratings table with one row per user and one column per item. Building it
# row-wise (orient="index") gives that layout directly, without a transpose.
user_score_dict = pd.DataFrame.from_dict(
    {
        "A": {"a": 3.0, "b": 4.0, "c": 0.0, "d": 3.5, "e": 0.0},
        "B": {"a": 4.0, "b": 0.0, "c": 4.5, "d": 0.0, "e": 3.5},
        "C": {"a": 0.0, "b": 3.5, "c": 0.0, "d": 0.0, "e": 3.0},
        "D": {"a": 0.0, "b": 4.0, "c": 0.0, "d": 3.5, "e": 3.0},
    },
    orient="index",
)
# Bare expression: shows the table when run as the last line of a notebook cell.
user_score_dict
- 構建使用者倒排表、共現矩陣
# Ratings as a plain {user: {item: score}} dict. Each tuple lists one user's
# scores for items "a".."e" in order; ItemSimilarity only counts scores > 0.0
# as an interaction.
user_score_dict = {
    user: dict(zip("abcde", scores))
    for user, scores in (
        ("A", (3.0, 4.0, 0.0, 3.5, 0.0)),
        ("B", (4.0, 0.0, 4.5, 0.0, 3.5)),
        ("C", (0.0, 3.5, 0.0, 0.0, 3.0)),
        ("D", (0.0, 4.0, 0.0, 3.5, 3.0)),
    )
}
def ItemSimilarity(user_scores=None):
    """Build the item-to-item similarity table from user ratings.

    Uses the asymmetric measure ``sim[i][j] = |N(i) & N(j)| / |N(i)|``,
    where ``N(x)`` is the set of users whose score for item ``x`` is > 0.0.

    Args:
        user_scores: Mapping ``{user: {item: score}}``. When omitted, the
            module-level ``user_score_dict`` is used, so existing
            ``ItemSimilarity()`` calls keep working.

    Returns:
        Nested dict ``{i: {j: similarity}}`` covering every pair of items
        that appear together in some user's score dict. ``sim[i][i]`` is
        always 0.0 because an item is never counted as co-occurring with
        itself. An item that nobody scored above 0.0 gets 0.0 in its whole
        row instead of raising ``ZeroDivisionError``.
    """
    if user_scores is None:
        user_scores = user_score_dict

    # Number of users who interacted with (scored > 0.0) each item.
    item_user_count = {}
    # Co-occurrence matrix: co_counts[i][j] = users who interacted with both.
    co_counts = {}
    for scores in user_scores.values():
        for i, score_i in scores.items():
            item_user_count.setdefault(i, 0)
            row = co_counts.setdefault(i, {})
            liked_i = score_i > 0.0
            if liked_i:
                item_user_count[i] += 1
            for j, score_j in scores.items():
                # Register the pair even when it does not co-occur so the
                # resulting matrix has an explicit 0 entry for it.
                row.setdefault(j, 0)
                if liked_i and score_j > 0.0 and i != j:
                    row[j] += 1

    # Co-occurrence matrix -> similarity matrix.
    item_sim = {}
    for i, related in co_counts.items():
        n_i = item_user_count[i]
        # n_i == 0 implies every co-occurrence count in this row is 0 too.
        item_sim[i] = {
            j: (shared / n_i if n_i else 0.0) for j, shared in related.items()
        }
    return item_sim
# Compute the nested {item: {item: similarity}} table.
sim = ItemSimilarity()
# pd.DataFrame(sim) puts the outer keys of `sim` on the columns; transposing
# moves them to the rows, so row i / column j holds sim[i][j].
# As a bare expression the result is only shown in an interactive session.
pd.DataFrame(sim).T
公式二:$w_{ij} = \dfrac{|N(i) \cap N(j)|}{\sqrt{|N(i)|\,|N(j)|}}$,其中 $N(i)$ 為對物品 $i$ 產生過行為(評分大於 0)的使用者集合。
- 構建資料集
import pandas as pd
import numpy as np
# Ratings table with one row per user and one column per item. Building it
# row-wise (orient="index") gives that layout directly, without a transpose.
user_score_dict = pd.DataFrame.from_dict(
    {
        "A": {"a": 3.0, "b": 4.0, "c": 0.0, "d": 3.5, "e": 0.0},
        "B": {"a": 4.0, "b": 0.0, "c": 4.5, "d": 0.0, "e": 3.5},
        "C": {"a": 0.0, "b": 3.5, "c": 0.0, "d": 0.0, "e": 3.0},
        "D": {"a": 0.0, "b": 4.0, "c": 0.0, "d": 3.5, "e": 3.0},
    },
    orient="index",
)
# Bare expression: shows the table when run as the last line of a notebook cell.
user_score_dict
- 構建使用者倒排表、共現矩陣
import math
# Ratings as a plain {user: {item: score}} dict. Each tuple lists one user's
# scores for items "a".."e" in order; ItemSimilarity only counts scores > 0.0
# as an interaction.
user_score_dict = {
    user: dict(zip("abcde", scores))
    for user, scores in (
        ("A", (3.0, 4.0, 0.0, 3.5, 0.0)),
        ("B", (4.0, 0.0, 4.5, 0.0, 3.5)),
        ("C", (0.0, 3.5, 0.0, 0.0, 3.0)),
        ("D", (0.0, 4.0, 0.0, 3.5, 3.0)),
    )
}
def ItemSimilarity(user_scores=None):
    """Build the symmetric item-to-item similarity table from user ratings.

    Uses ``sim[i][j] = |N(i) & N(j)| / sqrt(|N(i)| * |N(j)|)``, where
    ``N(x)`` is the set of users whose score for item ``x`` is > 0.0.

    Args:
        user_scores: Mapping ``{user: {item: score}}``. When omitted, the
            module-level ``user_score_dict`` is used, so existing
            ``ItemSimilarity()`` calls keep working.

    Returns:
        Nested dict ``{i: {j: similarity}}`` covering every pair of items
        that appear together in some user's score dict. ``sim[i][i]`` is
        always 0.0 because an item is never counted as co-occurring with
        itself. A pair in which either item was never scored above 0.0 gets
        0.0 instead of raising ``ZeroDivisionError``.
    """
    if user_scores is None:
        user_scores = user_score_dict

    # Number of users who interacted with (scored > 0.0) each item.
    item_user_count = {}
    # Co-occurrence matrix: co_counts[i][j] = users who interacted with both.
    co_counts = {}
    for scores in user_scores.values():
        for i, score_i in scores.items():
            item_user_count.setdefault(i, 0)
            row = co_counts.setdefault(i, {})
            liked_i = score_i > 0.0
            if liked_i:
                item_user_count[i] += 1
            for j, score_j in scores.items():
                # Register the pair even when it does not co-occur so the
                # resulting matrix has an explicit 0 entry for it.
                row.setdefault(j, 0)
                if liked_i and score_j > 0.0 and i != j:
                    row[j] += 1

    # Co-occurrence matrix -> similarity matrix.
    item_sim = {}
    for i, related in co_counts.items():
        n_i = item_user_count[i]
        row_sim = {}
        for j, shared in related.items():
            norm = math.sqrt(n_i * item_user_count[j])
            # norm == 0 implies the pair never co-occurred (shared == 0).
            row_sim[j] = shared / norm if norm else 0.0
        item_sim[i] = row_sim
    return item_sim
# Compute the nested {item: {item: similarity}} table.
sim = ItemSimilarity()
# pd.DataFrame(sim) puts the outer keys of `sim` on the columns; transposing
# moves them to the rows, so row i / column j holds sim[i][j].
# As a bare expression the result is only shown in an interactive session.
pd.DataFrame(sim).T