Skip to content

Commit 0e29d71

Browse files
Create personal_rank.py
1 parent 58fd405 commit 0e29d71

File tree

1 file changed

+130
-0
lines changed

1 file changed

+130
-0
lines changed
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# coding=utf-8
2+
'''
3+
Date:20160930
4+
@author: zhaozhiyong
5+
'''
6+
7+
import numpy as np
8+
9+
def load_data(file_path):
    '''Load the user-item rating file as a 0/1 interaction matrix.

    Each non-blank line of the file is one user; fields are separated by
    tabs, one field per item. A field equal to "-" means "not rated" and
    becomes 0; any other field becomes 1 (the rating value itself is
    discarded).

    input:  file_path(string): path of the tab-separated rating file
    output: data(mat): user-item matrix of 0/1 flags
    '''
    data = []
    # `with` guarantees the handle is closed even if parsing raises.
    with open(file_path) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # A blank (e.g. trailing) line would otherwise turn into a
                # bogus one-column row and make the matrix ragged.
                continue
            # 1 = rated, 0 = not rated ("-")
            data.append([0 if field == "-" else 1
                         for field in stripped.split("\t")])
    # np.mat was removed in NumPy 2.0; np.asmatrix returns the same type
    # and is available in old releases as well.
    return np.asmatrix(data)
27+
28+
def generate_dict(dataTmp):
    '''Convert a user-item matrix into a bipartite-graph adjacency dict.

    Row i becomes node "U_<i>" and column j becomes node "D_<j>". Every
    non-zero entry (i, j) produces two directed edges carrying the entry's
    value as weight: U_i -> D_j and D_j -> U_i. Nodes without any non-zero
    entry are still present, mapped to an empty dict.

    input:  dataTmp(mat): user-item matrix (any 2-D array-like)
    output: data_dict(dict): {node: {neighbour: weight}}; all user nodes
            are inserted first (row order), then all item nodes
            (column order)
    '''
    matrix = np.asarray(dataTmp)
    m, n = np.shape(matrix)

    # `range` instead of the Python-2-only `xrange`.
    user_keys = ["U_" + str(i) for i in range(m)]
    item_keys = ["D_" + str(j) for j in range(n)]

    # Create every node up front so isolated users/items keep an entry and
    # the key order is users first, then items.
    data_dict = {}
    for key in user_keys:
        data_dict[key] = {}
    for key in item_keys:
        data_dict[key] = {}

    # np.nonzero yields indices in row-major order, so each adjacency dict
    # is filled in ascending index order while only the non-zero cells are
    # visited (one pass instead of two full m*n scans).
    rows, cols = np.nonzero(matrix)
    for i, j in zip(rows, cols):
        weight = matrix[i, j]
        data_dict[user_keys[i]][item_keys[j]] = weight
        data_dict[item_keys[j]][user_keys[i]] = weight
    return data_dict
52+
53+
54+
def PersonalRank(data_dict, alpha, user, maxCycles):
    '''Score every graph node with PersonalRank (random walk with restart).

    At each step the walker sitting on a node moves with probability
    `alpha` to one of that node's neighbours, chosen uniformly, and with
    probability `1 - alpha` jumps back to `user`. The scores are updated
    until the total absolute change between two successive iterations is
    at most 0.0001, or until `maxCycles` iterations have run.

    input:  data_dict(dict): bipartite graph {node: {neighbour: weight}}
            alpha(float): probability of continuing the walk
            user(string): node the walk starts from and restarts at
            maxCycles(int): maximum number of iterations
    output: rank(dict): {node: score}
    '''
    # 1. Initialise: all the mass sits on the start node.
    rank = dict.fromkeys(data_dict, 0.0)
    rank[user] = 1.0

    # 2. Iterate.
    for step in range(maxCycles):
        tmp = dict.fromkeys(rank, 0.0)

        for node, neighbours in data_dict.items():
            if not neighbours:
                continue
            # Each neighbour receives an equal share of the node's score.
            share = alpha * rank[node] / float(len(neighbours))
            for neighbour in neighbours:
                tmp[neighbour] = tmp.get(neighbour, 0.0) + share

        # The restart mass is added exactly once per iteration. Adding it
        # inside the edge loop would apply it once per edge into `user`
        # (deg(user) times, or never for an isolated user).
        tmp[user] += 1 - alpha

        # Convergence is judged on the sum of absolute changes; signed
        # differences cancel each other and can report convergence on the
        # very first iteration.
        delta = sum(abs(score - rank.get(node, 0.0))
                    for node, score in tmp.items())
        rank = tmp
        if delta <= 0.0001:
            break
        if step % 20 == 0:
            # Single pre-formatted argument: same output on Python 2 and 3.
            print("iter:  %d" % step)
    return rank
93+
94+
def recommend(data_dict, rank, user):
    '''Build the recommendation list for `user` from the node scores.

    Only item nodes (keys starting with "D_") that `user` has not already
    interacted with are kept; they are returned sorted by score, highest
    first. Items with equal scores keep the order they have in `rank`.

    input:  data_dict(dict): bipartite graph {node: {neighbour: weight}}
            rank(dict): {node: score} as produced by PersonalRank
            user(string): user node to recommend for
    output: result(list): (item, score) tuples in descending score order
    '''
    # 1. Items the user has already rated. A set gives O(1) membership
    #    tests instead of scanning a list for every candidate.
    seen = set(data_dict[user])

    # 2. Keep the scores of the item nodes the user has not rated yet.
    candidates = [(node, score) for node, score in rank.items()
                  if node.startswith("D_") and node not in seen]

    # 3. Sort by score, descending.
    return sorted(candidates, key=lambda pair: pair[1], reverse=True)
116+
117+
if __name__ == "__main__":
    # Demo pipeline: recommend items for user "U_0" from data.txt.
    # Every print call takes one parenthesised argument, so the output is
    # identical on Python 2 and the syntax is also valid on Python 3.

    # 1. Load the user-item matrix
    print("------------ 1.load data -------------")
    dataMat = load_data("data.txt")
    # 2. Convert the matrix into an adjacency-dict bipartite graph
    print("------------ 2.generate dict --------------")
    data_dict = generate_dict(dataMat)
    # 3. Score the nodes with PersonalRank
    print("------------ 3.PersonalRank --------------")
    rank = PersonalRank(data_dict, 0.85, "U_0", 500)
    # 4. Turn the scores into item recommendations
    print("------------ 4.recommend -------------")
    result = recommend(data_dict, rank, "U_0")
    print(result)

0 commit comments

Comments
 (0)