Skip to content

Commit fc00121

Browse files
Create user_based_recommend.py
1 parent 3715eb7 commit fc00121

File tree

1 file changed

+116
-0
lines changed

1 file changed

+116
-0
lines changed
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# coding:UTF-8
2+
'''
3+
Date:20160928
4+
@author: zhaozhiyong
5+
'''
6+
7+
import numpy as np
8+
9+
def load_data(file_path):
    '''Load the user-item rating matrix from a tab-separated text file.

    Each non-blank line of the file is one user and each tab-separated
    field is that user's rating of one item. A field of "-" marks an item
    the user has not interacted with and is stored as 0.

    input:  file_path(string): path of the file holding the user-item data
    output: data(mat): user-item matrix, one row per non-blank line
    raises: ValueError if a field is neither "-" nor a number
    '''
    data = []
    # The context manager closes the file even when a field fails to parse.
    with open(file_path) as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line carries no ratings; skipping
                # it avoids float("") blowing up on an otherwise valid file.
                continue
            data.append([0.0 if x == "-" else float(x)
                         for x in line.split("\t")])

    # np.asmatrix is the function np.mat aliased; the alias itself was
    # removed in NumPy 2.0, so call the real name.
    return np.asmatrix(data)
28+
29+
def cos_sim(x, y):
    '''Cosine similarity between two row vectors.

    input:  x(mat): a row vector, e.g. one user or one item
            y(mat): a row vector of the same length as x
    output: cosine similarity of x and y as a scalar; 0.0 when either
            vector is all zeros, where the cosine is undefined
    '''
    # Coerce to matrices so that `*` below is always a matrix product,
    # even if the caller hands in plain arrays or lists.
    x = np.asmatrix(x)
    y = np.asmatrix(y)

    numerator = (x * y.T)[0, 0]  # inner product of x and y
    denominator = np.sqrt((x * x.T)[0, 0]) * np.sqrt((y * y.T)[0, 0])
    if denominator == 0:
        # An all-zero vector would otherwise yield 0/0 = nan.
        return 0.0
    return numerator / denominator
38+
39+
40+
def similarity(data):
    '''Pairwise cosine similarity between the rows of a matrix.

    input:  data(mat): any matrix, one vector per row
    output: w(mat): m x m symmetric matrix in which w[i, j] is the cosine
            similarity of rows i and j. The diagonal is 0, and every pair
            involving an all-zero row is 0.
    '''
    rows = np.asarray(data, dtype=float)

    # All inner products at once; the diagonal holds the squared row norms.
    gram = rows.dot(rows.T)
    norms = np.sqrt(np.diag(gram))
    scale = np.outer(norms, norms)

    # Divide only where the denominator is non-zero so that all-zero rows
    # yield 0 instead of nan.
    w = np.zeros(gram.shape)
    defined = scale != 0
    w[defined] = gram[defined] / scale[defined]

    # Keep the strict upper triangle and mirror it: this zeroes the
    # diagonal and makes the result exactly symmetric.
    upper = np.triu(w, 1)
    return np.asmatrix(upper + upper.T)
58+
59+
def user_based_recommend(data, w, user):
    '''Score the items a user has not interacted with, using similar users.

    The score of a candidate item is the sum, over every user who has
    interacted with it, of that user's similarity to `user` multiplied by
    that user's rating of the item. Items that no user has interacted
    with receive no score and are left out of the result.

    input:  data(mat): user-item matrix
            w(mat): user-user similarity matrix
            user(int): index of the user to recommend for
    output: predict(list): (item index, score) pairs, highest score first
    '''
    ratings = np.asarray(data)
    weights = np.asarray(w)[user]  # similarity of `user` to every user

    predict = {}
    # 1. Candidates are the items `user` has not interacted with.
    for x in np.flatnonzero(ratings[user] == 0):
        column = ratings[:, x]  # every user's interaction with item x
        # 2. Accumulate similarity-weighted ratings from the users who
        #    did interact with item x.
        raters = np.flatnonzero(column != 0)
        if raters.size:
            predict[int(x)] = sum(weights[i] * column[i] for i in raters)

    # 3. Sort by predicted score, largest first.
    return sorted(predict.items(), key=lambda d: d[1], reverse=True)
87+
88+
def top_k(predict, k):
    '''Keep the first k entries of an already-sorted recommendation list.

    input:  predict(list): recommendations sorted best-first
            k(int): number of items to recommend
    output: top_recom(list): the first k entries of predict; all of them
            when k is at least len(predict), none when k <= 0
    '''
    # Clamp at 0: a negative k would otherwise slice from the end of the
    # list instead of yielding an empty recommendation.
    return list(predict[:max(k, 0)])
102+
103+
if __name__ == "__main__":
    # Single-argument print(...) calls behave the same on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.

    # 1. Load the user-item data
    print("------------ 1. load data ------------")
    data = load_data("data.txt")
    # 2. Compute the similarity between users
    print("------------ 2. calculate similarity between users -------------")
    w = similarity(data)
    # 3. Use the user similarities to score items for user 0
    print("------------ 3. predict ------------")
    predict = user_based_recommend(data, w, 0)
    # 4. Keep the top-K recommendations
    print("------------ 4. top_k recommendation ------------")
    top_recom = top_k(predict, 2)
    print(top_recom)

0 commit comments

Comments
 (0)