
Commit 5b97f59

Create mf.py
1 parent c3354aa commit 5b97f59

File tree

  • Chapter_15 MatrixFactorization

1 file changed: +138 -0 lines changed

Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,138 @@
# coding:UTF-8
'''
Date:20160928
@author: zhaozhiyong
'''
import numpy as np


def load_data(path):
    '''Load the user-item matrix from a file
    input:  path(string): location of the user-item matrix file
    output: data(mat): user-item matrix
    '''
    f = open(path)
    data = []
    for line in f.readlines():
        arr = []
        lines = line.strip().split("\t")
        for x in lines:
            if x != "-":
                arr.append(float(x))
            else:
                # "-" marks an unrated item; store it as 0
                arr.append(float(0))
        data.append(arr)
    f.close()
    return np.mat(data)


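# A hypothetical example of the expected input file (not part of the original
# commit): one row per user, one tab-separated column per item, with "-"
# marking an unrated item. A 3-user x 4-item data.txt could look like:
#
#     4   -   3   5
#     -   5   -   2
#     1   -   4   -

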
def gradAscent(dataMat, k, alpha, beta, maxCycles):
    '''Factorize the matrix with gradient descent
    input:  dataMat(mat): user-item matrix
            k(int): rank of the factorization
            alpha(float): learning rate
            beta(float): regularization parameter
            maxCycles(int): maximum number of iterations
    output: p, q(mat): the factor matrices
    '''
    m, n = np.shape(dataMat)
    # 1. Initialize p and q randomly
    p = np.mat(np.random.random((m, k)))
    q = np.mat(np.random.random((k, n)))

    # 2. Training
    for step in xrange(maxCycles):
        for i in xrange(m):
            for j in xrange(n):
                if dataMat[i, j] > 0:
                    error = dataMat[i, j]
                    for r in xrange(k):
                        error = error - p[i, r] * q[r, j]
                    for r in xrange(k):
                        # gradient descent update for the observed entry (i, j)
                        p[i, r] = p[i, r] + alpha * (2 * error * q[r, j] - beta * p[i, r])
                        q[r, j] = q[r, j] + alpha * (2 * error * p[i, r] - beta * q[r, j])

        # 3. Compute the regularized squared-error loss over observed entries
        loss = 0.0
        for i in xrange(m):
            for j in xrange(n):
                if dataMat[i, j] > 0:
                    error = 0.0
                    for r in xrange(k):
                        error = error + p[i, r] * q[r, j]
                    loss = loss + (dataMat[i, j] - error) * (dataMat[i, j] - error)
                    for r in xrange(k):
                        loss = loss + beta * (p[i, r] * p[i, r] + q[r, j] * q[r, j]) / 2

        if loss < 0.001:
            break
        if step % 1000 == 0:
            print "\titer: ", step, " loss: ", loss
    return p, q


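# For reference (not part of the original commit): the nested loops above run
# stochastic gradient descent on the regularized squared error
#
#   e_ij = R[i, j] - sum_r p[i, r] * q[r, j]
#   loss = sum over observed (i, j) of e_ij^2
#          + (beta / 2) * (sum over r of p[i, r]^2 + q[r, j]^2 for those entries)
#
# which gives the per-entry updates used in the inner loop:
#
#   p[i, r] += alpha * (2 * e_ij * q[r, j] - beta * p[i, r])
#   q[r, j] += alpha * (2 * e_ij * p[i, r] - beta * q[r, j])

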
def save_file(file_name, source):
    '''Save a matrix to a tab-separated text file
    input:  file_name(string): name of the output file
            source(mat): matrix to save
    '''
    f = open(file_name, "w")
    m, n = np.shape(source)
    for i in xrange(m):
        tmp = []
        for j in xrange(n):
            tmp.append(str(source[i, j]))
        f.write("\t".join(tmp) + "\n")
    f.close()


def prediction(dataMatrix, p, q, user):
    '''Score the items the given user has not interacted with
    input:  dataMatrix(mat): original user-item matrix
            p(mat): factor matrix p
            q(mat): factor matrix q
            user(int): user id
    output: predict(list): recommendation list
    '''
    n = np.shape(dataMatrix)[1]
    predict = {}
    for j in xrange(n):
        if dataMatrix[user, j] == 0:
            # predicted rating is the dot product of the user and item factors
            predict[j] = (p[user,] * q[:, j])[0, 0]

    # sort by predicted score, highest first
    return sorted(predict.items(), key=lambda d: d[1], reverse=True)


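# A minimal vectorized sketch of the same scoring step (an equivalent written
# for illustration, not the code this commit uses): with the numpy matrices
# p and q produced above and n = np.shape(dataMatrix)[1],
#
#   scores = p[user, :] * q          # 1 x n matrix of predicted ratings
#   predict = {j: scores[0, j] for j in xrange(n) if dataMatrix[user, j] == 0}

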
def top_k(predict, k):
    '''Recommend the top k items to the user
    input:  predict(list): sorted list of (item, score) pairs
            k(int): number of items to recommend
    output: top_recom(list): the top k items
    '''
    top_recom = []
    len_result = len(predict)
    if k >= len_result:
        top_recom = predict
    else:
        for i in xrange(k):
            top_recom.append(predict[i])
    return top_recom


if __name__ == "__main__":
    # 1. Load the user-item matrix
    print "----------- 1. load data -----------"
    dataMatrix = load_data("data.txt")
    # 2. Factorize the matrix with gradient descent
    print "----------- 2. training -----------"
    p, q = gradAscent(dataMatrix, 5, 0.0002, 0.02, 5000)
    # 3. Save the factor matrices
    print "----------- 3. save decompose -----------"
    save_file("p", p)
    save_file("q", q)
    # 4. Score the unrated items for user 0
    print "----------- 4. prediction -----------"
    predict = prediction(dataMatrix, p, q, 0)
    # 5. Top-k recommendation
    print "----------- 5. top_k recommendation ------------"
    top_recom = top_k(predict, 2)
    print top_recom
    print p * q
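
# Usage sketch (an assumption about the surrounding setup, not stated in the
# commit): the file uses Python 2 syntax (print statements, xrange) and expects
# a tab-separated data.txt in the working directory, e.g.
#
#   python2 mf.py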

0 commit comments
