|
| 1 | +# coding=utf-8 |
| 2 | +''' |
| 3 | +Date:20160930 |
| 4 | +@author: zhaozhiyong |
| 5 | +''' |
| 6 | + |
| 7 | +import numpy as np |
| 8 | + |
def load_data(file_path):
    '''Load the user-item interaction matrix from a tab-separated file.

    Each non-blank line describes one user and each tab-separated field one
    item. A field equal to "-" means "not rated" and is stored as 0; any
    other field is stored as 1 (the rating value itself is discarded).

    input:  file_path(string): path of the file holding the user-item data
    output: data(mat): binary user-item matrix (rows: users, columns: items)
    '''
    data = []
    # The context manager closes the file even if reading raises.
    with open(file_path) as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line would otherwise become a bogus one-column
                # "rated" row and make the matrix ragged.
                continue
            data.append([0 if x == "-" else 1 for x in line.split("\t")])
    # np.mat was removed in NumPy 2.0; np.asmatrix is the same function
    # under its surviving name and exists in older releases as well.
    return np.asmatrix(data)
| 27 | + |
def generate_dict(dataTmp):
    '''Convert the user-item matrix into a bipartite graph (adjacency dicts).

    User i becomes node "U_i" and item j becomes node "D_j". Every non-zero
    entry dataTmp[i, j] yields an edge stored in both directions, with the
    matrix entry as the edge value.

    input:  dataTmp(mat): user-item matrix, indexable as dataTmp[i, j]
    output: data_dict(dict): node name -> {neighbour name: matrix entry}
    '''
    m, n = np.shape(dataTmp)

    # Build both directions in a single scan of the matrix.
    # range (not the Python-2-only xrange) keeps this runnable on Python 3.
    user_nodes = [{} for _ in range(m)]
    item_nodes = [{} for _ in range(n)]
    for i in range(m):
        for j in range(n):
            weight = dataTmp[i, j]
            if weight != 0:
                user_nodes[i]["D_" + str(j)] = weight
                item_nodes[j]["U_" + str(i)] = weight

    # Every user and every item gets a node, even when it has no edges.
    data_dict = {}
    for i in range(m):
        data_dict["U_" + str(i)] = user_nodes[i]
    for j in range(n):
        data_dict["D_" + str(j)] = item_nodes[j]
    return data_dict
| 52 | + |
| 53 | + |
def PersonalRank(data_dict, alpha, user, maxCycles):
    '''Score every node by a random walk with restart from `user`.

    At each step the walker follows a uniformly chosen edge with
    probability alpha and jumps back to `user` with probability 1 - alpha.
    The scores are iterated until the total absolute change between two
    iterations is at most 0.0001, or until maxCycles iterations have run.

    input:  data_dict(dict): bipartite graph, node -> {neighbour: value}
            alpha(float): probability of continuing the walk
            user(string): root node the walk starts from and restarts at
            maxCycles(int): maximum number of iterations
    output: rank(dict): node -> score
    '''
    # 1. All the probability mass starts on the root user.
    rank = dict.fromkeys(data_dict, 0.0)
    rank[user] = 1.0

    # 2. Iterate.
    step = 0
    while step < maxCycles:
        tmp = dict.fromkeys(data_dict, 0.0)

        for i, ri in data_dict.items():
            if not ri:
                # A node without edges has nothing to pass on (and would
                # divide by zero below).
                continue
            share = alpha * rank[i] / float(len(ri))
            for j in ri:
                tmp[j] = tmp.get(j, 0.0) + share

        # The restart mass is added exactly once per iteration. Adding it
        # inside the edge loop would add it once per edge pointing at
        # `user`, i.e. deg(user) times.
        tmp[user] = tmp.get(user, 0.0) + (1 - alpha)

        # Convergence is measured on the absolute change: the signed
        # changes sum to ~0 whenever the total mass is conserved, which
        # would end the loop immediately.
        change = sum(abs(score - rank.get(k, 0.0))
                     for k, score in tmp.items())
        rank = tmp
        if change <= 0.0001:
            break
        if step % 20 == 0:
            print("iter:  %d" % step)
        step = step + 1
    return rank
| 93 | + |
def recommend(data_dict, rank, user):
    '''Build the final recommendation list for `user`.

    Keeps the nodes of `rank` whose name starts with "D_" (items), drops
    those `user` is already connected to in `data_dict`, and sorts the rest
    by score from highest to lowest.

    input:  data_dict(dict): bipartite graph, node -> {neighbour: value}
            rank(dict): node -> score
            user(string): user to recommend for; must be a key of data_dict
    output: result(list): (item, score) tuples in descending score order
    '''
    # 1. Items the user has already interacted with. The adjacency dict
    #    itself gives constant-time membership tests.
    seen = data_dict[user]

    # 2. Item scores from rank, excluding the items already seen.
    candidates = [(node, score) for node, score in rank.items()
                  if node.startswith("D_") and node not in seen]

    # 3. Sort by score, highest first.
    return sorted(candidates, key=lambda d: d[1], reverse=True)
| 116 | + |
if __name__ == "__main__":
    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3, unlike the print statement.
    # 1. Load the user-item matrix
    print("------------ 1.load data -------------")
    dataMat = load_data("data.txt")
    # 2. Convert the user-item matrix into adjacency-dict storage
    print("------------ 2.generate dict --------------")
    data_dict = generate_dict(dataMat)
    # 3. Score the nodes with PersonalRank
    print("------------ 3.PersonalRank --------------")
    rank = PersonalRank(data_dict, 0.85, "U_0", 500)
    # 4. Recommend items from the rank result
    print("------------ 4.recommend -------------")
    result = recommend(data_dict, rank, "U_0")
    print(result)
0 commit comments