Skip to content

Commit 7c5d626

Browse files
Create lb.py
1 parent d124f00 commit 7c5d626

File tree

1 file changed

+117
-0
lines changed
  • Chapter_13 LabelPropagation

1 file changed

+117
-0
lines changed

Chapter_13 LabelPropagation/lb.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# coding:UTF-8
2+
'''
3+
Date:20160805
4+
@author: zhaozhiyong
5+
'''
6+
import string
7+
8+
def loadData(filePath):
    '''Load an undirected, optionally weighted edge list from a text file.

    Each non-blank line holds tab-separated fields: ``node_a``, ``node_b``
    and an optional third field with the edge weight.  When the line does
    not have exactly three fields the weight defaults to "1".

    input:  filePath(string) location of the edge-list file
    output: vector_dict(dict) node id (str) -> community label (int); every
                node starts in its own community, labelled by int(node id)
            edge_dict(dict) node id (str) -> list of "neighbour:weight"
                strings, one entry per edge endpoint read from the file
    raises: ValueError if a node id is not an integer literal
            IndexError if a non-blank line has fewer than two fields
    '''
    vector_dict = {}  # node -> community label
    edge_dict = {}  # node -> ["neighbour:weight", ...]
    # The with-block closes the file even when a malformed line raises.
    with open(filePath) as f:
        for line in f:
            fields = line.strip().split("\t")
            if fields == [""]:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            weight = fields[2] if len(fields) == 3 else "1"
            # Register the edge in both directions: i = 0 handles the first
            # endpoint, i = 1 the second, and fields[1 - i] is the other one.
            for i in range(2):
                node = fields[i]
                neighbour = fields[1 - i]
                if node not in vector_dict:
                    # First time this node is seen: it forms its own community.
                    vector_dict[node] = int(node)
                    edge_dict[node] = []
                edge_dict[node].append(neighbour + ":" + weight)
    return vector_dict, edge_dict
40+
41+
def get_max_community_label(vector_dict, adjacency_node_list):
    '''Return the community label with the largest total weight among neighbours.

    input:  vector_dict(dict) node id -> community label
            adjacency_node_list(list) "neighbour:weight" strings describing
                the edges of one node; the weight must be an integer literal
    output: the label whose summed edge weight over the neighbours is
            largest; on a tie the label that comes first in the iteration
            order of the accumulated label dict wins
    raises: IndexError if adjacency_node_list is empty
            KeyError if a neighbour id is missing from vector_dict
    '''
    label_weight = {}  # community label -> summed weight of edges into it
    for entry in adjacency_node_list:
        parts = entry.strip().split(":")
        neighbour_label = vector_dict[parts[0]]
        label_weight[neighbour_label] = (
            label_weight.get(neighbour_label, 0) + int(parts[1]))

    if not label_weight:
        # Keep the exception type an empty list has always produced.
        raise IndexError("adjacency_node_list is empty")

    # max() returns the first maximal item, which is the same element a
    # stable descending sort would place first.
    return max(label_weight.items(), key=lambda item: item[1])[0]
60+
61+
def check(vector_dict, edge_dict):
    '''Test whether label propagation has reached its stopping condition.

    input:  vector_dict(dict) node id -> community label
            edge_dict(dict) node id -> list of "neighbour:weight" strings
    output: 1 when every node already carries the label returned by
            get_max_community_label for its neighbours, 0 as soon as one
            node does not
    '''
    for node, current_label in vector_dict.items():
        dominant_label = get_max_community_label(vector_dict, edge_dict[node])
        if dominant_label != current_label:
            # At least one node would still change its label.
            return 0
    return 1
76+
77+
def label_propagation(vector_dict, edge_dict):
78+
'''标签传播
79+
input: vector_dict(dict)节点:社区
80+
edge_dict(dict)存储节点之间的边和权重
81+
output: vector_dict(dict)节点:社区
82+
'''
83+
# 初始化,设置每个节点属于不同的社区
84+
t = 0
85+
# 以随机的次序处理每个节点
86+
while True:
87+
if (check(vector_dict, edge_dict) == 0):
88+
t = t + 1
89+
print "iteration: ", t
90+
# 对每一个node进行更新
91+
for node in vector_dict.keys():
92+
adjacency_node_list = edge_dict[node] # 获取节点node的邻接节点
93+
vector_dict[node] = get_max_community_label(vector_dict, adjacency_node_list)
94+
print vector_dict
95+
else:
96+
break
97+
return vector_dict
98+
99+
def save_result(file_name, vec_new):
    '''Write the community assignment to a text file.

    input:  file_name(string) path of the output file (overwritten)
            vec_new(dict) node id -> community label
    output: None; the file receives one "node<TAB>label" line per entry,
            in the dict's iteration order
    '''
    # The with-block closes the file even if a write fails part-way.
    with open(file_name, "w") as f_result:
        for key, label in vec_new.items():
            f_result.write(str(key) + "\t" + str(label) + "\n")
104+
105+
106+
if __name__ == "__main__":
107+
# 1、导入数据
108+
print "----------1.load data ------------"
109+
vector_dict, edge_dict = loadData("cd_data.txt")
110+
print "original community: \n", vector_dict
111+
# 2、利用label propagation算法进行社区划分
112+
print "----------2.label propagation ------------"
113+
vec_new = label_propagation(vector_dict, edge_dict)
114+
# 3、保存最终的社区划分的结果
115+
print "----------3.save result ------------"
116+
save_result("result1", vec_new)
117+
print "final_result:", vec_new

0 commit comments

Comments
 (0)