Commit 99defd9

1 parent f9677fc commit 99defd9

File tree: 3 files changed, +281 −278 lines changed

Lines changed: 98 additions & 95 deletions
@@ -1,95 +1,98 @@
 # coding:UTF-8
 '''
 Date:20160805
 @author: zhaozhiyong
 '''
 import numpy as np
 
 def load_data(inputfile):
     '''Load the training data.
     input:  inputfile(string): location of the training samples
     output: feature_data(mat): features
             label_data(mat): labels
             k(int): number of classes
     '''
     f = open(inputfile)  # open the file
     feature_data = []
     label_data = []
     for line in f.readlines():
         feature_tmp = []
         feature_tmp.append(1)  # bias term
         lines = line.strip().split("\t")
-        for i in xrange(len(lines) - 1):
+        for i in range(len(lines) - 1):
             feature_tmp.append(float(lines[i]))
         label_data.append(int(lines[-1]))
 
         feature_data.append(feature_tmp)
     f.close()  # close the file
     return np.mat(feature_data), np.mat(label_data).T, len(set(label_data))
 
 def cost(err, label_data):
     '''Compute the value of the loss function.
     input:  err(mat): the exp values
             label_data(mat): the label values
     output: sum_cost / m(float): the loss value
     '''
     m = np.shape(err)[0]
     sum_cost = 0.0
-    for i in xrange(m):
+    for i in range(m):
         if err[i, label_data[i, 0]] / np.sum(err[i, :]) > 0:
             sum_cost -= np.log(err[i, label_data[i, 0]] / np.sum(err[i, :]))
         else:
             sum_cost -= 0
     return sum_cost / m
 
 def gradientAscent(feature_data, label_data, k, maxCycle, alpha):
     '''Train the Softmax model by gradient descent on the loss
     (equivalently, gradient ascent on the log-likelihood, hence the name).
     input:  feature_data(mat): features
             label_data(mat): labels
             k(int): number of classes
             maxCycle(int): maximum number of iterations
             alpha(float): learning rate
     output: weights(mat): the weights
     '''
     m, n = np.shape(feature_data)
     weights = np.mat(np.ones((n, k)))  # initialize the weights
     i = 0
     while i <= maxCycle:
-        err = np.exp(feature_data * weights)
+        err = np.exp(feature_data * weights)  # a mat of shape (m, k)
+
         if i % 500 == 0:
-            print "\t-----iter: ", i, ", cost: ", cost(err, label_data)
-        rowsum = -err.sum(axis=1)
+            print("\t-----iter: ", i, ", cost: ", cost(err, label_data))
+        rowsum = -err.sum(axis=1)  # with axis=1, each row of the matrix is summed
         rowsum = rowsum.repeat(k, axis=1)
+        # repeat with axis=0 copies along the y-axis (adds rows); axis=1 copies along the x-axis (adds columns)
+
         err = err / rowsum
         for x in range(m):
-            err[x, label_data[x, 0]] += 1
+            err[x, label_data[x, 0]] += 1  # indexed by the sample's class label
         weights = weights + (alpha / m) * feature_data.T * err
         i += 1
     return weights
 
 def save_model(file_name, weights):
     '''Save the final model.
     input:  file_name(string): name of the file to save to
             weights(mat): the softmax model
     '''
     f_w = open(file_name, "w")
     m, n = np.shape(weights)
-    for i in xrange(m):
+    for i in range(m):
         w_tmp = []
-        for j in xrange(n):
+        for j in range(n):
             w_tmp.append(str(weights[i, j]))
         f_w.write("\t".join(w_tmp) + "\n")
     f_w.close()
 
 if __name__ == "__main__":
     inputfile = "SoftInput.txt"
     # 1. load the training data
-    print "---------- 1.load data ------------"
+    print("---------- 1.load data ------------")
     feature, label, k = load_data(inputfile)
     # 2. train the Softmax model
-    print "---------- 2.training ------------"
+    print("---------- 2.training ------------")
     weights = gradientAscent(feature, label, k, 10000, 0.4)
     # 3. save the final model
-    print "---------- 3.save model ------------"
+    print("---------- 3.save model ------------")
     save_model("weights", weights)

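For running the script, load_data implies that each line of SoftInput.txt holds tab-separated feature values followed by an integer class label in the last column (the bias term is prepended automatically). An illustrative two-feature, three-class layout, with made-up values:

0.5	1.2	0
-0.3	0.8	2
1.1	-0.4	1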