Skip to content

Commit f9677fc

Browse files
committed
1
1 parent 3745519 commit f9677fc

File tree

5 files changed

+388
-389
lines changed

5 files changed

+388
-389
lines changed
Lines changed: 99 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,99 +1,99 @@
1-
# coding:UTF-8
2-
'''
3-
Date:20160901
4-
@author: zhaozhiyong
5-
'''
6-
import numpy as np
7-
8-
def load_data(file_name):
9-
'''导入训练数据
10-
input: file_name(string)训练数据的位置
11-
output: feature_data(mat)特征
12-
label_data(mat)标签
13-
'''
14-
f = open(file_name) # 打开文件
15-
feature_data = []
16-
label_data = []
17-
for line in f.readlines():
18-
feature_tmp = []
19-
lable_tmp = []
20-
lines = line.strip().split("\t")
21-
feature_tmp.append(1) # 偏置项
22-
for i in xrange(len(lines) - 1):
23-
feature_tmp.append(float(lines[i]))
24-
lable_tmp.append(float(lines[-1]))
25-
26-
feature_data.append(feature_tmp)
27-
label_data.append(lable_tmp)
28-
f.close() # 关闭文件
29-
return np.mat(feature_data), np.mat(label_data)
30-
31-
def sig(x):
32-
'''Sigmoid函数
33-
input: x(mat):feature * w
34-
output: sigmoid(x)(mat):Sigmoid值
35-
'''
36-
return 1.0 / (1 + np.exp(-x))
37-
38-
def lr_train_bgd(feature, label, maxCycle, alpha):
39-
'''利用梯度下降法训练LR模型
40-
input: feature(mat)特征
41-
label(mat)标签
42-
maxCycle(int)最大迭代次数
43-
alpha(float)学习率
44-
output: w(mat):权重
45-
'''
46-
n = np.shape(feature)[1] # 特征个数
47-
w = np.mat(np.ones((n, 1))) # 初始化权重
48-
i = 0
49-
while i <= maxCycle: # 在最大迭代次数的范围内
50-
i += 1 # 当前的迭代次数
51-
h = sig(feature * w) # 计算Sigmoid值
52-
err = label - h
53-
if i % 100 == 0:
54-
print "\t---------iter=" + str(i) + \
55-
" , train error rate= " + str(error_rate(h, label))
56-
w = w + alpha * feature.T * err # 权重修正
57-
return w
58-
59-
def error_rate(h, label):
60-
'''计算当前的损失函数值
61-
input: h(mat):预测值
62-
label(mat):实际值
63-
output: err/m(float):错误率
64-
'''
65-
m = np.shape(h)[0]
66-
67-
sum_err = 0.0
68-
for i in xrange(m):
69-
if h[i, 0] > 0 and (1 - h[i, 0]) > 0:
70-
sum_err -= (label[i,0] * np.log(h[i,0]) + \
71-
(1-label[i,0]) * np.log(1-h[i,0]))
72-
else:
73-
sum_err -= 0
74-
return sum_err / m
75-
76-
def save_model(file_name, w):
77-
'''保存最终的模型
78-
input: file_name(string):模型保存的文件名
79-
w(mat):LR模型的权重
80-
'''
81-
m = np.shape(w)[0]
82-
f_w = open(file_name, "w")
83-
w_array = []
84-
for i in xrange(m):
85-
w_array.append(str(w[i, 0]))
86-
f_w.write("\t".join(w_array))
87-
f_w.close()
88-
89-
if __name__ == "__main__":
90-
# 1、导入训练数据
91-
print "---------- 1.load data ------------"
92-
feature, label = load_data("data.txt")
93-
# 2、训练LR模型
94-
print "---------- 2.training ------------"
95-
w = lr_train_bgd(feature, label, 1000, 0.01)
96-
# 3、保存最终的模型
97-
print "---------- 3.save model ------------"
98-
save_model("weights", w)
99-
1+
# coding:UTF-8
2+
'''
3+
Date:20160901
4+
@author: zhaozhiyong
5+
'''
6+
import numpy as np
7+
8+
def load_data(file_name):
    '''Load the training data.

    input:  file_name(string): path to the training data file; each line is
            tab-separated feature values with the label in the last column
    output: feature_data(mat): feature matrix (a bias column of 1s is prepended)
            label_data(mat): label column vector
    '''
    feature_data = []
    label_data = []
    # Context manager guarantees the file is closed even if parsing raises.
    with open(file_name) as f:
        for line in f:
            fields = line.strip().split("\t")
            # Prepend the bias term, then the actual feature values.
            feature_tmp = [1]
            for value in fields[:-1]:
                feature_tmp.append(float(value))
            feature_data.append(feature_tmp)
            # The last column of each line is the label.
            label_data.append([float(fields[-1])])
    return np.mat(feature_data), np.mat(label_data)
30+
31+
def sig(x):
    '''Sigmoid activation.

    input:  x(mat): feature * w
    output: sigmoid(x)(mat): element-wise sigmoid value
    '''
    denominator = 1 + np.exp(-x)
    return 1.0 / denominator
37+
38+
def lr_train_bgd(feature, label, maxCycle, alpha):
    '''Train a logistic-regression model with batch gradient descent.

    input:  feature(mat): feature matrix (one sample per row, bias included)
            label(mat): label column vector
            maxCycle(int): maximum number of iterations
            alpha(float): learning rate
    output: w(mat): learned weight column vector
    '''
    n = np.shape(feature)[1]  # number of features (including the bias term)
    w = np.mat(np.ones((n, 1)))  # initialize all weights to 1
    # Fix: the original `while i <= maxCycle` with a pre-increment executed
    # maxCycle + 1 iterations; this loop runs exactly maxCycle as documented.
    for i in range(1, maxCycle + 1):
        h = sig(feature * w)  # predicted probabilities for every sample
        err = label - h  # residual drives the gradient step
        if i % 100 == 0:
            print("\t---------iter=" + str(i) +
                  " , train error rate= " + str(error_rate(h, label)))
        w = w + alpha * feature.T * err  # batch gradient update of the weights
    return w
58+
59+
def error_rate(h, label):
    '''Compute the mean cross-entropy loss of the current predictions.

    input:  h(mat): predicted probabilities
            label(mat): ground-truth labels (0 or 1)
    output: err/m(float): average cross-entropy loss over the m samples
    '''
    m = np.shape(h)[0]
    sum_err = 0.0
    for i in range(m):
        p = h[i, 0]
        # Guard against log(0): predictions saturated at exactly 0 or 1
        # contribute nothing (matches the original's silent skip; the old
        # `else: sum_err -= 0` branch was a no-op and has been removed).
        if p > 0 and (1 - p) > 0:
            sum_err -= (label[i, 0] * np.log(p) +
                        (1 - label[i, 0]) * np.log(1 - p))
    return sum_err / m
75+
76+
def save_model(file_name, w):
    '''Persist the trained model weights to a file.

    input:  file_name(string): destination file name
            w(mat): weight column vector of the LR model
    '''
    m = np.shape(w)[0]
    # Serialize every weight as text, tab-separated on a single line.
    w_array = [str(w[i, 0]) for i in range(m)]
    # Context manager guarantees the file handle is closed on any exit path.
    with open(file_name, "w") as f_w:
        f_w.write("\t".join(w_array))
88+
89+
def main():
    '''Load the data, train the LR model, and save the learned weights.'''
    # 1. Load the training data.
    print("---------- 1.load data ------------")
    feature, label = load_data("data.txt")
    # 2. Train the logistic-regression model.
    print("---------- 2.training ------------")
    w = lr_train_bgd(feature, label, 1000, 0.01)
    # 3. Save the final model.
    print("---------- 3.save model ------------")
    save_model("weights", w)


if __name__ == "__main__":
    main()
99+

Chapter_4 SVM/svm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def svm_predict(svm, test_sample_x):
235235
kernel_value = cal_kernel_value(svm.train_x, test_sample_x, svm.kernel_opt)
236236
# 2、计算预测值
237237
predict = kernel_value.T * np.multiply(svm.train_y, svm.alphas) + svm.b
238-
return predict
238+
return predict
239239

240240
def cal_accuracy(svm, test_x, test_y):
241241
'''计算预测的准确性

0 commit comments

Comments
 (0)