|
| 1 | +# coding:UTF-8 |
| 2 | +''' |
| 3 | +Date:20160831 |
| 4 | +@author: zhaozhiyong |
| 5 | +''' |
| 6 | +import numpy as np |
| 7 | +from math import sqrt |
| 8 | + |
def load_data(file_name):
    '''Load a tab-separated data set and one-hot encode its labels.

    Each non-blank line holds the feature values followed by an integer
    class label in the last column.

    input:  file_name(string): path of the data file
    output: feature_data(mat): features, one row per sample
            label_data(mat): one-hot labels, shape (samples, n_class)
            n_class(int): number of distinct labels found in the file
    '''
    feature_data = []
    label_tmp = []
    # `with` guarantees the file is closed even if a malformed line raises.
    with open(file_name) as f:
        for line in f:
            fields = line.strip().split("\t")
            if fields == [""]:
                continue  # blank line: nothing to parse
            feature_data.append([float(value) for value in fields[:-1]])
            label_tmp.append(int(fields[-1]))

    # Map every distinct label to a column through its sorted rank. For labels
    # that are already 0..n_class-1 this is the identity; it also keeps
    # non-contiguous labels in range and stops negative labels from wrapping
    # around to the last columns.
    classes = sorted(set(label_tmp))
    n_class = len(classes)
    column_of = {value: column for column, value in enumerate(classes)}

    label_data = np.mat(np.zeros((len(label_tmp), n_class)))
    for row, value in enumerate(label_tmp):
        label_data[row, column_of[value]] = 1

    return np.mat(feature_data), label_data, n_class
| 38 | + |
def sig(x):
    '''Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise.

    input:  x(mat/float): a matrix or a single real number
    output: (mat/float): sigmoid of x, same shape as x
    '''
    decay = np.exp(-x)
    return 1.0 / (1 + decay)
| 45 | + |
def partial_sig(x):
    '''Derivative of the sigmoid, s(x) * (1 - s(x)), applied element-wise.

    input:  x(mat/float): a matrix or a single real number
    output: out(mat/float): derivative values; a matrix for 2-D input,
            a scalar for scalar input
    '''
    # Same formula as sig(); evaluated once for the whole input instead of
    # twice per element inside a Python double loop.
    s = 1.0 / (1 + np.exp(-x))
    out = np.multiply(s, 1 - s)
    if np.ndim(out) == 2:
        # Always hand back a matrix for 2-D input, even if x was a plain array.
        return np.mat(out)
    return out
| 57 | + |
def hidden_in(feature, w0, b0):
    '''Compute the input of the hidden layer: feature * w0 + b0.

    input:  feature(mat): features, one row per sample
            w0(mat): weights between the input layer and the hidden layer
            b0(mat): bias row between the input layer and the hidden layer
    output: (mat): hidden-layer input, one row per sample
    '''
    # Broadcasting adds the bias row to every sample row in one step. It also
    # promotes dtypes, which the former row-by-row in-place `+=` could not do.
    return feature * w0 + b0
| 70 | + |
def hidden_out(hidden_in):
    '''Output of the hidden layer: the sigmoid of its input.

    input:  hidden_in(mat): input of the hidden layer
    output: (mat): output of the hidden layer
    '''
    return sig(hidden_in)
| 78 | + |
def predict_in(hidden_out, w1, b1):
    '''Compute the input of the output layer: hidden_out * w1 + b1.

    input:  hidden_out(mat): output of the hidden layer, one row per sample
            w1(mat): weights between the hidden layer and the output layer
            b1(mat): bias row between the hidden layer and the output layer
    output: (mat): output-layer input, one row per sample
    '''
    # Broadcasting adds the bias row to every sample row in one step. It also
    # promotes dtypes, which the former row-by-row in-place `+=` could not do.
    return hidden_out * w1 + b1
| 91 | + |
def predict_out(predict_in):
    '''Output of the output layer: the sigmoid of its input.

    input:  predict_in(mat): input of the output layer
    output: (mat): output of the output layer
    '''
    return sig(predict_in)
| 99 | + |
def _init_uniform(n_rows, n_cols, fan_sum):
    '''Draw an (n_rows x n_cols) matrix uniformly from [-r, r), r = 4*sqrt(6)/sqrt(fan_sum).'''
    half_width = 4.0 * sqrt(6) / sqrt(fan_sum)
    return np.mat(np.random.rand(n_rows, n_cols)) * (2.0 * half_width) - half_width


def bp_train(feature, label, n_hidden, maxCycle, alpha, n_output):
    '''Train a one-hidden-layer sigmoid network with batch back-propagation.

    input:  feature(mat): features, one row per sample
            label(mat): target outputs, one row per sample
            n_hidden(int): number of hidden-layer nodes
            maxCycle(int): last iteration index; the loop runs for
                           i = 0 .. maxCycle inclusive
            alpha(float): learning rate
            n_output(int): number of output-layer nodes
    output: w0(mat): weights between the input layer and the hidden layer
            w1(mat): weights between the hidden layer and the output layer
            b0(mat): bias between the input layer and the hidden layer
            b1(mat): bias between the hidden layer and the output layer
    '''
    m, n = np.shape(feature)

    # 1. Initialisation. The draw order (w0, b0, w1, b1) is kept so that a
    #    seeded run still produces the same starting weights.
    w0 = _init_uniform(n, n_hidden, n + n_hidden)
    b0 = _init_uniform(1, n_hidden, n + n_hidden)
    w1 = _init_uniform(n_hidden, n_output, n_hidden + n_output)
    b1 = _init_uniform(1, n_output, n_hidden + n_output)

    # 2. Training
    i = 0
    while i <= maxCycle:
        # 2.1 Forward pass
        hidden_input = hidden_in(feature, w0, b0)  # m x n_hidden
        hidden_output = hidden_out(hidden_input)
        output_in = predict_in(hidden_output, w1, b1)  # m x n_output
        output_out = predict_out(output_in)

        # 2.2 Backward pass: residuals of the output and hidden layers.
        #     delta_hidden must use w1 from before this iteration's update.
        delta_output = -np.multiply(label - output_out, partial_sig(output_in))
        delta_hidden = np.multiply(delta_output * w1.T, partial_sig(hidden_input))

        # 2.3 Update weights and biases.
        # NOTE(review): bias gradients are scaled by 1/m but weight gradients
        # are not; kept as-is because changing it alters the effective
        # learning rate -- confirm whether this is intended.
        w1 = w1 - alpha * (hidden_output.T * delta_output)
        b1 = b1 - alpha * np.sum(delta_output, axis=0) * (1.0 / m)
        w0 = w0 - alpha * (feature.T * delta_hidden)
        b0 = b0 - alpha * np.sum(delta_hidden, axis=0) * (1.0 / m)

        if i % 100 == 0:
            # The cost is evaluated with the freshly updated parameters.
            cost = (1.0 / 2) * get_cost(get_predict(feature, w0, w1, b0, b1) - label)
            # One pre-joined string prints the same text on Python 2 and 3.
            print(" ".join(["\t-------- iter: ", str(i), " ,cost: ", str(cost)]))
        i += 1
    return w0, w1, b0, b1
| 161 | + |
def get_cost(cost):
    '''Sum of squared entries of the residual matrix, divided by its row count.

    input:  cost(mat): difference between the predictions and the labels
    output: (float): sum of squares of all entries / number of rows
    '''
    # Unpacking into two names keeps the requirement that the input is 2-D.
    m, _ = np.shape(cost)
    squared = np.multiply(cost, cost)
    # float() keeps the return a plain Python float, so an empty input still
    # raises ZeroDivisionError instead of returning nan.
    return float(np.sum(squared)) / m
| 174 | + |
def get_predict(feature, w0, w1, b0, b1):
    '''Run a full forward pass and return the output-layer activations.

    input:  feature(mat): features
            w0(mat): weights between the input layer and the hidden layer
            w1(mat): weights between the hidden layer and the output layer
            b0(mat): bias between the input layer and the hidden layer
            b1(mat): bias between the hidden layer and the output layer
    output: (mat): predicted values
    '''
    hidden_activation = hidden_out(hidden_in(feature, w0, b0))
    output_input = predict_in(hidden_activation, w1, b1)
    return predict_out(output_input)
| 185 | + |
def save_model(w0, w1, b0, b1):
    '''Save the trained parameters as tab-separated text files.

    Writes "weight_w0", "weight_w1", "weight_b0" and "weight_b1" to the
    current working directory, one matrix row per line.

    input:  w0(mat): weights between the input layer and the hidden layer
            w1(mat): weights between the hidden layer and the output layer
            b0(mat): bias between the input layer and the hidden layer
            b1(mat): bias between the hidden layer and the output layer
    output: none
    '''
    def write_file(file_name, source):
        # Read the shape first so a bad input never truncates an existing file.
        n_rows, n_cols = np.shape(source)
        # `with` closes the file even if a write fails part-way.
        with open(file_name, "w") as f:
            for i in range(n_rows):
                cells = [str(source[i, j]) for j in range(n_cols)]
                f.write("\t".join(cells) + "\n")

    write_file("weight_w0", w0)
    write_file("weight_w1", w1)
    write_file("weight_b0", b0)
    write_file("weight_b1", b1)
| 208 | + |
def err_rate(label, pre):
    '''Fraction of samples whose predicted class differs from the label.

    input:  label(mat): true class per sample; only column 0 is read
            pre(mat): predicted class per sample; only column 0 is read
    output: (float): number of mismatching rows / number of rows in label
    '''
    total = np.shape(label)[0]
    # The 0.0 start value keeps the count, and hence the rate, a float.
    wrong = sum((1 for r in range(total) if label[r, 0] != pre[r, 0]), 0.0)
    return wrong / total
| 222 | + |
if __name__ == "__main__":
    # Script entry point: load data, train, save the model, report accuracy.
    # Each print receives one pre-built string so the output is the same on
    # Python 2 and Python 3.

    # 1. Load the data
    print("--------- 1.load data ------------")
    feature, label, n_class = load_data("data.txt")

    # 2. Train the network: 20 hidden nodes, maxCycle 1000, learning rate 0.1
    print("--------- 2.training ------------")
    w0, w1, b0, b1 = bp_train(feature, label, 20, 1000, 0.1, n_class)

    # 3. Save the trained parameters
    print("--------- 3.save model ------------")
    save_model(w0, w1, b0, b1)

    # 4. Predict on the training data and report the accuracy
    print("--------- 4.get prediction ------------")
    result = get_predict(feature, w0, w1, b0, b1)
    true_class = np.argmax(label, axis=1)
    predicted_class = np.argmax(result, axis=1)
    accuracy = 1 - err_rate(true_class, predicted_class)
    print(" ".join(["训练准确性为:", str(accuracy)]))
| 237 | + |
0 commit comments