Skip to content

Commit 1a8b838

Browse files
add KNN
1 parent 23c8b06 commit 1a8b838

File tree

2 files changed

+172
-0
lines changed

2 files changed

+172
-0
lines changed

KNN/KNN_python.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#coding:utf-8
2+
#Author:codewithzichao
3+
4+
5+
'''
6+
Dataset: MNIST (only the first 1000 training samples and the first 1000 test samples are used).
7+
Result (accuracy, K=30): 0.738
8+
Total run time in seconds (data loading included): 28.6643168926239
9+
'''
10+
import numpy as np
11+
import time
12+
13+
def loadData(fileName):
    '''
    Load a headerless, comma-separated MNIST-style file.

    Every non-blank line must look like ``label,feature_1,...,feature_n``
    with each field being an integer literal. Blank lines (for example a
    trailing empty line at the end of the file) are skipped.

    :param fileName: path of the CSV file to read
    :return: ``(data_matrix, label_matrix)`` -- a NumPy array holding one row
        of features per sample, and a NumPy array holding the labels in the
        same order
    :raises ValueError: if a field cannot be parsed as an integer
    '''
    data_list = []
    label_list = []

    with open(fileName, "r") as f:
        # Iterate the file object directly so the whole file is never held
        # in memory as a list of lines.
        for line in f:
            stripped = line.strip()
            # An empty line carries no sample; without this guard int("")
            # would raise ValueError.
            if not stripped:
                continue

            curline = stripped.split(",")

            # Column 0 is the label, the remaining columns are the features.
            data_list.append([int(feature) for feature in curline[1:]])
            label_list.append(int(curline[0]))

    data_matrix = np.array(data_list)
    label_matrix = np.array(label_list)

    return data_matrix, label_matrix
33+
34+
class KNN(object):
    '''
    Brute-force k-nearest-neighbours classifier using Euclidean distance.

    There is no fitting step: the constructor only stores the training set,
    and every prediction scans all stored samples.
    '''

    def __init__(self, train_data, train_label, K):
        '''
        Store the training set and the neighbourhood size.

        :param train_data: 2-D array of training feature vectors, one row per sample
        :param train_label: training labels; ``train_label[i]`` belongs to
            ``train_data[i]``. Labels must be non-negative integers.
        :param K: number of nearest neighbours that vote on a prediction
        '''
        self.train_data = train_data
        self.train_label = train_label
        self.input_num = self.train_data.shape[0]  # number of training samples
        self.feature = self.train_data.shape[1]    # number of features per sample
        self.K = K

    def cal_distance(self, x1, x2):
        '''
        Compute the Euclidean distance between two samples.

        :param x1: first sample
        :param x2: second sample
        :return: square root of the summed squared element-wise differences
        '''
        return np.sqrt(np.sum(np.square(x1 - x2)))

    def get_K(self, x):
        '''
        Predict the label of ``x`` by majority vote among its K nearest
        training samples.

        :param x: feature vector with the same length as a training row
        :return: the most frequent label among the K nearest neighbours; on a
            tie the smallest label wins (``np.argmax`` returns the first maximum)
        :raises ValueError: if a neighbour label is negative
        '''
        # Broadcasting subtracts x from every training row at once, replacing
        # the per-sample Python loop. Note that it allocates a temporary of
        # the same shape as the training set.
        dist_group = np.sqrt(np.sum(np.square(self.train_data - x), axis=1))

        # Ascending sort: the first K indices are the nearest neighbours.
        topK = np.argsort(dist_group)[:self.K]

        # Count votes per label. minlength=10 keeps the ten-bin layout used
        # for digit labels, while bincount grows the histogram automatically
        # when a label is 10 or larger instead of failing on a fixed-size array.
        neighbour_labels = np.asarray(self.train_label)[topK].astype(int)
        labeldist = np.bincount(neighbour_labels, minlength=10)

        return np.argmax(labeldist)

    def test(self, test_data, test_label):
        '''
        Evaluate the classifier on a test set, printing progress per sample.

        :param test_data: 2-D array of test feature vectors, one row per sample
        :param test_label: true labels aligned with ``test_data``
        :return: accuracy, i.e. the fraction of correctly predicted samples
        :raises ZeroDivisionError: if ``test_data`` contains no samples
        '''
        error = 0

        test_num = test_data.shape[0]
        for i in range(test_num):
            print(f"the current sample is {i+1},the total samples is{test_num}.")
            x = test_data[i]
            y = self.get_K(x)

            if y != test_label[i]:
                error += 1

        accuracy = (test_num - error) / test_num
        return accuracy
93+
94+
if __name__ == "__main__":
    # Wall-clock timer covering loading, evaluation and reporting.
    start = time.time()

    print("start load data.")
    train_data, train_label = loadData("../MnistData/mnist_train.csv")
    test_data, test_label = loadData("../MnistData/mnist_test.csv")
    print("finished load data.")

    # Only the first 1000 training samples are handed to the classifier,
    # with K = 30 neighbours.
    train_subset = train_data[:1000]
    train_subset_label = train_label[:1000]
    classifier = KNN(train_subset, train_subset_label, 30)

    print("finished training.")

    # Evaluate on the first 1000 test samples.
    test_subset = test_data[:1000]
    test_subset_label = test_label[:1000]
    accuracy = classifier.test(test_subset, test_subset_label)
    print(f"the accuracy is {accuracy}.")

    end = time.time()

    print(f"the total time is {end-start}.")
112+
113+

KNN/KNN_sklearn.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#coding:utf-8
2+
#Author:codewithzichao
3+
4+
5+
'''
6+
Dataset: MNIST (only the first 1000 training samples and the first 1000 test samples are used).
7+
Result (accuracy, n_neighbors=10): 0.799
8+
Total run time in seconds (data loading included): 16.832828998565674
9+
---------------------------
10+
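As expected, the hand-written Python version implements no k-d tree or similar optimization, so it trails sklearn in both accuracy and run time. Note: the accuracy figures are not directly comparable, because the hand-written run uses K=30 while this script uses n_neighbors=10.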
11+
'''
12+
13+
import numpy as np
14+
import time
15+
from sklearn.neighbors import KNeighborsClassifier
16+
17+
def loadData(fileName):
    '''
    Load a headerless, comma-separated MNIST-style file.

    Every non-blank line must look like ``label,feature_1,...,feature_n``
    with each field being an integer literal. Blank lines (for example a
    trailing empty line at the end of the file) are skipped.

    :param fileName: path of the CSV file to read
    :return: ``(data_matrix, label_matrix)`` -- a NumPy array holding one row
        of features per sample, and a NumPy array holding the labels in the
        same order
    :raises ValueError: if a field cannot be parsed as an integer
    '''
    data_list = []
    label_list = []

    with open(fileName, "r") as f:
        # Iterate the file object directly so the whole file is never held
        # in memory as a list of lines.
        for line in f:
            stripped = line.strip()
            # An empty line carries no sample; without this guard int("")
            # would raise ValueError.
            if not stripped:
                continue

            curline = stripped.split(",")

            # Column 0 is the label, the remaining columns are the features.
            data_list.append([int(feature) for feature in curline[1:]])
            label_list.append(int(curline[0]))

    data_matrix = np.array(data_list)
    label_matrix = np.array(label_list)

    return data_matrix, label_matrix
37+
38+
39+
if __name__ == "__main__":
    # Wall-clock timer covering loading, fitting, prediction and reporting.
    start = time.time()

    print("start load data.")
    train_data, train_label = loadData("../MnistData/mnist_train.csv")
    test_data, test_label = loadData("../MnistData/mnist_test.csv")
    print("finished load data.")

    # Fit on the first 1000 training samples with 10 neighbours.
    knn = KNeighborsClassifier(n_neighbors=10)
    knn.fit(train_data[:1000], train_label[:1000])

    # Evaluate on (at most) the first 1000 test samples.
    eval_data = test_data[:1000]
    eval_label = test_label[:1000]

    prediction = knn.predict(eval_data)
    # zip stops at the number of samples actually available, so a test file
    # with fewer than 1000 rows no longer raises IndexError.
    for predicted, true in zip(prediction, eval_label):
        print(f"predict is {predicted},the true is {true}.")

    # Accuracy is the fraction of matching predictions. Computing it from the
    # predictions already made avoids a second full pass, which knn.score
    # would perform by predicting again.
    accuracy = float(np.mean(prediction == eval_label))
    print(f"the accuracy is {accuracy}.")

    end = time.time()

    print(f"the total time is {end-start}.")
59+

0 commit comments

Comments
 (0)