Skip to content

Commit c77e380

Browse files
author
louwill
authored
Add files via upload
1 parent 54961a6 commit c77e380

File tree

2 files changed

+297
-0
lines changed

2 files changed

+297
-0
lines changed

naive bayes/naive_bayes.ipynb

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import numpy as np\n",
10+
"import pandas as pd"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": 2,
16+
"metadata": {},
17+
"outputs": [
18+
{
19+
"data": {
20+
"text/html": [
21+
"<div>\n",
22+
"<style scoped>\n",
23+
" .dataframe tbody tr th:only-of-type {\n",
24+
" vertical-align: middle;\n",
25+
" }\n",
26+
"\n",
27+
" .dataframe tbody tr th {\n",
28+
" vertical-align: top;\n",
29+
" }\n",
30+
"\n",
31+
" .dataframe thead th {\n",
32+
" text-align: right;\n",
33+
" }\n",
34+
"</style>\n",
35+
"<table border=\"1\" class=\"dataframe\">\n",
36+
" <thead>\n",
37+
" <tr style=\"text-align: right;\">\n",
38+
" <th></th>\n",
39+
" <th>x1</th>\n",
40+
" <th>x2</th>\n",
41+
" <th>y</th>\n",
42+
" </tr>\n",
43+
" </thead>\n",
44+
" <tbody>\n",
45+
" <tr>\n",
46+
" <th>0</th>\n",
47+
" <td>1</td>\n",
48+
" <td>S</td>\n",
49+
" <td>-1</td>\n",
50+
" </tr>\n",
51+
" <tr>\n",
52+
" <th>1</th>\n",
53+
" <td>1</td>\n",
54+
" <td>M</td>\n",
55+
" <td>-1</td>\n",
56+
" </tr>\n",
57+
" <tr>\n",
58+
" <th>2</th>\n",
59+
" <td>1</td>\n",
60+
" <td>M</td>\n",
61+
" <td>1</td>\n",
62+
" </tr>\n",
63+
" <tr>\n",
64+
" <th>3</th>\n",
65+
" <td>1</td>\n",
66+
" <td>S</td>\n",
67+
" <td>1</td>\n",
68+
" </tr>\n",
69+
" <tr>\n",
70+
" <th>4</th>\n",
71+
" <td>1</td>\n",
72+
" <td>S</td>\n",
73+
" <td>-1</td>\n",
74+
" </tr>\n",
75+
" </tbody>\n",
76+
"</table>\n",
77+
"</div>"
78+
],
79+
"text/plain": [
80+
" x1 x2 y\n",
81+
"0 1 S -1\n",
82+
"1 1 M -1\n",
83+
"2 1 M 1\n",
84+
"3 1 S 1\n",
85+
"4 1 S -1"
86+
]
87+
},
88+
"execution_count": 2,
89+
"metadata": {},
90+
"output_type": "execute_result"
91+
}
92+
],
93+
"source": [
94+
"x1 = [1,1,1,1,1,2,2,2,2,2,3,3,3,3,3]\n",
95+
"x2 = ['S','M','M','S','S','S','M','M','L','L','L','M','M','L','L']\n",
96+
"y = [-1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1]\n",
97+
"\n",
98+
"df = pd.DataFrame({'x1':x1, 'x2':x2, 'y':y})\n",
99+
"df.head()"
100+
]
101+
},
102+
{
103+
"cell_type": "code",
104+
"execution_count": 3,
105+
"metadata": {},
106+
"outputs": [],
107+
"source": [
108+
"X = df[['x1', 'x2']]\n",
109+
"y = df[['y']]"
110+
]
111+
},
112+
{
113+
"cell_type": "code",
114+
"execution_count": 4,
115+
"metadata": {},
116+
"outputs": [],
117+
"source": [
118+
"def nb_fit(X, y):\n",
119+
" classes = y[y.columns[0]].unique()\n",
120+
" class_count = y[y.columns[0]].value_counts()\n",
121+
" class_prior = class_count/len(y)\n",
122+
" \n",
123+
" prior = dict()\n",
124+
" for col in X.columns:\n",
125+
" for j in classes:\n",
126+
" p_x_y = X[(y==j).values][col].value_counts()\n",
127+
" for i in p_x_y.index:\n",
128+
" prior[(col, i, j)] = p_x_y[i]/class_count[j]\n",
129+
" return classes, class_prior, prior"
130+
]
131+
},
132+
{
133+
"cell_type": "code",
134+
"execution_count": 5,
135+
"metadata": {},
136+
"outputs": [
137+
{
138+
"data": {
139+
"text/plain": [
140+
"(array([-1, 1], dtype=int64), 1 0.6\n",
141+
" -1 0.4\n",
142+
" Name: y, dtype: float64, {('x1', 1, -1): 0.5,\n",
143+
" ('x1', 2, -1): 0.3333333333333333,\n",
144+
" ('x1', 3, -1): 0.16666666666666666,\n",
145+
" ('x1', 3, 1): 0.4444444444444444,\n",
146+
" ('x1', 2, 1): 0.3333333333333333,\n",
147+
" ('x1', 1, 1): 0.2222222222222222,\n",
148+
" ('x2', 'S', -1): 0.5,\n",
149+
" ('x2', 'M', -1): 0.3333333333333333,\n",
150+
" ('x2', 'L', -1): 0.16666666666666666,\n",
151+
" ('x2', 'L', 1): 0.4444444444444444,\n",
152+
" ('x2', 'M', 1): 0.4444444444444444,\n",
153+
" ('x2', 'S', 1): 0.1111111111111111})"
154+
]
155+
},
156+
"execution_count": 5,
157+
"metadata": {},
158+
"output_type": "execute_result"
159+
}
160+
],
161+
"source": [
162+
"nb_fit(X, y)"
163+
]
164+
},
165+
{
166+
"cell_type": "code",
167+
"execution_count": 6,
168+
"metadata": {},
169+
"outputs": [],
170+
"source": [
171+
"X_test = {'x1': 2, 'x2': 'S'}"
172+
]
173+
},
174+
{
175+
"cell_type": "code",
176+
"execution_count": 7,
177+
"metadata": {},
178+
"outputs": [],
179+
"source": [
180+
"classes, class_prior, prior = nb_fit(X, y)\n",
181+
"\n",
182+
"def predict(X_test):\n",
183+
" res = []\n",
184+
" for c in classes:\n",
185+
" p_y = class_prior[c]\n",
186+
" p_x_y = 1\n",
187+
" for i in X_test.items():\n",
188+
" p_x_y *= prior[tuple(list(i)+[c])]\n",
189+
" res.append(p_y*p_x_y)\n",
190+
" return classes[np.argmax(res)]"
191+
]
192+
},
193+
{
194+
"cell_type": "code",
195+
"execution_count": 10,
196+
"metadata": {},
197+
"outputs": [
198+
{
199+
"name": "stdout",
200+
"output_type": "stream",
201+
"text": [
202+
"测试数据预测类别为: -1\n"
203+
]
204+
}
205+
],
206+
"source": [
207+
"print('测试数据预测类别为:', predict(X_test))"
208+
]
209+
},
210+
{
211+
"cell_type": "code",
212+
"execution_count": null,
213+
"metadata": {},
214+
"outputs": [],
215+
"source": []
216+
}
217+
],
218+
"metadata": {
219+
"kernelspec": {
220+
"display_name": "Python 3",
221+
"language": "python",
222+
"name": "python3"
223+
},
224+
"language_info": {
225+
"codemirror_mode": {
226+
"name": "ipython",
227+
"version": 3
228+
},
229+
"file_extension": ".py",
230+
"mimetype": "text/x-python",
231+
"name": "python",
232+
"nbconvert_exporter": "python",
233+
"pygments_lexer": "ipython3",
234+
"version": "3.7.3"
235+
}
236+
},
237+
"nbformat": 4,
238+
"nbformat_minor": 2
239+
}

naive bayes/naive_bayes.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/usr/bin/env python3
2+
# -*- encoding: utf-8 -*-
3+
4+
'''
5+
@author: louwill
6+
7+
@file: naive_bayes.py
8+
@time: 2019/5/12 9:41
9+
'''
10+
11+
import numpy as np
12+
import pandas as pd
13+
14+
class Naive_Bayes:
    """Naive Bayes classifier for categorical (discrete) features.

    Probabilities are plain maximum-likelihood frequency estimates; no
    smoothing is applied. The fitted parameters are stored on the instance
    by ``nb_fit`` so that ``predict`` does not depend on module-level names.
    """

    def __init__(self):
        # Fitted state, populated by nb_fit().
        self.classes = None
        self.class_prior = None
        self.prior = None

    # Naive Bayes training procedure
    def nb_fit(self, X, y):
        """Estimate class priors and class-conditional probabilities.

        Args:
            X: DataFrame with one column per categorical feature.
            y: Labels. A DataFrame (its first column is used), a Series,
                or any 1-D sequence with one entry per row of ``X``.

        Returns:
            A tuple ``(classes, class_prior, prior)``:
            ``classes`` is the array of distinct labels in order of first
            appearance; ``class_prior`` is a Series mapping label -> P(y);
            ``prior`` is a dict mapping ``(column, value, label)`` ->
            P(column == value | y == label) for every observed combination.
        """
        if isinstance(y, pd.DataFrame):
            labels = y.iloc[:, 0]
        else:
            labels = pd.Series(y)

        classes = labels.unique()
        class_count = labels.value_counts()
        # Class prior probabilities
        class_prior = class_count / len(labels)

        # Class-conditional probabilities
        prior = dict()
        for j in classes:
            # 1-D positional mask so that X and y need not share an index.
            mask = (labels == j).values
            n_in_class = class_count.loc[j]
            rows = X[mask]
            for col in X.columns:
                for value, count in rows[col].value_counts().items():
                    prior[(col, value, j)] = count / n_in_class

        self.classes = classes
        self.class_prior = class_prior
        self.prior = prior
        return classes, class_prior, prior

    # Predict a new instance
    def predict(self, X_test):
        """Return the most probable class label for a single sample.

        Args:
            X_test: Mapping of feature (column) name -> feature value.

        Returns:
            The label from ``classes`` with the largest unnormalised
            posterior P(y) * prod P(x_i | y). Ties resolve to the label
            that appears first in ``classes``.

        Raises:
            RuntimeError: If ``nb_fit`` has not been called on this instance.
        """
        if self.classes is None:
            raise RuntimeError("nb_fit() must be called before predict()")

        res = []
        for c in self.classes:
            p_x_y = 1
            for col, value in X_test.items():
                # A combination never seen in training has an estimated
                # probability of zero rather than being an error.
                p_x_y *= self.prior.get((col, value, c), 0.0)
            res.append(self.class_prior.loc[c] * p_x_y)
        return self.classes[np.argmax(res)]
44+
45+
46+
if __name__ == "__main__":
    # Toy training set: two categorical features (x1, x2) and a label y.
    df = pd.DataFrame({
        'x1': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3],
        'x2': ['S', 'M', 'M', 'S', 'S', 'S', 'M', 'M', 'L', 'L', 'L', 'M', 'M', 'L', 'L'],
        'y': [-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1],
    })
    X = df[['x1', 'x2']]
    y = df[['y']]

    # Single sample to classify, given as feature name -> value.
    X_test = {'x1': 2, 'x2': 'S'}

    nb = Naive_Bayes()
    # The fitted parameters are bound at module level under these names.
    classes, class_prior, prior = nb.nb_fit(X, y)
    print('测试数据预测类别为:', nb.predict(X_test))
58+

0 commit comments

Comments
 (0)