K Means Clustering
K Means Clustering
In [1]:
1 import pandas as pd
In [5]:
# Load the mall customer data into `ml`. The path is relative, so the CSV is
# looked up in the kernel's current working directory.
1 ml = pd.read_csv("mall_kmeans.csv")
In [6]:
# Preview the first rows to check that the file parsed as expected.
1 ml.head()
Out[6]:
0 1 Male 19 15 39
1 2 Male 21 15 81
2 3 Female 20 16 6
3 4 Female 23 16 77
4 5 Female 31 17 40
In [8]:
# Count missing values in every column before modelling.
ml.isna().sum()
Out[8]:
CustomerID 0
Genre 0
Age 0
Annual Income (k$) 0
Spending Score (1-100) 0
dtype: int64
In [9]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
1 ml.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 CustomerID 200 non-null int64
1 Genre 200 non-null object
2 Age 200 non-null int64
3 Annual Income (k$) 200 non-null int64
4 Spending Score (1-100) 200 non-null int64
dtypes: int64(4), object(1)
memory usage: 7.9+ KB
localhost:8888/notebooks/Desktop/ML/Mall_kmeans/Mall_kmean.ipynb 1/10
9/1/23, 2:11 PM Mall_kmean - Jupyter Notebook
In [10]:
# Frequency of each category in the Genre column.
ml["Genre"].value_counts()
Out[10]:
Female 112
Male 88
Name: Genre, dtype: int64
In [11]:
# Encode the categorical Genre column as integers (Female -> 0, Male -> 1) so
# that every column of `ml` is numeric.
# The result is assigned back rather than calling `replace(..., inplace=True)`
# on the `ml.Genre` accessor: that chained in-place call raises a warning in
# pandas 2.2 and leaves `ml` unchanged once Copy-on-Write is active (the
# pandas 3.0 default). `replace` ignores values that are already encoded, so
# re-running this cell is harmless; `astype` guarantees an integer column.
ml["Genre"] = ml["Genre"].replace({'Female': 0, 'Male': 1}).astype("int64")
In [14]:
# List the columns that still have object dtype; an empty Index means the
# Genre encoding left no non-numeric columns behind.
1 ml.select_dtypes(include='object').columns
Out[14]:
Index([], dtype='object')
In [15]:
In [111]:
# Build the 5-cluster K-Means estimator (it is fitted in the next cell).
# random_state pins the centroid initialisation so the cluster numbering, and
# therefore the colour each cluster gets from `colormap` further down, is the
# same on every run. n_init is spelled out because its default changed from 10
# to "auto" in scikit-learn 1.4.
kmeans_ml = KMeans(n_clusters=5, n_init=10, random_state=42)
In [112]:
# Fit the estimator on every column of `ml`.
# NOTE(review): `ml` still contains CustomerID here (no earlier cell drops it)
# and no column is scaled. The labels printed below change in step with row
# order, which suggests the identifier column is driving the clusters.
# Consider fitting on the feature columns only; the centroid-naming and score
# cells assume the model saw all of ml's columns and would need the same change.
1 kmeans_ml.fit(ml)
Out[112]:
KMeans(n_clusters=5)
In [113]:
# Cluster index assigned by the fitted model to each row of `ml`.
1 kmeans_ml.labels_
Out[113]:
array([2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4,
2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4,
2, 4, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 3, 1, 3, 1, 3,
1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3,
1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3,
1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3,
1, 3])
localhost:8888/notebooks/Desktop/ML/Mall_kmeans/Mall_kmean.ipynb 2/10
9/1/23, 2:11 PM Mall_kmean - Jupyter Notebook
In [114]:
# Distinct cluster labels that were actually assigned.
1 set(kmeans_ml.labels_)
Out[114]:
{0, 1, 2, 3, 4}
In [115]:
# Centroid coordinates of the fitted model: one row per cluster, one column
# per feature the model was fitted on.
1 kmeans_ml.cluster_centers_
Out[115]:
In [116]:
# Number of centroids held by the fitted model.
1 len(kmeans_ml.cluster_centers_)
Out[116]:
In [117]:
# Wrap the fitted centroid coordinates in a DataFrame so they can be labelled
# and inspected as a table.
centroid_df = pd.DataFrame(data=kmeans_ml.cluster_centers_)
In [118]:
# Label the centroid table with ml's column names. This lines up only because
# the model was fitted on all of ml's columns, so both have the same width.
1 centroid_df.columns = ml.columns
In [119]:
# Display the labelled centroid table.
1 centroid_df
Out[119]:
localhost:8888/notebooks/Desktop/ML/Mall_kmeans/Mall_kmean.ipynb 3/10
9/1/23, 2:11 PM Mall_kmean - Jupyter Notebook
In [120]:
# Score the fitted model on the data it was trained on. KMeans.score returns
# the opposite of the K-Means objective, which is why the value is negative.
1 kmeans_ml.score(ml)
Out[120]:
-157141.33959373957
In [94]:
# Elbow search: fit K-Means for k = 1..9 and collect each model's score on
# `ml` in `lst` (KMeans.score is the negated K-Means objective, so the values
# gathered here are negative).
# The loop uses its own estimator name (`km`) so that it does not overwrite the
# 5-cluster `kmeans_ml` model whose `labels_` and `cluster_centers_` other
# cells read; rebinding it here would leave a 9-cluster model behind and the
# 5-entry `colormap` lookup would then fail. random_state and n_init make the
# collected scores repeatable from run to run.
lst = []
k_values = range(1, 10)
for k in k_values:
    km = KMeans(n_clusters=k, n_init=10, random_state=42)
    km.fit(ml)
    score = km.score(ml)
    lst.append(score)
    # Progress trace: clusters done so far and how many remain.
    print("cluster over are", k, "cluster left are", len(k_values) - k)
    print("____________________")
C:\Users\MR.GODHADE\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.p
y:1036: UserWarning: KMeans is known to have a memory leak on Windows with
MKL, when there are less chunks than available threads. You can avoid it b
y setting the environment variable OMP_NUM_THREADS=1.
warnings.warn(
In [121]:
1 import numpy as np
In [122]:
# Drop the sign of the collected scores and round them to whole numbers.
# Note: this rebinds `lst` from the Python list built by the loop to the
# NumPy array returned by np.round.
1 lst = np.round(np.abs(lst))
In [123]:
# The k values 1..9, matching the range used by the elbow loop.
# Presumably the x-axis for an elbow plot; the plotting cell is not visible
# here, so confirm.
1 cluster_num = list(range(1,10))
localhost:8888/notebooks/Desktop/ML/Mall_kmeans/Mall_kmean.ipynb 4/10
9/1/23, 2:11 PM Mall_kmean - Jupyter Notebook
In [124]:
In [125]:
In [126]:
# Display the rounded, sign-free scores, one per k.
1 lst
Out[126]:
In [127]:
Out[127]:
17.848424521880645
localhost:8888/notebooks/Desktop/ML/Mall_kmeans/Mall_kmean.ipynb 5/10
9/1/23, 2:11 PM Mall_kmean - Jupyter Notebook
In [128]:
# Percentage decrease from 387066 to 271397.
# NOTE(review): the operands are hard-coded, presumably copied from the
# rounded scores in `lst`; confirm, and index `lst` instead so the figure
# follows a re-run.
1 (387066 - 271397)*100/387066
Out[128]:
29.88353407429224
In [129]:
# Percentage decrease from 271397 to 195401.
# NOTE(review): the operands are hard-coded, presumably copied from the
# rounded scores in `lst`; confirm, and index `lst` instead so the figure
# follows a re-run.
1 (271397 - 195401)*100/271397
Out[129]:
28.001783365328283
In [130]:
# Percentage decrease from 195401 to 157506.
# NOTE(review): the operands are hard-coded, presumably copied from the
# rounded scores in `lst`; confirm, and index `lst` instead so the figure
# follows a re-run.
1 (195401 - 157506)*100/195401
Out[130]:
19.393452438830916
In [131]:
# One colour name per cluster label, in label order (index 0 -> 'Red', ...,
# index 4 -> 'Black'). It holds five entries, so it only covers labels 0..4.
1 colormap = np.array(['Red','Green','Blue','Yellow','Black'])
In [140]:
# Cluster labels of whichever model `kmeans_ml` currently refers to; these are
# the indices used for the colour lookup.
1 kmeans_ml.labels_
Out[140]:
array([2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4,
2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4, 2, 4,
2, 4, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 3, 1, 3, 1, 3,
1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3,
1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3,
1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3,
1, 3])
localhost:8888/notebooks/Desktop/ML/Mall_kmeans/Mall_kmean.ipynb 6/10
9/1/23, 2:11 PM Mall_kmean - Jupyter Notebook
In [139]:
# Integer-array indexing: turn each row's cluster label into its colour name.
# Every label must be a valid index into the five-entry `colormap`.
1 colormap[kmeans_ml.labels_]
Out[139]:
In [133]:
Out[133]:
<matplotlib.collections.PathCollection at 0x1c7ec6cefa0>
localhost:8888/notebooks/Desktop/ML/Mall_kmeans/Mall_kmean.ipynb 7/10
9/1/23, 2:11 PM Mall_kmean - Jupyter Notebook
In [134]:
# Display the frame as it stands now; Genre holds the 0/1 codes instead of
# the original strings.
1 ml
Out[134]:
0 1 1 19 15 39
1 2 1 21 15 81
2 3 0 20 16 6
3 4 0 23 16 77
4 5 0 31 17 40
localhost:8888/notebooks/Desktop/ML/Mall_kmeans/Mall_kmean.ipynb 8/10
9/1/23, 2:11 PM Mall_kmean - Jupyter Notebook
In [136]:
Out[136]:
localhost:8888/notebooks/Desktop/ML/Mall_kmeans/Mall_kmean.ipynb 9/10
9/1/23, 2:11 PM Mall_kmean - Jupyter Notebook
In [137]:
Out[137]:
In [ ]:
localhost:8888/notebooks/Desktop/ML/Mall_kmeans/Mall_kmean.ipynb 10/10