Skip to content

Commit 3c84210

Browse files
authored
Selecting features is very important for reducing over-fitting and increasing accuracy
Which features need to be selected as input to the ML model, and which need to be removed? This reusable code snippet helps you decide which features are important and which are not.
1 parent 962dbf9 commit 3c84210

File tree

1 file changed

+48
-0
lines changed

1 file changed

+48
-0
lines changed

featureSelection.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from pandas import read_csv
2+
from numpy import set_printoptions
3+
from sklearn.preprocessing import MinMaxScaler
4+
from sklearn.preprocessing import StandardScaler
5+
from sklearn.preprocessing import Normalizer
6+
from sklearn.preprocessing import Binarizer
7+
from sklearn.feature_selection import SelectKBest
8+
from sklearn.feature_selection import chi2
9+
from sklearn.feature_selection import RFE
10+
from sklearn.linear_model import LogisticRegression
11+
from sklearn.decomposition import PCA
12+
from sklearn.ensemble import ExtraTreesClassifier
13+
14+
# Load the dataset. Column labels are passed to read_csv via `names`, so
# every row of the file is read as data rather than as a header.
filename = 'C:/Users/na347632/Desktop/FAI/learning/python_pro_bundle/machine_learning_mastery_with_python/code/chapter_05/pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
array = dataframe.values

# Columns 0-7 are the inputs; column 8 (labelled 'class') is the target.
X = array[:, :8]
Y = array[:, 8]
20+
21+
# Univariate selection: score every input column against the target with
# the chi-squared statistic and keep the four best-scoring columns.
test = SelectKBest(score_func=chi2, k=4)
fit = test.fit(X, Y)
features = fit.transform(X)
print(fit.scores_)      # one score per input column
print(features[:5, :])  # first five rows of the reduced matrix
27+
28+
# Recursive Feature Elimination: wrap a logistic-regression model in RFE
# and ask it to keep four features.
model = LogisticRegression()
# n_features_to_select is keyword-only in scikit-learn >= 1.0; passing it
# positionally (RFE(model, 4)) raises TypeError there. The keyword form
# also works on older releases.
fit = RFE(model, n_features_to_select=4)
featureselect = fit.fit(X, Y)
print(featureselect.n_features_)  # number of features kept
print(featureselect.support_)     # boolean mask of the kept columns
print(featureselect.ranking_)     # rank per column; kept columns are rank 1
35+
36+
# PCA: fit a four-component principal component analysis to the inputs
# and report the variance and axes of those components.
pcamo = PCA(n_components=4)
fit = pcamo.fit(X)
print(fit.explained_variance_)
print(fit.components_)
41+
42+
# Feature importance: fit an extra-trees ensemble on the full dataset and
# print the importance it assigns to each input column.
model = ExtraTreesClassifier()
model.fit(X, Y)
print(model.feature_importances_)
46+
47+
48+

0 commit comments

Comments
 (0)