|
| 1 | +from pandas import read_csv |
| 2 | +from numpy import set_printoptions |
| 3 | +from sklearn.preprocessing import MinMaxScaler |
| 4 | +from sklearn.preprocessing import StandardScaler |
| 5 | +from sklearn.preprocessing import Normalizer |
| 6 | +from sklearn.preprocessing import Binarizer |
| 7 | +from sklearn.feature_selection import SelectKBest |
| 8 | +from sklearn.feature_selection import chi2 |
| 9 | +from sklearn.feature_selection import RFE |
| 10 | +from sklearn.linear_model import LogisticRegression |
| 11 | +from sklearn.decomposition import PCA |
| 12 | +from sklearn.ensemble import ExtraTreesClassifier |
| 13 | + |
# Load the Pima Indians diabetes CSV; column names are supplied explicitly
# through `names` rather than read from the file.
filename = 'C:/Users/na347632/Desktop/FAI/learning/python_pro_bundle/machine_learning_mastery_with_python/code/chapter_05/pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
array = dataframe.values

# The first eight columns are the input features; the ninth ('class') is the target.
X, Y = array[:, :8], array[:, 8]

| 20 | + |
# Univariate selection: score every input column against the target with the
# chi-squared statistic and keep the 4 best-scoring columns.
selector = SelectKBest(score_func=chi2, k=4)
selector.fit(X, Y)
features = selector.transform(X)

# Per-feature scores, followed by the first five rows of the reduced matrix.
print(selector.scores_)
print(features[:5, :])

| 27 | + |
# Recursive Feature Elimination with a logistic-regression estimator,
# keeping 4 features.
# NOTE(review): with default LogisticRegression settings and unscaled inputs
# the solver may emit a convergence warning - confirm and tune if needed.
model = LogisticRegression()
# Pass n_features_to_select by keyword: current scikit-learn releases make it
# keyword-only, so the positional form RFE(model, 4) raises TypeError there.
# The keyword form is also accepted by older releases.
fit = RFE(model, n_features_to_select=4)
featureselect = fit.fit(X, Y)
# Number of selected features, boolean mask of selected columns, and the
# rank assigned to each column.
print(featureselect.n_features_)
print(featureselect.support_)
print(featureselect.ranking_)

| 35 | + |
# PCA: fit a 4-component decomposition on the inputs, then report the
# explained variance and the component vectors.
pca_model = PCA(n_components=4).fit(X)
print(pca_model.explained_variance_)
print(pca_model.components_)

| 41 | + |
# Feature importance: fit an ExtraTreesClassifier with its default settings
# and print the importance score it assigns to each input column.
forest = ExtraTreesClassifier()
forest.fit(X, Y)
importances = forest.feature_importances_
print(importances)
| 46 | + |
| 47 | + |
| 48 | + |
0 commit comments