1
1
import time
2
+ from typing import Union , Optional
2
3
3
4
import numpy as np
4
5
import os
13
14
from decision_trees .utils .constants import get_classifier
14
15
15
16
16
- def test_dataset (number_of_bits_per_feature : int ,
17
- train_data : np .ndarray , train_target : np .ndarray ,
18
- test_data : np .ndarray , test_target : np .ndarray ,
19
- clf_type : ClassifierType ,
20
- path : str , name : str
21
- ):
22
- path = path + '/' + name + '_' + str (number_of_bits_per_feature ) + '_' + clf_type .name + '/'
17
+ def test_dataset (
18
+ number_of_bits_per_feature : int ,
19
+ train_data : np .ndarray , train_target : np .ndarray ,
20
+ test_data : np .ndarray , test_target : np .ndarray ,
21
+ clf_type : ClassifierType ,
22
+ max_depth : Optional [int ], number_of_classifiers : Optional [int ],
23
+ path : str , name : str
24
+ ):
25
+ path = os .path .join (
26
+ path ,
27
+ name + '_' + str (number_of_bits_per_feature ) + '_' + clf_type .name + '_' + str (max_depth ) + '_' + str (number_of_classifiers )
28
+ )
29
+ result_file = os .path .join (path , 'score.txt' )
23
30
24
31
if not os .path .exists (path ):
25
32
os .makedirs (path )
26
33
27
34
# first create classifier from scikit
28
- clf = get_classifier (clf_type )
35
+ clf = get_classifier (clf_type , max_depth , number_of_classifiers )
29
36
30
37
# first - train the classifiers on non-quantized data
31
38
clf .fit (train_data , train_target )
32
39
test_predicted = clf .predict (test_data )
33
40
print ("scikit clf with test data:" )
34
- report_performance (clf , clf_type , test_target , test_predicted )
41
+ report_performance (clf , clf_type , test_target , test_predicted , result_file )
35
42
36
43
# perform quantization of train and test data
37
44
# while at some point I was considering not quantizing the test data,
@@ -44,7 +51,7 @@ def test_dataset(number_of_bits_per_feature: int,
44
51
clf .fit (train_data_quantized , train_target )
45
52
test_predicted_quantized = clf .predict (test_data_quantized )
46
53
print ("scikit clf with train and test data quantized:" )
47
- report_performance (clf , clf_type , test_target , test_predicted_quantized )
54
+ report_performance (clf , clf_type , test_target , test_predicted_quantized , result_file )
48
55
49
56
# generate own classifier based on the one from scikit
50
57
number_of_features = len (train_data [0 ])
@@ -69,48 +76,80 @@ def test_dataset(number_of_bits_per_feature: int,
69
76
# _test_classification_performance(clf, test_data, 10, 10)
70
77
71
78
72
def report_performance(
        clf, clf_type: ClassifierType,
        expected: np.ndarray, predicted: np.ndarray,
        result_file: Optional[str] = None
):
    """Emit a performance report using the routine that matches ``clf_type``.

    ``ClassifierType.RANDOM_FOREST_REGRESSOR`` is handled by
    ``_report_regressor`` (which does not receive ``clf``); every other
    classifier type is handled by ``_report_classifier``.

    :param clf: the fitted estimator, forwarded only to the classifier report.
    :param clf_type: selects which report routine is called.
    :param expected: reference targets, forwarded unchanged.
    :param predicted: predicted targets, forwarded unchanged.
    :param result_file: forwarded unchanged to the selected report routine.
    """
    is_regressor = clf_type == ClassifierType.RANDOM_FOREST_REGRESSOR

    # Guard clause: everything that is not the regressor goes through the
    # classifier report.
    if not is_regressor:
        _report_classifier(clf, expected, predicted, result_file)
        return

    _report_regressor(expected, predicted, result_file)
77
88
78
89
79
def _report_classifier(
        clf,
        expected: np.ndarray, predicted: np.ndarray,
        result_file: Optional[str] = None
):
    """Build a textual classification report and write or print it.

    The report consists of the ``metrics.classification_report`` text, the
    confusion matrix with every row scaled to percentages of that row's sum,
    and the weighted f1 / precision / recall scores plus the accuracy.

    :param clf: classifier being reported; only ``str(clf)`` is used, in the
        report header.
    :param expected: reference labels passed to the ``metrics`` functions.
    :param predicted: predicted labels passed to the ``metrics`` functions.
    :param result_file: when not ``None`` the report is appended to this file
        and nothing is printed; when ``None`` the report is printed.
    """
    lines = [
        'Detailed classification report:',
        'Classification report for classifier ' + str(clf),
        str(metrics.classification_report(expected, predicted)),
    ]

    cm = metrics.confusion_matrix(expected, predicted)
    row_totals = cm.sum(axis=1)[:, None]
    # A row that sums to zero would otherwise produce a NaN row and a
    # RuntimeWarning; such rows are reported as 0 % instead.
    cm = np.divide(
        cm, row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0
    ) * 100

    # To force a fixed float format for the matrix, enable:
    # np.set_printoptions(formatter={'float': '{: 2.2f}'.format})
    lines.append(f'Confusion matrix:\n{cm}')

    f1_score = metrics.f1_score(expected, predicted, average='weighted')
    precision = metrics.precision_score(expected, predicted, average='weighted')
    recall = metrics.recall_score(expected, predicted, average='weighted')
    accuracy = metrics.accuracy_score(expected, predicted)
    # Format spec "2.4": minimum width 2, 4 significant digits.
    lines.append(f'f1_score: {f1_score:2.4}')
    lines.append(f'precision: {precision:2.4}')
    lines.append(f'recall: {recall:2.4}')
    lines.append(f'accuracy: {accuracy:2.4}')

    report = '\n'.join(lines) + '\n'

    if result_file is not None:
        # Append-only: read access is not needed, and the encoding is pinned
        # so the file does not depend on the platform locale.
        with open(result_file, 'a', encoding='utf-8') as f:
            f.write(report)
    else:
        print(report)
98
120
99
121
100
def _report_regressor(
        expected: np.ndarray,
        predicted: np.ndarray,
        result_file: Optional[str] = None
):
    """Build a textual regression report and write or print it.

    The report lists the mean absolute error, mean squared error, coefficient
    of determination (``metrics.r2_score``) and explained variance score.

    :param expected: reference values passed to the ``metrics`` functions.
    :param predicted: predicted values passed to the ``metrics`` functions.
    :param result_file: when not ``None`` the report is appended to this file
        and nothing is printed; when ``None`` the report is printed.
    """
    mae = metrics.mean_absolute_error(expected, predicted)
    mse = metrics.mean_squared_error(expected, predicted)
    r2s = metrics.r2_score(expected, predicted)
    evs = metrics.explained_variance_score(expected, predicted)

    # Format spec "2.4": minimum width 2, 4 significant digits.
    lines = [
        'Detailed regression report:',
        f'mean_absolute_error: {mae:2.4}',
        f'mean_squared_error: {mse:2.4}',
        f'coefficient_of_determination: {r2s:2.4}',
        f'explained_variance_score: {evs:2.4}',
    ]
    report = '\n'.join(lines) + '\n'

    if result_file is not None:
        # Append-only: read access is not needed, and the encoding is pinned
        # so the file does not depend on the platform locale.
        with open(result_file, 'a', encoding='utf-8') as f:
            f.write(report)
    else:
        print(report)
111
144
112
145
113
- def generate_my_classifier (clf , number_of_features : int , number_of_bits_per_feature : int , path : str ):
146
+ def generate_my_classifier (
147
+ clf ,
148
+ number_of_features : int ,
149
+ number_of_bits_per_feature : int ,
150
+ path : str ,
151
+ result_file : Union [str , None ]= None
152
+ ):
114
153
if isinstance (clf , DecisionTreeClassifier ):
115
154
print ("Creating decision tree classifier!" )
116
155
my_clf = Tree ("DecisionTreeClassifier" , number_of_features , number_of_bits_per_feature )
@@ -123,7 +162,7 @@ def generate_my_classifier(clf, number_of_features: int, number_of_bits_per_feat
123
162
124
163
my_clf .build (clf )
125
164
my_clf .create_vhdl_file (path )
126
- my_clf .print_parameters ()
165
+ my_clf .print_parameters (result_file )
127
166
128
167
return my_clf
129
168
0 commit comments