Skip to content

Commit ba58972

Browse files
committed
Adjusted the testing routine and the way results are saved to a file.
1 parent c7c4bf7 commit ba58972

File tree

2 files changed

+71
-32
lines changed

2 files changed

+71
-32
lines changed

decision_trees/dataset_tester.py

+70-31
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import time
2+
from typing import Union, Optional
23

34
import numpy as np
45
import os
@@ -13,25 +14,31 @@
1314
from decision_trees.utils.constants import get_classifier
1415

1516

16-
def test_dataset(number_of_bits_per_feature: int,
17-
train_data: np.ndarray, train_target: np.ndarray,
18-
test_data: np.ndarray, test_target: np.ndarray,
19-
clf_type: ClassifierType,
20-
path: str, name: str
21-
):
22-
path = path + '/' + name + '_' + str(number_of_bits_per_feature) + '_' + clf_type.name + '/'
17+
def test_dataset(
18+
number_of_bits_per_feature: int,
19+
train_data: np.ndarray, train_target: np.ndarray,
20+
test_data: np.ndarray, test_target: np.ndarray,
21+
clf_type: ClassifierType,
22+
max_depth: Optional[int], number_of_classifiers: Optional[int],
23+
path: str, name: str
24+
):
25+
path = os.path.join(
26+
path,
27+
name + '_' + str(number_of_bits_per_feature) + '_' + clf_type.name + '_' + str(max_depth) + '_' + str(number_of_classifiers)
28+
)
29+
result_file = os.path.join(path, 'score.txt')
2330

2431
if not os.path.exists(path):
2532
os.makedirs(path)
2633

2734
# first create classifier from scikit
28-
clf = get_classifier(clf_type)
35+
clf = get_classifier(clf_type, max_depth, number_of_classifiers)
2936

3037
# first - train the classifiers on non-quantized data
3138
clf.fit(train_data, train_target)
3239
test_predicted = clf.predict(test_data)
3340
print("scikit clf with test data:")
34-
report_performance(clf, clf_type, test_target, test_predicted)
41+
report_performance(clf, clf_type, test_target, test_predicted, result_file)
3542

3643
# perform quantization of train and test data
3744
# while at some point I was considering not quantizing the test data,
@@ -44,7 +51,7 @@ def test_dataset(number_of_bits_per_feature: int,
4451
clf.fit(train_data_quantized, train_target)
4552
test_predicted_quantized = clf.predict(test_data_quantized)
4653
print("scikit clf with train and test data quantized:")
47-
report_performance(clf, clf_type, test_target, test_predicted_quantized)
54+
report_performance(clf, clf_type, test_target, test_predicted_quantized, result_file)
4855

4956
# generate own classifier based on the one from scikit
5057
number_of_features = len(train_data[0])
@@ -69,48 +76,80 @@ def test_dataset(number_of_bits_per_feature: int,
6976
# _test_classification_performance(clf, test_data, 10, 10)
7077

7178

72-
def report_performance(clf, clf_type: ClassifierType, expected: np.ndarray, predicted: np.ndarray):
79+
def report_performance(
80+
clf, clf_type: ClassifierType,
81+
expected: np.ndarray, predicted: np.ndarray,
82+
result_file: Optional[str]=None
83+
):
7384
if clf_type == ClassifierType.RANDOM_FOREST_REGRESSOR:
74-
_report_regressor(expected, predicted)
85+
_report_regressor(expected, predicted, result_file)
7586
else:
76-
_report_classifier(clf, expected, predicted)
87+
_report_classifier(clf, expected, predicted, result_file)
7788

7889

79-
def _report_classifier(clf, expected: np.ndarray, predicted: np.ndarray):
80-
print("Detailed classification report:")
90+
def _report_classifier(
91+
clf,
92+
expected: np.ndarray, predicted: np.ndarray,
93+
result_file: Optional[str]=None
94+
):
95+
t = ''
96+
t += 'Detailed classification report:\n'
8197

82-
print("Classification report for classifier %s:\n%s\n"
83-
% (clf, metrics.classification_report(expected, predicted)))
98+
t += 'Classification report for classifier ' + str(clf) + '\n'
99+
t += str(metrics.classification_report(expected, predicted)) + '\n'
84100
cm = metrics.confusion_matrix(expected, predicted)
85101
cm = cm / cm.sum(axis=1)[:, None] * 100
86102

87-
#np.set_printoptions(formatter={'float': '{: 2.2f}'.format})
88-
print(f"Confusion matrix:\n {cm}")
103+
# np.set_printoptions(formatter={'float': '{: 2.2f}'.format})
104+
t += f'Confusion matrix:\n {cm}\n'
89105

90106
f1_score = metrics.f1_score(expected, predicted, average='weighted')
91107
precision = metrics.precision_score(expected, predicted, average='weighted')
92108
recall = metrics.recall_score(expected, predicted, average='weighted')
93109
accuracy = metrics.accuracy_score(expected, predicted)
94-
print(f"f1_score: {f1_score:{2}.{4}}")
95-
print(f"precision: {precision:{2}.{4}}")
96-
print(f"recall: {recall:{2}.{4}}")
97-
print(f"accuracy: {accuracy:{2}.{4}}")
110+
t += f'f1_score: {f1_score:{2}.{4}}\n'
111+
t += f'precision: {precision:{2}.{4}}\n'
112+
t += f'recall: {recall:{2}.{4}}\n'
113+
t += f'accuracy: {accuracy:{2}.{4}}\n'
114+
115+
if result_file is not None:
116+
with open(result_file, 'a+') as f:
117+
f.write(t)
118+
else:
119+
print(t)
98120

99121

100-
def _report_regressor(expected: np.ndarray, predicted: np.ndarray):
101-
print("Detailed regression report:")
122+
def _report_regressor(
123+
expected: np.ndarray,
124+
predicted: np.ndarray,
125+
result_file: Optional[str]=None
126+
):
127+
t = ''
128+
t += 'Detailed regression report:\n'
102129

103130
mae = metrics.mean_absolute_error(expected, predicted)
104131
mse = metrics.mean_squared_error(expected, predicted)
105132
r2s = metrics.r2_score(expected, predicted)
106133
evs = metrics.explained_variance_score(expected, predicted)
107-
print(f"mean_absolute_error: {mae:{2}.{4}}")
108-
print(f"mean_squared_error: {mse:{2}.{4}}")
109-
print(f"coefficient_of_determination: {r2s:{2}.{4}}")
110-
print(f"explained_variance_score: {evs:{2}.{4}}")
134+
t += f'mean_absolute_error: {mae:{2}.{4}}\n'
135+
t += f'mean_squared_error: {mse:{2}.{4}}\n'
136+
t += f'coefficient_of_determination: {r2s:{2}.{4}}\n'
137+
t += f'explained_variance_score: {evs:{2}.{4}}\n'
138+
139+
if result_file is not None:
140+
with open(result_file, 'a+') as f:
141+
f.write(t)
142+
else:
143+
print(t)
111144

112145

113-
def generate_my_classifier(clf, number_of_features: int, number_of_bits_per_feature: int, path: str):
146+
def generate_my_classifier(
147+
clf,
148+
number_of_features: int,
149+
number_of_bits_per_feature: int,
150+
path: str,
151+
result_file: Union[str, None]=None
152+
):
114153
if isinstance(clf, DecisionTreeClassifier):
115154
print("Creating decision tree classifier!")
116155
my_clf = Tree("DecisionTreeClassifier", number_of_features, number_of_bits_per_feature)
@@ -123,7 +162,7 @@ def generate_my_classifier(clf, number_of_features: int, number_of_bits_per_feat
123162

124163
my_clf.build(clf)
125164
my_clf.create_vhdl_file(path)
126-
my_clf.print_parameters()
165+
my_clf.print_parameters(result_file)
127166

128167
return my_clf
129168

decision_trees/gridsearch.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def _scikit_gridsearch(
101101
# print()
102102

103103
# TODO: important note - this does not use test data to evaluate, instead it probably splits the train data
104-
# internally, which means that the final scor will be calculated on this data and is different than the one
104+
# internally, which means that the final score will be calculated on this data and is different than the one
105105
# calculated on test data
106106

107107
data = np.concatenate((train_data, test_data))

0 commit comments

Comments
 (0)