Skip to content

Commit cddb1ed

Browse files
author
eugenp
committed
minor cleanup
1 parent 1175f22 commit cddb1ed

File tree

1 file changed

+26
-15
lines changed

1 file changed

+26
-15
lines changed

spring-security-oauth/src/main/java/org/baeldung/reddit/classifier/RedditClassifier.java

Lines changed: 26 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ public RedditClassifier() {
5050
domainEncoder.setProbes(1);
5151
}
5252

53-
public RedditClassifier(int poolSize, int noOfFeatures) {
53+
public RedditClassifier(final int poolSize, final int noOfFeatures) {
5454
this.noOfFeatures = noOfFeatures;
5555
classifier = new AdaptiveLogisticRegression(2, noOfFeatures, new L2());
5656
classifier.setPoolSize(poolSize);
@@ -60,7 +60,7 @@ public RedditClassifier(int poolSize, int noOfFeatures) {
6060
domainEncoder.setProbes(1);
6161
}
6262

63-
public void trainClassifier(String fileName) throws IOException {
63+
public void trainClassifier(final String fileName) throws IOException {
6464
final List<NamedVector> vectors = extractVectors(readDataFile(fileName));
6565
final int size = vectors.size();
6666
final int noOfTraining = (int) (size * 0.8);
@@ -77,7 +77,7 @@ public void trainClassifier(String fileName) throws IOException {
7777
evaluateClassifier(testData);
7878
}
7979

80-
public Vector convertPost(String title, String domain, int hour) {
80+
public Vector convertPost(final String title, final String domain, final int hour) {
8181
final Vector vector = new RandomAccessSparseVector(noOfFeatures);
8282
final List<String> words = Splitter.onPattern("\\W").omitEmptyStrings().splitToList(title);
8383
vector.set(0, hour);
@@ -89,7 +89,7 @@ public Vector convertPost(String title, String domain, int hour) {
8989
return vector;
9090
}
9191

92-
public int classify(Vector features) {
92+
public int classify(final Vector features) {
9393
if (learner == null) {
9494
learner = classifier.getBest().getPayload().getLearner();
9595
}
@@ -102,7 +102,7 @@ public double getAccuracy() {
102102

103103
// ==== Private methods
104104

105-
private void evaluateClassifier(List<NamedVector> vectors) throws IOException {
105+
private void evaluateClassifier(final List<NamedVector> vectors) throws IOException {
106106
int category, result;
107107
int correct = 0;
108108
int wrong = 0;
@@ -125,7 +125,7 @@ private void evaluateClassifier(List<NamedVector> vectors) throws IOException {
125125
this.accuracy = correct / (wrong + correct + 0.0);
126126
}
127127

128-
private List<String> readDataFile(String fileName) throws IOException {
128+
private List<String> readDataFile(final String fileName) throws IOException {
129129
List<String> lines = Files.readLines(new File(fileName), Charset.forName("utf-8"));
130130
if ((lines == null) || (lines.size() == 0)) {
131131
new RedditDataCollector().collectData();
@@ -135,33 +135,44 @@ private List<String> readDataFile(String fileName) throws IOException {
135135
return lines;
136136
}
137137

138-
private List<NamedVector> extractVectors(List<String> lines) {
138+
private List<NamedVector> extractVectors(final List<String> lines) {
139139
final List<NamedVector> vectors = new ArrayList<NamedVector>(lines.size());
140140
for (final String line : lines) {
141141
vectors.add(extractVector(line));
142142
}
143143
return vectors;
144144
}
145145

146-
private NamedVector extractVector(String line) {
146+
private NamedVector extractVector(final String line) {
147147
final String[] items = line.split(",");
148-
final String category = extractCategory(Integer.parseInt(items[0]));
148+
final String numberOfVotes = items[0];
149+
final String time = items[1];
150+
final String numberOfWordInTitle = items[2];
151+
final String title = items[3];
152+
final String theRootDomain = items[4];
153+
154+
final String category = extractCategory(Integer.parseInt(numberOfVotes));
155+
149156
final NamedVector vector = new NamedVector(new RandomAccessSparseVector(noOfFeatures), category);
150-
final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
151-
cal.setTimeInMillis(Long.parseLong(items[1]) * 1000);
152157

158+
final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
159+
cal.setTimeInMillis(Long.parseLong(time) * 1000);
153160
vector.set(0, cal.get(Calendar.HOUR_OF_DAY)); // hour of day
154-
vector.set(1, Integer.parseInt(items[2])); // number of words in the title
155-
domainEncoder.addToVector(items[4], vector);
156-
final String[] words = items[3].split(" ");
161+
162+
vector.set(1, Integer.parseInt(numberOfWordInTitle)); // number of words in the title
163+
164+
domainEncoder.addToVector(theRootDomain, vector);
165+
final String[] words = title.split(" ");
157166
// titleEncoder.setProbes(words.length);
167+
168+
// TODO: use a Java 8 stream with filter and remove the 1 and 2 character words; example: "a", "of", "to"
158169
for (final String word : words) {
159170
titleEncoder.addToVector(word, vector);
160171
}
161172
return vector;
162173
}
163174

164-
private String extractCategory(int score) {
175+
private String extractCategory(final int score) {
165176
return (score < MIN_SCORE) ? "BAD" : "GOOD";
166177
}
167178

0 commit comments

Comments (0)