@@ -59,8 +59,8 @@ public class KivaController {
59
59
private String trainFilePath = "C:\\ Users\\ stephen\\ Desktop\\ svm\\ train\\ train-%s.txt" ; // Path template for a "train-%s.txt" file; the %s placeholder is filled in by code outside this excerpt. NOTE(review): absolute, machine-specific path is hard-coded - consider externalizing it.
60
60
private String modelFilePath = "C:\\ Users\\ stephen\\ Desktop\\ svm\\ model\\ model-%s.txt" ; // Path template for a "model-%s.txt" file; presumably formatted with the same key as trainFilePath - TODO confirm against the callers.
61
61
private String testFilePath = "C:\\ Users\\ stephen\\ Desktop\\ svm\\ test\\ testdata.txt" ; // Fixed path (no placeholder) to "testdata.txt"; who reads or writes it is not visible in this excerpt.
62
- private String predictFilePath = "C:\\ Users\\ stephen\\ Desktop\\ svm\\ predict\\ predict-%s.txt" ;
63
62
private static final int TAG_FREQ = 1 ; // Tag-frequency constant; how it is applied is not visible in this excerpt - TODO confirm.
63
+ private int F_VALUE_NUM = 6 ; // Passed as the second argument to sublist(...) on the generated tags in pValue and rValue; presumably the maximum number of generated tags scored per document - TODO confirm against sublist. NOTE(review): named like a constant but declared as a mutable instance field; consider "private static final" if nothing reassigns it.
64
64
65
65
@ ApiOperation (value = "解析loans" , notes = "将数据解析到数据库" )
66
66
@ RequestMapping (value = "init" , method = RequestMethod .GET )
@@ -203,7 +203,7 @@ public Response genTagByKnn(@RequestParam("num") int num) throws IOException {
203
203
Collections .sort (kivaResult .getTokenList (), Comparator .comparing (Token ::getWeight ));
204
204
List <KivaResult > selectResult = getKnnSimilarDoc (kivaResult , kivaResults , num );
205
205
List <Token > sortedTags = getSortedTags (selectResult );
206
- sortedTags = sublist (sortedTags ,num );
206
+ sortedTags = sublist (sortedTags , num );
207
207
updateDatabaseOfGenTag (kivaResult .getKiva ().getId (), sortedTags , TagType .KNN );
208
208
});
209
209
LogRecod .print ("knn finished" );
@@ -448,7 +448,11 @@ public Response tranformToSimple(int num) {
448
448
simple .setOriginalDescription (des );
449
449
simple .setStandardDescription (Joiner .on ("#" ).join (ct ));
450
450
simple .setTags (normalizeTags (kiva .getTags ()));
451
- kivaService .insertKivaSimple (simple );
451
+ try {
452
+ kivaService .insertKivaSimple (simple );
453
+ } catch (Exception e ) {
454
+ kivaService .updateSimpleSelective (simple );
455
+ }
452
456
}
453
457
});
454
458
return Response .success ("" );
@@ -760,7 +764,7 @@ private double pValue(List<KivaSimple> simpleList, int tagType) throws IOExcepti
760
764
GenTag genTag = JSONObject .parseObject (kivaSimple .getGenTags (), GenTag .class );
761
765
List <String > originTags = splitString (kivaSimple .getTags ());
762
766
List <Token > originalTokens = Lists .transform (originTags , Token ::new );
763
- List <Token > genTags = getTagsByTagType (tagType , genTag );
767
+ List <Token > genTags = sublist ( getTagsByTagType (tagType , genTag ), F_VALUE_NUM );
764
768
precise += same (genTags , originalTokens );
765
769
all += genTags .size ();
766
770
}
@@ -788,7 +792,7 @@ private double rValue(List<KivaSimple> simpleList, int tagType) throws IOExcepti
788
792
GenTag genTag = JSONObject .parseObject (kivaSimple .getGenTags (), GenTag .class );
789
793
List <String > originTags = splitString (kivaSimple .getTags ());
790
794
List <Token > originalTokens = Lists .transform (originTags , Token ::new );
791
- List <Token > genTags = getTagsByTagType (tagType , genTag );
795
+ List <Token > genTags = sublist ( getTagsByTagType (tagType , genTag ), F_VALUE_NUM );
792
796
recall += same (genTags , originalTokens );
793
797
all += originTags .size ();
794
798
}
@@ -1129,7 +1133,7 @@ public Response genTagByLDA(int num) {
1129
1133
KivaResult kivaResult = new KivaResult (kivaSimple );
1130
1134
List <KivaResult > selectResult = getLDASimilarDoc (kivaResult , kivaList , score , docThetas , num );
1131
1135
List <Token > sortedTags = getSortedTags (selectResult );
1132
- sortedTags = sublist (sortedTags ,num );
1136
+ sortedTags = sublist (sortedTags , num );
1133
1137
updateDatabaseOfGenTag (kivaResult .getKiva ().getId (), sortedTags , TagType .LDA );
1134
1138
}
1135
1139
return Response .success (docThetas );
@@ -1210,9 +1214,11 @@ private List<Token> getSimlarDocTokens(String text, List<Token> toCompare, List<
1210
1214
List <Long > validIds = simplesContainsTag (token .getWord ());
1211
1215
List <KivaResult > krs = filterValid (kivaResults , validIds );
1212
1216
double score = computeToSimlerResult (krs , toCompare );
1213
- Token t = new Token (token .getWord ());
1214
- t .setWeight (score );
1215
- result .add (t );
1217
+ if (score >0.005 ) {
1218
+ // Token t = new Token(token.getWord());
1219
+ // t.setWeight(score);
1220
+ result .add (token );
1221
+ }
1216
1222
}
1217
1223
Collections .sort (result , Comparator .comparing (Token ::getWeight ).reversed ());
1218
1224
0 commit comments