Skip to content

Commit c32dd0f

Browse files
committed
有戏的filter
1 parent 2edd397 commit c32dd0f

File tree

2 files changed

17 additions & 11 deletions

2 files changed

17 additions & 11 deletions

src/main/java/com/stephen/lab/controller/paper/KivaController.java

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -59,8 +59,8 @@ public class KivaController {
5959
private String trainFilePath = "C:\\Users\\stephen\\Desktop\\svm\\train\\train-%s.txt";
6060
private String modelFilePath = "C:\\Users\\stephen\\Desktop\\svm\\model\\model-%s.txt";
6161
private String testFilePath = "C:\\Users\\stephen\\Desktop\\svm\\test\\testdata.txt";
62-
private String predictFilePath = "C:\\Users\\stephen\\Desktop\\svm\\predict\\predict-%s.txt";
6362
private static final int TAG_FREQ = 1;
63+
private int F_VALUE_NUM = 6;
6464

6565
@ApiOperation(value = "解析loans", notes = "将数据解析到数据库")
6666
@RequestMapping(value = "init", method = RequestMethod.GET)
@@ -203,7 +203,7 @@ public Response genTagByKnn(@RequestParam("num") int num) throws IOException {
203203
Collections.sort(kivaResult.getTokenList(), Comparator.comparing(Token::getWeight));
204204
List<KivaResult> selectResult = getKnnSimilarDoc(kivaResult, kivaResults, num);
205205
List<Token> sortedTags = getSortedTags(selectResult);
206-
sortedTags=sublist(sortedTags,num);
206+
sortedTags = sublist(sortedTags, num);
207207
updateDatabaseOfGenTag(kivaResult.getKiva().getId(), sortedTags, TagType.KNN);
208208
});
209209
LogRecod.print("knn finished");
@@ -448,7 +448,11 @@ public Response tranformToSimple(int num) {
448448
simple.setOriginalDescription(des);
449449
simple.setStandardDescription(Joiner.on("#").join(ct));
450450
simple.setTags(normalizeTags(kiva.getTags()));
451-
kivaService.insertKivaSimple(simple);
451+
try {
452+
kivaService.insertKivaSimple(simple);
453+
} catch (Exception e) {
454+
kivaService.updateSimpleSelective(simple);
455+
}
452456
}
453457
});
454458
return Response.success("");
@@ -760,7 +764,7 @@ private double pValue(List<KivaSimple> simpleList, int tagType) throws IOExcepti
760764
GenTag genTag = JSONObject.parseObject(kivaSimple.getGenTags(), GenTag.class);
761765
List<String> originTags = splitString(kivaSimple.getTags());
762766
List<Token> originalTokens = Lists.transform(originTags, Token::new);
763-
List<Token> genTags = getTagsByTagType(tagType, genTag);
767+
List<Token> genTags = sublist(getTagsByTagType(tagType, genTag), F_VALUE_NUM);
764768
precise += same(genTags, originalTokens);
765769
all += genTags.size();
766770
}
@@ -788,7 +792,7 @@ private double rValue(List<KivaSimple> simpleList, int tagType) throws IOExcepti
788792
GenTag genTag = JSONObject.parseObject(kivaSimple.getGenTags(), GenTag.class);
789793
List<String> originTags = splitString(kivaSimple.getTags());
790794
List<Token> originalTokens = Lists.transform(originTags, Token::new);
791-
List<Token> genTags = getTagsByTagType(tagType, genTag);
795+
List<Token> genTags = sublist(getTagsByTagType(tagType, genTag), F_VALUE_NUM);
792796
recall += same(genTags, originalTokens);
793797
all += originTags.size();
794798
}
@@ -1129,7 +1133,7 @@ public Response genTagByLDA(int num) {
11291133
KivaResult kivaResult = new KivaResult(kivaSimple);
11301134
List<KivaResult> selectResult = getLDASimilarDoc(kivaResult, kivaList, score, docThetas, num);
11311135
List<Token> sortedTags = getSortedTags(selectResult);
1132-
sortedTags=sublist(sortedTags,num);
1136+
sortedTags = sublist(sortedTags, num);
11331137
updateDatabaseOfGenTag(kivaResult.getKiva().getId(), sortedTags, TagType.LDA);
11341138
}
11351139
return Response.success(docThetas);
@@ -1210,9 +1214,11 @@ private List<Token> getSimlarDocTokens(String text, List<Token> toCompare, List<
12101214
List<Long> validIds = simplesContainsTag(token.getWord());
12111215
List<KivaResult> krs = filterValid(kivaResults, validIds);
12121216
double score = computeToSimlerResult(krs, toCompare);
1213-
Token t = new Token(token.getWord());
1214-
t.setWeight(score);
1215-
result.add(t);
1217+
if(score>0.005) {
1218+
// Token t = new Token(token.getWord());
1219+
// t.setWeight(score);
1220+
result.add(token);
1221+
}
12161222
}
12171223
Collections.sort(result, Comparator.comparing(Token::getWeight).reversed());
12181224

src/main/java/com/stephen/lab/util/nlp/NLPIRUtil.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ public static List<String> cutwords(String docContent) {
6262
// String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class); // 获取命名实体识别结果
6363
// 获取词形还原结果
6464
String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
65-
if (!pos.startsWith("V")) {
65+
// if (!pos.startsWith("V")) {
6666
result.add(lemma);
67-
}
67+
// }
6868
}
6969
}
7070
return result;

0 commit comments

Comments
 (0)