43
43
import java .util .stream .Stream ;
44
44
45
45
import org .apache .commons .lang3 .SystemUtils ;
46
+ import org .apache .lucene .document .DateTools ;
46
47
import org .apache .lucene .document .Document ;
48
+ import org .apache .lucene .index .IndexableField ;
47
49
import org .apache .lucene .queryparser .classic .ParseException ;
48
50
import org .apache .lucene .search .ScoreDoc ;
49
51
50
52
import org .eclipse .jgit .api .Git ;
51
53
import org .eclipse .jgit .api .MergeCommand ;
54
+ import org .eclipse .jgit .api .errors .GitAPIException ;
52
55
import org .eclipse .jgit .lib .ObjectId ;
56
+ import org .eclipse .jgit .revwalk .RevCommit ;
53
57
import org .junit .jupiter .api .AfterEach ;
54
58
import org .junit .jupiter .api .BeforeEach ;
55
59
import org .junit .jupiter .api .Test ;
@@ -171,6 +175,7 @@ void setUpClass() throws Exception {
171
175
172
176
@ AfterEach
173
177
void tearDownClass () throws Exception {
178
+ env .releaseIndexSearchers ();
174
179
repository .destroy ();
175
180
}
176
181
@@ -312,21 +317,28 @@ void testIndexPath() throws IOException {
312
317
313
318
@ Test
314
319
void testGetLastRev () throws IOException , ParseException {
320
+ // IndexDatabase.getDocument() searches the index, so refresh the IndexSearcher objects
321
+ // to get fresh results.
322
+ env .maybeRefreshIndexSearchers ();
315
323
Document doc = IndexDatabase .getDocument (Paths .get (repository .getSourceRoot (),
316
324
"git" , "main.c" ).toFile ());
317
325
assertNotNull (doc );
318
326
assertEquals ("aa35c25882b9a60a97758e0ceb276a3f8cb4ae3a" , doc .get (QueryBuilder .LASTREV ));
319
327
}
320
328
321
- static void changeFileAndCommit (Git git , File file , String comment ) throws Exception {
329
+ static RevCommit changeFileAndCommit (Git git , File file , String comment ) throws Exception {
322
330
String authorName = "Foo Bar" ;
323
331
String authorEmail =
"[email protected] " ;
324
332
325
333
try (FileOutputStream fos = new FileOutputStream (file , true )) {
326
334
fos .write (comment .getBytes (StandardCharsets .UTF_8 ));
327
335
}
328
336
329
- git .commit ().setMessage (comment ).setAuthor (authorName , authorEmail ).setAll (true ).call ();
337
+ return commitFile (git , comment , authorName , authorEmail );
338
+ }
339
+
340
+ private static RevCommit commitFile (Git git , String comment , String authorName , String authorEmail ) throws GitAPIException {
341
+ return git .commit ().setMessage (comment ).setAuthor (authorName , authorEmail ).setAll (true ).call ();
330
342
}
331
343
332
344
private void addFileAndCommit (Git git , String newFileName , File repositoryRoot , String message ) throws Exception {
@@ -338,7 +350,7 @@ private void addFileAndCommit(Git git, String newFileName, File repositoryRoot,
338
350
fos .write ("foo bar foo bar foo bar" .getBytes (StandardCharsets .UTF_8 ));
339
351
}
340
352
git .add ().addFilepattern (newFileName ).call ();
341
- git . commit (). setMessage ( message ). setAuthor ( "foo bar" ,
"[email protected] " ). setAll ( true ). call ( );
353
+ commitFile ( git , message , "foo bar" ,
"[email protected] " );
342
354
}
343
355
344
356
private void addMergeCommit (Git git , File repositoryRoot ) throws Exception {
@@ -1019,6 +1031,7 @@ void testAnnotationCacheProjectTunable(boolean useAnnotationCache, boolean isHis
1019
1031
// cleanup
1020
1032
gitProject .setHistoryBasedReindex (projectUseAnnotationOrig );
1021
1033
env .setDataRoot (dataRootOrig );
1034
+ env .releaseIndexSearchers ();
1022
1035
IOUtils .removeRecursive (dataRoot );
1023
1036
}
1024
1037
@@ -1076,4 +1089,134 @@ void testHistoryCacheForFileBasedRepository() throws Exception {
1076
1089
assertFalse (otherFile .exists ());
1077
1090
assertFalse (historyGuru .hasHistoryCacheForFile (otherFile ));
1078
1091
}
1092
+
1093
+ /**
1094
+ * When incrementally indexing across Git changesets which modify the same file but whose net outcome
1095
+ * is no change to the file (the changes nullify each other), IndexDatabase needs to filter these files
1096
+ * out because Git does it as well. Otherwise, the indexer would attempt to add the document with
1097
+ * the time stamp of a pre-existing document, which would make indexing of the related project fail.
1098
+ * This test simulates this case.
1099
+ * <p>
1100
+ * The strategy of this test is as follows:
1101
+ * <ol>
1102
+ * <li>initialize parent repository</li>
1103
+ * <li>change+add file <code>foo.txt</code> in parent repository, commit</li>
1104
+ * <li>change+add file <code>bar.txt</code> in parent repository, commit</li>
1105
+ * <li>clone parent repository</li>
1106
+ * <li>index the clone</li>
1107
+ * <li>change <code>foo.txt</code> in parent repository, commit</li>
1108
+ * <li>change <code>bar.txt</code> in parent repository, commit</li>
1109
+ * <li>revert the commit that changed <code>foo.txt</code> in parent repository</li>
1110
+ * <li>pull the changes to the clone</li>
1111
+ * <li>index the clone (incremental)</li>
1112
+ * </ol>
1113
+ * <p>
1114
+ * Before the fix, the last reindex resulted in RuntimeException caused by the addition of the <code>foo.txt</code>
1115
+ * file with the time stamp of the file before the last changes. This is because history based reindex
1116
+ * extracts the list of files from the changesets; however, Git does not update the file if the changes
1117
+ * were nullified.
1118
+ */
1119
+ @ Test
1120
+ void testNullifiedChanges () throws Exception {
1121
+ File parentRepositoryRoot = new File (env .getSourceRootPath (), "gitNoChangeParent" );
1122
+ assertTrue (parentRepositoryRoot .mkdir ());
1123
+
1124
+ env .setHistoryBasedReindex (true );
1125
+
1126
+ final String barName = "bar.txt" ;
1127
+ final String repoName = "gitNoChange" ;
1128
+ Path repositoryRootPath = Path .of (env .getSourceRootPath (), repoName );
1129
+ List <String > projectList = List .of (File .separator + repoName );
1130
+ try (Git gitParent = Git .init ().setDirectory (parentRepositoryRoot ).call ()) {
1131
+ // Create initial commits for the files in the parent repository.
1132
+ final String fooName = "foo.txt" ;
1133
+ File fooFile = new File (parentRepositoryRoot , fooName );
1134
+ if (!fooFile .createNewFile ()) {
1135
+ throw new IOException ("Could not create file " + fooFile );
1136
+ }
1137
+ gitParent .add ().addFilepattern (fooName ).call ();
1138
+ changeFileAndCommit (gitParent , fooFile , "first foo" );
1139
+
1140
+ File barFile = new File (parentRepositoryRoot , barName );
1141
+ if (!barFile .createNewFile ()) {
1142
+ throw new IOException ("Could not create file " + barFile );
1143
+ }
1144
+ gitParent .add ().addFilepattern (barName ).call ();
1145
+ changeFileAndCommit (gitParent , barFile , "first bar" );
1146
+
1147
+ // Clone the repository at this point so that subsequent changes can be pulled later on.
1148
+ final String cloneUrl = parentRepositoryRoot .toURI ().toString ();
1149
+ try (Git gitClone = Git .cloneRepository ()
1150
+ .setURI (cloneUrl )
1151
+ .setDirectory (repositoryRootPath .toFile ())
1152
+ .call ()) {
1153
+
1154
+ // Perform initial index. This is important so that history cache for the repository
1155
+ // is created. It contains the ID of the last indexed changeset so that it can be
1156
+ // used during the final reindex.
1157
+ indexer .prepareIndexer (
1158
+ env , true , true ,
1159
+ null , null );
1160
+ env .setRepositories (new ArrayList <>(HistoryGuru .getInstance ().getRepositories ()));
1161
+ env .generateProjectRepositoriesMap ();
1162
+ Project project = Project .getByName (repoName );
1163
+ assertNotNull (project );
1164
+ List <RepositoryInfo > repositoryInfos = env .getProjectRepositoriesMap ().get (project );
1165
+ assertEquals (1 , repositoryInfos .size ());
1166
+ assertEquals ("git" , repositoryInfos .get (0 ).getType ());
1167
+ indexer .doIndexerExecution (projectList , null );
1168
+
1169
+ // Change the parent repository so that it contains a nullified change to the foo.txt file.
1170
+ final String data = "change foo" ;
1171
+ gitParent .add ().addFilepattern (fooName ).call ();
1172
+ RevCommit commit = changeFileAndCommit (gitParent , fooFile , data );
1173
+
1174
+ // Also throw another file into the mix so that it resembles reality a bit more.
1175
+ changeFileAndCommit (gitParent , barFile , "change bar" );
1176
+
1177
+ // Revert the change done to foo.txt so that it is nullified for the subsequent pull.
1178
+ gitParent .revert ().include (commit ).call ();
1179
+
1180
+ // Bring the changes to the repository to be indexed. Again, done for better simulation.
1181
+ gitClone .pull ().call ();
1182
+ }
1183
+ }
1184
+
1185
+ // Final reindex. This should discover the changes done to the clone and index them.
1186
+ indexer .prepareIndexer (
1187
+ env , true , true ,
1188
+ null , null );
1189
+ //
1190
+ // Use IndexDatabase instead of indexer.doIndexerExecution(projectList, null) because
1191
+ // it will detect the indexing failure via RuntimeException. Also, it will be possible
1192
+ // to determine via mocking whether history based reindex was used.
1193
+ //
1194
+ IndexDownArgsFactory factory = new IndexDownArgsFactory ();
1195
+ IndexDownArgsFactory spyFactory = spy (factory );
1196
+ IndexDownArgs args = new IndexDownArgs ();
1197
+ // In this case the getIndexDownArgs() should be called from update() just once so this will suffice.
1198
+ when (spyFactory .getIndexDownArgs ()).thenReturn (args );
1199
+ Project project = env .getProjects ().get (repoName );
1200
+ assertNotNull (project );
1201
+ IndexDatabase idbOrig = new IndexDatabase (project , spyFactory );
1202
+ assertNotNull (idbOrig );
1203
+ IndexDatabase idb = spy (idbOrig );
1204
+ idb .update ();
1205
+ // Verify history based reindex was used.
1206
+ checkIndexDown (true , idb );
1207
+
1208
+ // Check that the document for bar.txt was updated. Serves as a smoke test.
1209
+ File barFile = new File (repositoryRootPath .toString (), barName );
1210
+ assertTrue (barFile .exists ());
1211
+ // IndexDatabase.getDocument() performs index search to retrieve the document, so the corresponding
1212
+ // IndexSearcher object has to be refreshed in order to get a fresh document.
1213
+ env .maybeRefreshIndexSearchers ();
1214
+ Document barDoc = IndexDatabase .getDocument (barFile );
1215
+ assertNotNull (barDoc );
1216
+ IndexableField field = barDoc .getField (QueryBuilder .DATE );
1217
+ String docDate = field .stringValue ();
1218
+ // Need to use the same resolution as in AnalyzerGuru#populateDocument().
1219
+ String fileDate = DateTools .timeToString (barFile .lastModified (), DateTools .Resolution .MILLISECOND );
1220
+ assertEquals (fileDate , docDate );
1221
+ }
1079
1222
}
0 commit comments