Description
Elasticsearch version (`bin/elasticsearch --version`): `master`, various `7.x` versions
Plugins installed: N/A
JVM version (`java -version`): N/A
OS version (`uname -a` if on a Unix-like system): N/A
Description of the problem including expected versus actual behavior:
In `SnapshotsService#shards` we assume that if an index has metadata in the cluster state then it also has a routing table entry, but this isn't true if the index was closed prior to 7.2. As a workaround, you can open and close any such indices.
Steps to reproduce:
Introduce a closed index in, say, 7.1.1 and then try to take a snapshot in `master`. For instance, apply these changes to b65992e ...
$ git diff
diff --git a/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java b/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java
index 865b076d4e1..bd20357bae9 100644
--- a/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java
+++ b/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java
@@ -14,6 +14,7 @@ import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.ResponseException;
import org.elasticsearch.client.RestClient;
+import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.MetadataIndexStateService;
import org.elasticsearch.common.Booleans;
@@ -769,6 +770,9 @@ public class FullClusterRestartIT extends AbstractFullClusterRestartTestCase {
* and some routing configuration.
*/
public void testSnapshotRestore() throws IOException {
+ // Close an index in the old version to verify that we handle snapshotting it correctly in the newer version
+ final String closedIndexName = index + "_closed";
+
int count;
if (isRunningAgainstOldCluster()) {
// Create the index
@@ -779,6 +783,9 @@ public class FullClusterRestartIT extends AbstractFullClusterRestartTestCase {
}
createIndex(index, settings.build());
indexRandomDocuments(count, true, true, i -> jsonBuilder().startObject().field("field", "value").endObject());
+
+ createIndex(closedIndexName, settings.build());
+ closeIndex(closedIndexName);
} else {
count = countOfIndexedRandomDocuments();
}
@@ -859,6 +866,14 @@ public class FullClusterRestartIT extends AbstractFullClusterRestartTestCase {
if (false == isRunningAgainstOldCluster()) {
checkSnapshot("new_snap", count, Version.CURRENT);
}
+
+ final String closedSnapshotName = isRunningAgainstOldCluster() ? "old_snap_closed" : "new_snap_closed";
+ Request createSnapshotOfClosedIndex = new Request("PUT", "/_snapshot/repo/" + closedSnapshotName);
+ createSnapshotOfClosedIndex.addParameter("wait_for_completion", "true");
+ createSnapshotOfClosedIndex.setJsonEntity("{\"indices\": \"" + closedIndexName + "\"}");
+ final ResponseException responseException
+ = expectThrows(ResponseException.class, () -> client().performRequest(createSnapshotOfClosedIndex));
+ assertThat(responseException.getMessage(), containsString("index closed"));
}
public void testHistoryUUIDIsAdded() throws Exception {
... and then run `./gradlew :qa:full-cluster-restart:v7.1.1#bwcTest -Dtests.class=org.elasticsearch.upgrades.FullClusterRestartIT -Dtests.method=testSnapshotRestore`.
Provide logs (if relevant):
In tests one of the nodes fails with an `AssertionError`:
[2021-03-22T17:25:08,048][ERROR][o.e.b.ElasticsearchUncaughtExceptionHandler] [v7.1.1-1] fatal error in thread [elasticsearch[v7.1.1-1][masterService#updateTask][T#1]], exiting
java.lang.AssertionError: null
at org.elasticsearch.snapshots.SnapshotsService.shards(SnapshotsService.java:2285) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at org.elasticsearch.snapshots.SnapshotsService$1.execute(SnapshotsService.java:330) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at org.elasticsearch.repositories.blobstore.BlobStoreRepository$1.execute(BlobStoreRepository.java:391) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at org.elasticsearch.cluster.ClusterStateUpdateTask.execute(ClusterStateUpdateTask.java:48) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at org.elasticsearch.cluster.service.MasterService.executeTasks(MasterService.java:686) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at org.elasticsearch.cluster.service.MasterService.calculateTaskOutputs(MasterService.java:308) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at org.elasticsearch.cluster.service.MasterService.runTasks(MasterService.java:203) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at org.elasticsearch.cluster.service.MasterService$Batcher.run(MasterService.java:140) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at org.elasticsearch.cluster.service.TaskBatcher.runIfNotProcessed(TaskBatcher.java:139) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at org.elasticsearch.cluster.service.TaskBatcher$BatchedTask.run(TaskBatcher.java:177) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:669) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:241) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:204) ~[elasticsearch-8.0.0-SNAPSHOT.jar:8.0.0-SNAPSHOT]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630) ~[?:?]
at java.lang.Thread.run(Thread.java:832) [?:?]
In production it's an NPE instead:
[REDACTED] [REDACTED][REDACTED] failed to create snapshot
java.lang.NullPointerException: Cannot invoke "org.elasticsearch.cluster.routing.IndexRoutingTable.shard(int)" because "indexRoutingTable" is null
at org.elasticsearch.snapshots.SnapshotsService.shards(SnapshotsService.java:2717) ~[elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.snapshots.SnapshotsService.access$1000(SnapshotsService.java:115) ~[elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.snapshots.SnapshotsService$2.execute(SnapshotsService.java:399) ~[elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.repositories.blobstore.BlobStoreRepository$1.execute(BlobStoreRepository.java:380) ~[elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.cluster.ClusterStateUpdateTask.execute(ClusterStateUpdateTask.java:48) ~[elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.cluster.service.MasterService.executeTasks(MasterService.java:691) ~[elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.cluster.service.MasterService.calculateTaskOutputs(MasterService.java:313) ~[elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.cluster.service.MasterService.runTasks(MasterService.java:208) [elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.cluster.service.MasterService.access$000(MasterService.java:62) [elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.cluster.service.MasterService$Batcher.run(MasterService.java:140) [elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.cluster.service.TaskBatcher.runIfNotProcessed(TaskBatcher.java:139) [elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.cluster.service.TaskBatcher$BatchedTask.run(TaskBatcher.java:177) [elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:673) [elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:241) [elasticsearch-7.11.1.jar:7.11.1]
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:204) [elasticsearch-7.11.1.jar:7.11.1]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130) [?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630) [?:?]
at java.lang.Thread.run(Thread.java:832) [?:?]