Retry shard movements during ESQL query #126653

Merged

Commits (30)

63b19d6  Test shard movements during query (idegtiarenko, Apr 8, 2025)
90f33b8  improve test (idegtiarenko, Apr 10, 2025)
b1a04ff  cleanup (idegtiarenko, Apr 10, 2025)
545c50d  fmt (idegtiarenko, Apr 10, 2025)
8b5f590  make searchShards reusable (idegtiarenko, Apr 10, 2025)
406df8d  implement retry (idegtiarenko, Apr 10, 2025)
1e3e92d  minor cleanups (idegtiarenko, Apr 11, 2025)
f7916f7  Update docs/changelog/126653.yaml (idegtiarenko, Apr 11, 2025)
068b6be  request only relevant indices (idegtiarenko, Apr 11, 2025)
a031270  limit retry attempts (idegtiarenko, Apr 11, 2025)
1453368  Merge branch 'main' into retry_shard_movements_during_query (idegtiarenko, Apr 11, 2025)
4220622  make it possible to inject shard resolution logic (idegtiarenko, Apr 11, 2025)
2ea3292  select random pattern (idegtiarenko, Apr 11, 2025)
782c595  add unit tests (idegtiarenko, Apr 14, 2025)
8d2293e  Merge branch 'main' into retry_shard_movements_during_query (idegtiarenko, Apr 14, 2025)
54c0656  separate test for retrying only relevant shards (idegtiarenko, Apr 14, 2025)
811d8fc  remove todo (idegtiarenko, Apr 14, 2025)
c20d53c  Merge branch 'main' into retry_shard_movements_during_query (idegtiarenko, Apr 14, 2025)
56128ee  do can match only once (idegtiarenko, Apr 15, 2025)
9e2bb85  testSearchWhileRelocating (idegtiarenko, Apr 15, 2025)
333ed60  Merge branch 'main' into retry_shard_movements_during_query (idegtiarenko, Apr 16, 2025)
b8b4a95  move logic to DataNodeComputeHandler (idegtiarenko, Apr 16, 2025)
503f13e  update remaining nodes under the lock (idegtiarenko, Apr 16, 2025)
e660001  upd (idegtiarenko, Apr 16, 2025)
0908649  Merge branch 'main' into retry_shard_movements_during_query (idegtiarenko, Apr 16, 2025)
b2819b6  Merge branch 'main' into retry_shard_movements_during_query (idegtiarenko, Apr 16, 2025)
e899a10  retry only when not received data (idegtiarenko, Apr 17, 2025)
e298aaf  make resolution sync (idegtiarenko, Apr 22, 2025)
bb67124  limit retry attempts (idegtiarenko, Apr 22, 2025)
ef0ffef  Merge branch 'main' into retry_shard_movements_during_query (idegtiarenko, Apr 22, 2025)

5 changes: 5 additions & 0 deletions docs/changelog/126653.yaml
@@ -0,0 +1,5 @@
pr: 126653
summary: Retry shard movements during ESQL query
area: ES|QL
type: enhancement
issues: []
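
For context on the enhancement summarized above: the diffs below route shard resolution through DataNodeRequestSender with a bounded number of re-resolution attempts (the new configuration.pragmas().unavailableShardResolutionAttempts() argument), and the commit history notes that shards are retried only while no data has been received from them. The sketch below is a minimal illustration of that idea under those assumptions, not the actual DataNodeRequestSender code; ShardTarget, ShardResolver, and DataNodeClient are hypothetical names.

```java
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

// Illustrative sketch only: hypothetical types and names, not the actual DataNodeRequestSender.
final class ShardRetrySketch {

    // A shard and the data node it is currently believed to live on (hypothetical).
    record ShardTarget(String index, int shardId, String nodeId) {}

    interface ShardResolver {
        // Re-resolves current locations, asked only for the shards that still need data
        // (mirrors the "request only relevant indices" commit).
        List<ShardTarget> resolve(Collection<ShardTarget> unresolved);
    }

    interface DataNodeClient {
        // Returns true if the node produced data for the shard, false if the shard had moved away.
        boolean execute(ShardTarget target);
    }

    static void runWithRetries(List<ShardTarget> initial, ShardResolver resolver, DataNodeClient client, int maxResolutionAttempts) {
        List<ShardTarget> pending = new ArrayList<>(initial);
        int attempts = 0;
        while (pending.isEmpty() == false) {
            List<ShardTarget> moved = new ArrayList<>();
            for (ShardTarget target : pending) {
                if (client.execute(target) == false) {
                    // Retry only shards that have produced no data yet ("retry only when not received data").
                    moved.add(target);
                }
            }
            if (moved.isEmpty()) {
                return; // every shard produced data
            }
            if (++attempts > maxResolutionAttempts) {
                // Attempts are bounded ("limit retry attempts"); give up once the budget is spent.
                throw new IllegalStateException("shards still unavailable after " + attempts + " resolution attempts: " + moved);
            }
            pending = resolver.resolve(moved);
        }
    }
}
```
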

@@ -131,11 +131,8 @@ public void searchShards(Task task, SearchShardsRequest searchShardsRequest, Act
listener.delegateFailureAndWrap((delegate, searchRequest) -> {
Index[] concreteIndices = resolvedIndices.getConcreteLocalIndices();
final Set<ResolvedExpression> indicesAndAliases = indexNameExpressionResolver.resolveExpressions(

Review comment (Member): left over?

project.metadata(),
searchRequest.indices()
);
final Map<String, AliasFilter> aliasFilters = transportSearchAction.buildIndexAliasFilters(
project,

@@ -15,6 +15,7 @@
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.cluster.RemoteException;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.project.ProjectResolver;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.breaker.CircuitBreaker;
@@ -383,6 +384,7 @@ public static LogicalOptimizerContext unboundLogicalOptimizerContext() {
mock(SearchService.class),
null,
mock(ClusterService.class),
mock(ProjectResolver.class),
mock(IndexNameExpressionResolver.class),
null,
mockInferenceRunner()

@@ -25,7 +25,6 @@
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.MockSearchService;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.test.junit.annotations.TestIssueLogging;
import org.elasticsearch.test.transport.MockTransportService;
import org.elasticsearch.transport.RemoteTransportException;
import org.elasticsearch.transport.TransportChannel;
@@ -260,10 +259,6 @@ public void testLimitConcurrentShards() {
}
}

@TestIssueLogging(
issueUrl = "https://github.com/elastic/elasticsearch/issues/125947",
value = "logger.org.elasticsearch.cluster.routing.allocation.ShardChangesObserver:TRACE"
)
public void testCancelUnnecessaryRequests() {
assumeTrue("Requires pragmas", canUseQueryPragmas());
internalCluster().ensureAtLeastNumDataNodes(3);
@@ -0,0 +1,159 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.plugin;

import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.WriteRequest;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.compute.operator.exchange.ExchangeService;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.transport.MockTransportService;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase;
import org.elasticsearch.xpack.esql.action.EsqlQueryResponse;

import java.util.Collection;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.LongAdder;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.as;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.getValuesList;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.hasSize;

public class DataNodeRequestSenderIT extends AbstractEsqlIntegTestCase {

Review comment (Member): Can we make this test similar to SearchWhileRelocatingIT? We continue running ES|QL on one thread while moving shards back and forth between two sets of nodes.

Review comment (Contributor Author): Added

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return CollectionUtils.appendToCopy(super.nodePlugins(), MockTransportService.TestPlugin.class);
}

public void testSearchWhileRelocating() throws InterruptedException {
internalCluster().ensureAtLeastNumDataNodes(3);
var primaries = randomIntBetween(1, 10);
var replicas = randomIntBetween(0, 1);

indicesAdmin().prepareCreate("index-1").setSettings(indexSettings(primaries, replicas)).get();

var docs = randomIntBetween(10, 100);
var bulk = client().prepareBulk("index-1").setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
for (int i = 0; i < docs; i++) {
bulk.add(new IndexRequest().source("key", "value-1"));
}
bulk.get();

// start background searches
var stopped = new AtomicBoolean(false);
var queries = new LongAdder();
var threads = new Thread[randomIntBetween(1, 5)];
for (int i = 0; i < threads.length; i++) {
threads[i] = new Thread(() -> {
while (stopped.get() == false) {
try (EsqlQueryResponse resp = run("FROM index-1")) {
assertThat(getValuesList(resp), hasSize(docs));
}
queries.increment();
}
});
}
for (Thread thread : threads) {
thread.start();
}

// start shard movements
var rounds = randomIntBetween(1, 10);
var names = internalCluster().getNodeNames();
for (int i = 0; i < rounds; i++) {
for (String name : names) {
client().admin()
.cluster()
.prepareUpdateSettings(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT)
.setPersistentSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", name))
.get();

Review comment (Member): Fun

ensureGreen("index-1");
Thread.yield();
}
}

stopped.set(true);
for (Thread thread : threads) {
thread.join(10_000);
}

client().admin()
.cluster()
.prepareUpdateSettings(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT)
.setPersistentSettings(Settings.builder().putNull("cluster.routing.allocation.exclude._name"))
.get();
assertThat(queries.sum(), greaterThan((long) threads.length));
}

public void testRetryOnShardMovement() {
internalCluster().ensureAtLeastNumDataNodes(2);

assertAcked(
client().admin()
.indices()
.prepareCreate("index-1")
.setSettings(
Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
)
);
assertAcked(
client().admin()
.indices()
.prepareCreate("index-2")
.setSettings(
Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
)
);
client().prepareBulk("index-1")
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
.add(new IndexRequest().source("key", "value-1"))
.get();
client().prepareBulk("index-2")
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
.add(new IndexRequest().source("key", "value-2"))
.get();

var shouldMove = new AtomicBoolean(true);

for (TransportService transportService : internalCluster().getInstances(TransportService.class)) {
as(transportService, MockTransportService.class).addRequestHandlingBehavior(
ExchangeService.OPEN_EXCHANGE_ACTION_NAME,
(handler, request, channel, task) -> {
// move index shard
if (shouldMove.compareAndSet(true, false)) {
var currentShardNodeId = clusterService().state()
.routingTable()
.index("index-1")
.shard(0)
.primaryShard()
.currentNodeId();

Review comment (Contributor Author): I need to check if this could be replaced by a proper API call instead (see the sketch after this file's diff).

assertAcked(
client().admin()
.indices()
.prepareUpdateSettings("index-1")
.setSettings(Settings.builder().put("index.routing.allocation.exclude._id", currentShardNodeId))
);
ensureGreen("index-1");
}
// execute data node request
handler.messageReceived(request, channel, task);
}
);
}

try (EsqlQueryResponse resp = run("FROM " + randomFrom("index-1,index-2", "index-*"))) {
assertThat(getValuesList(resp), hasSize(2));

Review comment (Member): Do we send back the node id that we ran the driver on when we profile? Could we add that as a double check?

Review comment (Contributor Author @idegtiarenko, Apr 15, 2025): I would like to avoid it. We do not really know the source of each row, nor where the shards are currently allocated. We would also need to exclude the coordinating node, since it participates in the query and might or might not contain (or used to contain) shards participating in the query.

Review comment (Member): 👍

}
}
}
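
Regarding the author's note above about replacing the routingTable() lookup in testRetryOnShardMovement with a proper API call: below is a minimal sketch of one possible alternative that goes through the cluster state API instead of the local ClusterService. It assumes a ClusterStateRequestBuilder-style client call; the exact prepareState signature (and whether it takes a master-node timeout such as TEST_REQUEST_TIMEOUT) varies between Elasticsearch versions, so treat this as an assumption rather than a verified drop-in replacement.

```java
// Sketch of an alternative to the clusterService().state() lookup inside testRetryOnShardMovement.
// Assumes the ClusterStateRequestBuilder API; prepareState(...) may have a different signature per version.
String currentShardNodeId = client().admin()
    .cluster()
    .prepareState(TEST_REQUEST_TIMEOUT)
    .clear()
    .setRoutingTable(true)
    .setIndices("index-1")
    .get()
    .getState()
    .routingTable()
    .index("index-1")
    .shard(0)
    .primaryShard()
    .currentNodeId();
```
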

@@ -12,6 +12,7 @@
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.ShardSearchFailure;
import org.elasticsearch.cluster.RemoteException;
import org.elasticsearch.cluster.project.ProjectResolver;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.concurrent.RunOnce;
@@ -130,6 +131,7 @@ public class ComputeService {
private final LookupFromIndexService lookupFromIndexService;
private final InferenceRunner inferenceRunner;
private final ClusterService clusterService;
private final ProjectResolver projectResolver;
private final AtomicLong childSessionIdGenerator = new AtomicLong();
private final DataNodeComputeHandler dataNodeComputeHandler;
private final ClusterComputeHandler clusterComputeHandler;
@@ -157,7 +159,16 @@ public ComputeService(
this.lookupFromIndexService = lookupFromIndexService;
this.inferenceRunner = transportActionServices.inferenceRunner();
this.clusterService = transportActionServices.clusterService();
this.dataNodeComputeHandler = new DataNodeComputeHandler(this, searchService, transportService, exchangeService, esqlExecutor);
this.projectResolver = transportActionServices.projectResolver();
this.dataNodeComputeHandler = new DataNodeComputeHandler(
this,
clusterService,
projectResolver,
searchService,
transportService,
exchangeService,
esqlExecutor
);
this.clusterComputeHandler = new ClusterComputeHandler(
this,
exchangeService,

@@ -16,6 +16,8 @@
import org.elasticsearch.action.support.ChannelActionListener;
import org.elasticsearch.action.support.RefCountingRunnable;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.project.ProjectResolver;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.compute.operator.DriverCompletionInfo;
import org.elasticsearch.compute.operator.exchange.ExchangeService;
import org.elasticsearch.compute.operator.exchange.ExchangeSink;
@@ -66,19 +68,25 @@
final class DataNodeComputeHandler implements TransportRequestHandler<DataNodeRequest> {
private final ComputeService computeService;
private final SearchService searchService;
private final ClusterService clusterService;
private final ProjectResolver projectResolver;
private final TransportService transportService;
private final ExchangeService exchangeService;
private final Executor esqlExecutor;
private final ThreadPool threadPool;

DataNodeComputeHandler(
ComputeService computeService,
ClusterService clusterService,
ProjectResolver projectResolver,
SearchService searchService,
TransportService transportService,
ExchangeService exchangeService,
Executor esqlExecutor
) {
this.computeService = computeService;
this.clusterService = clusterService;
this.projectResolver = projectResolver;
this.searchService = searchService;
this.transportService = transportService;
this.exchangeService = exchangeService;
@@ -102,12 +110,17 @@ void startComputeOnDataNodes(
Integer maxConcurrentNodesPerCluster = PlanConcurrencyCalculator.INSTANCE.calculateNodesConcurrency(dataNodePlan, configuration);

new DataNodeRequestSender(
clusterService,
projectResolver,
transportService,
esqlExecutor,
clusterAlias,
parentTask,
originalIndices,
PlannerUtils.canMatchFilter(dataNodePlan),
clusterAlias,
configuration.allowPartialResults(),
maxConcurrentNodesPerCluster == null ? -1 : maxConcurrentNodesPerCluster
maxConcurrentNodesPerCluster == null ? -1 : maxConcurrentNodesPerCluster,
configuration.pragmas().unavailableShardResolutionAttempts()
) {
@Override
protected void sendRequest(
@@ -200,8 +213,6 @@ protected void sendRequest(
}
}.startComputeOnDataNodes(
concreteIndices,
originalIndices,
PlannerUtils.canMatchFilter(dataNodePlan),
runOnTaskFailure,
ActionListener.releaseAfter(outListener, exchangeSource.addEmptySink())
);