Skip to content

Commit 2756227

Browse files
committed
simplify cleaning up non-allocated shard code
It's pretty complex today; simplify it by doing a single iteration, handling both dangling-shard cleanup on existing indices and shards that have been reallocated to a different node.
1 parent 320c9b7 commit 2756227

File tree

1 file changed

+37
-58
lines changed

1 file changed

+37
-58
lines changed

src/main/java/org/elasticsearch/indices/store/IndicesStore.java

Lines changed: 37 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@
2424
import org.elasticsearch.cluster.ClusterService;
2525
import org.elasticsearch.cluster.ClusterStateListener;
2626
import org.elasticsearch.cluster.metadata.MetaData;
27-
import org.elasticsearch.cluster.routing.*;
27+
import org.elasticsearch.cluster.routing.IndexRoutingTable;
28+
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
29+
import org.elasticsearch.cluster.routing.ShardRouting;
2830
import org.elasticsearch.common.component.AbstractComponent;
2931
import org.elasticsearch.common.inject.Inject;
3032
import org.elasticsearch.common.io.FileSystemUtils;
@@ -37,8 +39,6 @@
3739
import org.elasticsearch.node.settings.NodeSettingsService;
3840
import org.elasticsearch.threadpool.ThreadPool;
3941

40-
import java.io.File;
41-
4242
/**
4343
*
4444
*/
@@ -81,8 +81,6 @@ public void onRefreshSettings(Settings settings) {
8181

8282
private final ClusterService clusterService;
8383

84-
private final ThreadPool threadPool;
85-
8684
private volatile String rateLimitingType;
8785
private volatile ByteSizeValue rateLimitingThrottle;
8886
private final StoreRateLimiting rateLimiting = new StoreRateLimiting();
@@ -96,7 +94,6 @@ public IndicesStore(Settings settings, NodeEnvironment nodeEnv, NodeSettingsServ
9694
this.nodeSettingsService = nodeSettingsService;
9795
this.indicesService = indicesService;
9896
this.clusterService = clusterService;
99-
this.threadPool = threadPool;
10097

10198
this.rateLimitingType = componentSettings.get("throttle.type", "none");
10299
rateLimiting.setType(rateLimitingType);
@@ -128,55 +125,20 @@ public void clusterChanged(ClusterChangedEvent event) {
128125
return;
129126
}
130127

131-
// when all shards are started within a shard replication group, delete an unallocated shard on this node
132-
RoutingTable routingTable = event.state().routingTable();
133-
for (IndexRoutingTable indexRoutingTable : routingTable) {
134-
IndexService indexService = indicesService.indexService(indexRoutingTable.index());
135-
if (indexService == null) {
136-
// we handle this later...
137-
continue;
138-
}
139-
// if the store is not persistent, don't bother trying to check if it can be deleted
140-
if (!indexService.store().persistent()) {
141-
continue;
142-
}
128+
for (IndexRoutingTable indexRoutingTable : event.state().routingTable()) {
129+
// Note, closed indices will not have any routing information, so won't be deleted
143130
for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) {
144-
// if it has been created on this node, we don't want to delete it
145-
if (indexService.hasShard(indexShardRoutingTable.shardId().id())) {
146-
continue;
147-
}
148-
if (!indexService.store().canDeleteUnallocated(indexShardRoutingTable.shardId())) {
149-
continue;
150-
}
151-
// only delete an unallocated shard if all (other shards) are started
152-
int startedShardsCount = indexShardRoutingTable.countWithState(ShardRoutingState.STARTED);
153-
if (startedShardsCount > 0 && startedShardsCount == indexShardRoutingTable.size()) {
154-
if (logger.isDebugEnabled()) {
155-
logger.debug("[{}][{}] deleting unallocated shard", indexShardRoutingTable.shardId().index().name(), indexShardRoutingTable.shardId().id());
156-
}
157-
try {
158-
indexService.store().deleteUnallocated(indexShardRoutingTable.shardId());
159-
} catch (Exception e) {
160-
logger.debug("[{}][{}] failed to delete unallocated shard, ignoring", e, indexShardRoutingTable.shardId().index().name(), indexShardRoutingTable.shardId().id());
161-
}
162-
}
163-
}
164-
}
165-
166-
// do the reverse, and delete dangling shards that might remain on that node
167-
// but are allocated on other nodes
168-
if (nodeEnv.hasNodeFile()) {
169-
// delete unused shards for existing indices
170-
for (IndexRoutingTable indexRoutingTable : routingTable) {
171-
IndexService indexService = indicesService.indexService(indexRoutingTable.index());
172-
if (indexService != null) { // allocated, ignore this
173-
continue;
174-
}
175-
for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) {
176-
boolean shardCanBeDeleted = true;
131+
ShardId shardId = indexShardRoutingTable.shardId();
132+
// a shard can be deleted if all its copies are active, and its not allocated on this node
133+
boolean shardCanBeDeleted = true;
134+
if (indexShardRoutingTable.size() == 0) {
135+
// should not really happen, there should always be at least 1 (primary) shard in a
136+
// shard replication group, in any case, protected from deleting something by mistake
137+
shardCanBeDeleted = false;
138+
} else {
177139
for (ShardRouting shardRouting : indexShardRoutingTable) {
178-
// don't delete a shard that not all instances are active
179-
if (!shardRouting.active()) {
140+
// be conservative here, check on started, not even active
141+
if (!shardRouting.started()) {
180142
shardCanBeDeleted = false;
181143
break;
182144
}
@@ -188,13 +150,30 @@ public void clusterChanged(ClusterChangedEvent event) {
188150
break;
189151
}
190152
}
191-
if (shardCanBeDeleted) {
192-
ShardId shardId = indexShardRoutingTable.shardId();
193-
for (File shardLocation : nodeEnv.shardLocations(shardId)) {
194-
if (shardLocation.exists()) {
153+
}
154+
if (shardCanBeDeleted) {
155+
IndexService indexService = indicesService.indexService(indexRoutingTable.index());
156+
if (indexService == null) {
157+
// not physical allocation of the index, delete it from the file system if applicable
158+
if (nodeEnv.hasNodeFile()) {
159+
logger.debug("[{}][{}] deleting shard that is no longer used", shardId.index().name(), shardId.id());
160+
FileSystemUtils.deleteRecursively(nodeEnv.shardLocations(shardId));
161+
}
162+
} else {
163+
if (!indexService.hasShard(shardId.id())) {
164+
if (indexService.store().canDeleteUnallocated(shardId)) {
195165
logger.debug("[{}][{}] deleting shard that is no longer used", shardId.index().name(), shardId.id());
196-
FileSystemUtils.deleteRecursively(shardLocation);
166+
try {
167+
indexService.store().deleteUnallocated(indexShardRoutingTable.shardId());
168+
} catch (Exception e) {
169+
logger.debug("[{}][{}] failed to delete unallocated shard, ignoring", e, indexShardRoutingTable.shardId().index().name(), indexShardRoutingTable.shardId().id());
170+
}
197171
}
172+
} else {
173+
// this state is weird, should we log?
174+
// basically, it means that the shard is not allocated on this node using the routing
175+
// but its still physically exists on an IndexService
176+
// Note, this listener should run after IndicesClusterStateService...
198177
}
199178
}
200179
}

0 commit comments

Comments
 (0)