Skip to content

Commit c2073c3

Browse files
committed
improve allocation decisions so that they can explain why a decision has been made
building the infra to support explaining why an allocation decision has been made, for example, why a shard is not allocated on a specific node
1 parent c350dcd commit c2073c3

File tree

8 files changed

+148
-80
lines changed

8 files changed

+148
-80
lines changed

src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/EvenShardsCountAllocator.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.elasticsearch.cluster.routing.allocation.FailedRerouteAllocation;
2828
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
2929
import org.elasticsearch.cluster.routing.allocation.StartedRerouteAllocation;
30+
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
3031
import org.elasticsearch.common.component.AbstractComponent;
3132
import org.elasticsearch.common.inject.Inject;
3233
import org.elasticsearch.common.settings.Settings;
@@ -77,7 +78,8 @@ public boolean allocateUnassigned(RoutingAllocation allocation) {
7778
lastNode = 0;
7879
}
7980

80-
if (allocation.deciders().canAllocate(shard, node, allocation).allocate()) {
81+
Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
82+
if (decision.type() == Decision.Type.YES) {
8183
int numberOfShardsToAllocate = routingNodes.requiredAverageNumberOfShardsPerNode() - node.shards().size();
8284
if (numberOfShardsToAllocate <= 0) {
8385
continue;
@@ -96,7 +98,8 @@ public boolean allocateUnassigned(RoutingAllocation allocation) {
9698
MutableShardRouting shard = it.next();
9799
// go over the nodes and try and allocate the remaining ones
98100
for (RoutingNode routingNode : sortedNodesLeastToHigh(allocation)) {
99-
if (allocation.deciders().canAllocate(shard, routingNode, allocation).allocate()) {
101+
Decision decision = allocation.deciders().canAllocate(shard, routingNode, allocation);
102+
if (decision.type() == Decision.Type.YES) {
100103
changed = true;
101104
routingNode.add(shard);
102105
it.remove();
@@ -142,7 +145,8 @@ public boolean rebalance(RoutingAllocation allocation) {
142145
continue;
143146
}
144147

145-
if (allocation.deciders().canAllocate(startedShard, lowRoutingNode, allocation).allocate()) {
148+
Decision decision = allocation.deciders().canAllocate(startedShard, lowRoutingNode, allocation);
149+
if (decision.type() == Decision.Type.YES) {
146150
changed = true;
147151
lowRoutingNode.add(new MutableShardRouting(startedShard.index(), startedShard.id(),
148152
lowRoutingNode.nodeId(), startedShard.currentNodeId(),
@@ -179,7 +183,8 @@ public boolean move(MutableShardRouting shardRouting, RoutingNode node, RoutingA
179183
if (nodeToCheck.nodeId().equals(node.nodeId())) {
180184
continue;
181185
}
182-
if (allocation.deciders().canAllocate(shardRouting, nodeToCheck, allocation).allocate()) {
186+
Decision decision = allocation.deciders().canAllocate(shardRouting, nodeToCheck, allocation);
187+
if (decision.type() == Decision.Type.YES) {
183188
nodeToCheck.add(new MutableShardRouting(shardRouting.index(), shardRouting.id(),
184189
nodeToCheck.nodeId(), shardRouting.currentNodeId(),
185190
shardRouting.primary(), INITIALIZING, shardRouting.version() + 1));

src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateAllocationCommand.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.elasticsearch.cluster.routing.MutableShardRouting;
2727
import org.elasticsearch.cluster.routing.RoutingNode;
2828
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
29+
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
2930
import org.elasticsearch.common.io.stream.StreamInput;
3031
import org.elasticsearch.common.io.stream.StreamOutput;
3132
import org.elasticsearch.common.xcontent.ToXContent;
@@ -160,8 +161,9 @@ public void execute(RoutingAllocation allocation) throws ElasticSearchException
160161
}
161162

162163
RoutingNode routingNode = allocation.routingNodes().node(discoNode.id());
163-
if (!allocation.deciders().canAllocate(shardRouting, routingNode, allocation).allowed()) {
164-
throw new ElasticSearchIllegalArgumentException("[allocate] allocation of " + shardId + " on node " + discoNode + " is not allowed");
164+
Decision decision = allocation.deciders().canAllocate(shardRouting, routingNode, allocation);
165+
if (decision.type() == Decision.Type.NO) {
166+
throw new ElasticSearchIllegalArgumentException("[allocate] allocation of " + shardId + " on node " + discoNode + " is not allowed, reason: " + decision);
165167
}
166168
// go over and remove it from the unassigned
167169
for (Iterator<MutableShardRouting> it = allocation.routingNodes().unassigned().iterator(); it.hasNext(); ) {

src/main/java/org/elasticsearch/cluster/routing/allocation/command/MoveAllocationCommand.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import org.elasticsearch.cluster.routing.RoutingNode;
2828
import org.elasticsearch.cluster.routing.ShardRoutingState;
2929
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
30-
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider;
30+
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
3131
import org.elasticsearch.common.io.stream.StreamInput;
3232
import org.elasticsearch.common.io.stream.StreamOutput;
3333
import org.elasticsearch.common.xcontent.ToXContent;
@@ -158,11 +158,11 @@ public void execute(RoutingAllocation allocation) throws ElasticSearchException
158158
}
159159

160160
RoutingNode toRoutingNode = allocation.routingNodes().node(toDiscoNode.id());
161-
AllocationDecider.Decision decision = allocation.deciders().canAllocate(shardRouting, toRoutingNode, allocation);
162-
if (!decision.allowed()) {
163-
throw new ElasticSearchIllegalArgumentException("[move_allocation] can't move " + shardId + ", from " + fromDiscoNode + ", to " + toDiscoNode + ", since its not allowed");
161+
Decision decision = allocation.deciders().canAllocate(shardRouting, toRoutingNode, allocation);
162+
if (decision.type() == Decision.Type.NO) {
163+
throw new ElasticSearchIllegalArgumentException("[move_allocation] can't move " + shardId + ", from " + fromDiscoNode + ", to " + toDiscoNode + ", since its not allowed, reason: " + decision);
164164
}
165-
if (!decision.allocate()) {
165+
if (decision.type() == Decision.Type.THROTTLE) {
166166
// its being throttled, maybe have a flag to take it into account and fail? for now, just do it since the "user" wants it...
167167
}
168168

src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDecider.java

Lines changed: 1 addition & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -30,54 +30,6 @@
3030
*/
3131
public abstract class AllocationDecider extends AbstractComponent {
3232

33-
public static enum Decision {
34-
YES {
35-
@Override
36-
public boolean allocate() {
37-
return true;
38-
}
39-
40-
@Override
41-
public boolean allowed() {
42-
return true;
43-
}
44-
},
45-
NO {
46-
@Override
47-
public boolean allocate() {
48-
return false;
49-
}
50-
51-
@Override
52-
public boolean allowed() {
53-
return false;
54-
}
55-
},
56-
THROTTLE {
57-
@Override
58-
public boolean allocate() {
59-
return false;
60-
}
61-
62-
@Override
63-
public boolean allowed() {
64-
return true;
65-
}
66-
};
67-
68-
/**
69-
* It can be allocated *now* on a node. Note, it might be {@link #allowed()} to be allocated
70-
* on a node, yet, allocate will be <tt>false</tt> since its being throttled for example.
71-
*/
72-
public abstract boolean allocate();
73-
74-
/**
75-
* Is allocation allowed on a node. Note, this does not mean that we should allocate *now*,
76-
* though, in extreme cases, we might "force" allocation.
77-
*/
78-
public abstract boolean allowed();
79-
}
80-
8133
protected AllocationDecider(Settings settings) {
8234
super(settings);
8335
}
@@ -87,7 +39,7 @@ public boolean canRebalance(ShardRouting shardRouting, RoutingAllocation allocat
8739
}
8840

8941
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
90-
return Decision.YES;
42+
return Decision.ALWAYS;
9143
}
9244

9345
/**

src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDeciders.java

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -70,18 +70,16 @@ public boolean canRebalance(ShardRouting shardRouting, RoutingAllocation allocat
7070

7171
@Override
7272
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
73-
Decision ret = Decision.YES;
74-
// first, check if its in the ignored, if so, return NO
7573
if (allocation.shouldIgnoreShardForNode(shardRouting.shardId(), node.nodeId())) {
7674
return Decision.NO;
7775
}
78-
// now, go over the registered allocations
79-
for (AllocationDecider allocation1 : allocations) {
80-
Decision decision = allocation1.canAllocate(shardRouting, node, allocation);
81-
if (decision == Decision.NO) {
82-
return Decision.NO;
83-
} else if (decision == Decision.THROTTLE) {
84-
ret = Decision.THROTTLE;
76+
Decision.Multi ret = new Decision.Multi();
77+
for (AllocationDecider allocationDecider : allocations) {
78+
Decision decision = allocationDecider.canAllocate(shardRouting, node, allocation);
79+
// the assumption is that a decider that returns the static instance Decision#ALWAYS
80+
// does not really implement canAllocate
81+
if (decision != Decision.ALWAYS) {
82+
ret.add(decision);
8583
}
8684
}
8785
return ret;
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/*
2+
* Licensed to ElasticSearch and Shay Banon under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. ElasticSearch licenses this
6+
* file to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.cluster.routing.allocation.decider;
21+
22+
import com.google.common.collect.Lists;
23+
24+
import java.util.List;
25+
26+
/**
 * The outcome of an allocation check, optionally carrying a human readable explanation.
 * <p>
 * Every decision exposes a {@link Type}. A {@link Single} decision holds one type plus an
 * optional format-string explanation; a {@link Multi} decision aggregates several decisions
 * and derives its type from them, so its string form lists every contributing decision.
 */
public abstract class Decision {

    /**
     * A {@link Type#YES} decision kept as its own instance, distinct from {@link #YES}.
     * Callers can compare against it by identity to recognise a "yes" that was returned by
     * default rather than produced by an actual check.
     */
    public static final Decision ALWAYS = new Single(Type.YES);

    /** Shared {@link Type#YES} decision without an explanation. */
    public static final Decision YES = new Single(Type.YES);

    /** Shared {@link Type#NO} decision without an explanation. */
    public static final Decision NO = new Single(Type.NO);

    /** Shared {@link Type#THROTTLE} decision without an explanation. */
    public static final Decision THROTTLE = new Single(Type.THROTTLE);

    /**
     * Creates a decision of the given type with an explanation.
     *
     * @param type              the decision type
     * @param explanation       a {@link String#format(String, Object...)} pattern describing the reason
     * @param explanationParams the arguments for the pattern; formatting only happens in
     *                          {@link Object#toString()}, not when the decision is created
     * @return a new {@link Single} decision
     */
    public static Decision single(Type type, String explanation, Object... explanationParams) {
        return new Single(type, explanation, explanationParams);
    }

    /** The possible kinds of decision. */
    public enum Type {
        YES,
        NO,
        THROTTLE
    }

    /**
     * @return the type of this decision
     */
    public abstract Type type();

    /**
     * A decision with a single type and an optional explanation.
     */
    public static class Single extends Decision {
        private final Type type;
        private final String explanation;
        private final Object[] explanationParams;

        /**
         * Creates a decision without an explanation.
         *
         * @param type the decision type
         */
        public Single(Type type) {
            this(type, null, (Object[]) null);
        }

        /**
         * Creates a decision with an explanation pattern and its arguments.
         *
         * @param type              the decision type
         * @param explanation       a {@link String#format(String, Object...)} pattern, or {@code null}
         * @param explanationParams the arguments for the pattern
         */
        public Single(Type type, String explanation, Object... explanationParams) {
            this.type = type;
            this.explanation = explanation;
            this.explanationParams = explanationParams;
        }

        @Override
        public Type type() {
            return this.type;
        }

        /**
         * @return {@code TYPE()} when there is no explanation, otherwise {@code TYPE(<formatted explanation>)}
         */
        @Override
        public String toString() {
            if (explanation == null) {
                return type + "()";
            }
            // the explanation is only formatted here, when the text is actually requested
            return type + "(" + String.format(explanation, explanationParams) + ")";
        }
    }

    /**
     * A decision composed of several other decisions.
     */
    public static class Multi extends Decision {

        private final List<Decision> decisions = new java.util.ArrayList<Decision>();

        /**
         * Appends a decision to this aggregate.
         *
         * @param decision the decision to add
         * @return this instance, to allow chaining
         */
        public Multi add(Decision decision) {
            decisions.add(decision);
            return this;
        }

        /**
         * Combines the types of all added decisions: the first {@link Type#NO} wins outright,
         * otherwise any {@link Type#THROTTLE} yields {@link Type#THROTTLE}, otherwise the result
         * is {@link Type#YES} (which is also the result when nothing has been added).
         */
        @Override
        public Type type() {
            Type ret = Type.YES;
            for (Decision decision : decisions) {
                Type type = decision.type();
                if (type == Type.NO) {
                    return type;
                } else if (type == Type.THROTTLE) {
                    ret = type;
                }
            }
            return ret;
        }

        /**
         * @return the string form of every added decision, each wrapped in square brackets,
         *         in insertion order; empty when nothing has been added
         */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            for (Decision decision : decisions) {
                sb.append("[").append(decision.toString()).append("]");
            }
            return sb.toString();
        }
    }
}

src/main/java/org/elasticsearch/gateway/blobstore/BlobReuseExistingGatewayAllocator.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
3333
import org.elasticsearch.cluster.routing.allocation.StartedRerouteAllocation;
3434
import org.elasticsearch.cluster.routing.allocation.allocator.GatewayAllocator;
35-
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider;
35+
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
3636
import org.elasticsearch.common.component.AbstractComponent;
3737
import org.elasticsearch.common.inject.Inject;
3838
import org.elasticsearch.common.settings.Settings;
@@ -119,7 +119,8 @@ public boolean allocateUnassigned(RoutingAllocation allocation) {
119119
continue;
120120
}
121121
// if its THROTTLING, we are not going to allocate it to this node, so ignore it as well
122-
if (allocation.deciders().canAllocate(shard, node, allocation).allocate()) {
122+
Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
123+
if (decision.type() == Decision.Type.YES) {
123124
canBeAllocatedToAtLeastOneNode = true;
124125
break;
125126
}
@@ -153,7 +154,7 @@ public boolean allocateUnassigned(RoutingAllocation allocation) {
153154
// check if we can allocate on that node...
154155
// we only check for NO, since if this node is THROTTLING and it has enough "same data"
155156
// then we will try and assign it next time
156-
if (allocation.deciders().canAllocate(shard, node, allocation) == AllocationDecider.Decision.NO) {
157+
if (allocation.deciders().canAllocate(shard, node, allocation).type() == Decision.Type.NO) {
157158
continue;
158159
}
159160

@@ -236,7 +237,7 @@ public boolean allocateUnassigned(RoutingAllocation allocation) {
236237
}
237238

238239
if (lastNodeMatched != null) {
239-
if (allocation.deciders().canAllocate(shard, lastNodeMatched, allocation) == AllocationDecider.Decision.THROTTLE) {
240+
if (allocation.deciders().canAllocate(shard, lastNodeMatched, allocation).type() == Decision.Type.THROTTLE) {
240241
if (logger.isTraceEnabled()) {
241242
logger.debug("[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store with total_size [{}]", shard.index(), shard.id(), shard, lastDiscoNodeMatched, new ByteSizeValue(lastSizeMatched));
242243
}

src/main/java/org/elasticsearch/gateway/local/LocalGatewayAllocator.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
3636
import org.elasticsearch.cluster.routing.allocation.StartedRerouteAllocation;
3737
import org.elasticsearch.cluster.routing.allocation.allocator.GatewayAllocator;
38-
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider;
38+
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
3939
import org.elasticsearch.common.component.AbstractComponent;
4040
import org.elasticsearch.common.inject.Inject;
4141
import org.elasticsearch.common.settings.Settings;
@@ -195,10 +195,10 @@ public boolean allocateUnassigned(RoutingAllocation allocation) {
195195
Set<DiscoveryNode> noNodes = Sets.newHashSet();
196196
for (DiscoveryNode discoNode : nodesWithHighestVersion) {
197197
RoutingNode node = routingNodes.node(discoNode.id());
198-
AllocationDecider.Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
199-
if (decision == AllocationDecider.Decision.THROTTLE) {
198+
Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
199+
if (decision.type() == Decision.Type.THROTTLE) {
200200
throttledNodes.add(discoNode);
201-
} else if (decision == AllocationDecider.Decision.NO) {
201+
} else if (decision.type() == Decision.Type.NO) {
202202
noNodes.add(discoNode);
203203
} else {
204204
if (logger.isDebugEnabled()) {
@@ -258,7 +258,8 @@ public boolean allocateUnassigned(RoutingAllocation allocation) {
258258
}
259259
// if we can't allocate it on a node, ignore it, for example, this handles
260260
// cases for only allocating a replica after a primary
261-
if (allocation.deciders().canAllocate(shard, node, allocation).allocate()) {
261+
Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
262+
if (decision.type() == Decision.Type.YES) {
262263
canBeAllocatedToAtLeastOneNode = true;
263264
break;
264265
}
@@ -292,7 +293,8 @@ public boolean allocateUnassigned(RoutingAllocation allocation) {
292293
// check if we can allocate on that node...
293294
// we only check for NO, since if this node is THROTTLING and it has enough "same data"
294295
// then we will try and assign it next time
295-
if (allocation.deciders().canAllocate(shard, node, allocation) == AllocationDecider.Decision.NO) {
296+
Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
297+
if (decision.type() == Decision.Type.NO) {
296298
continue;
297299
}
298300

@@ -328,7 +330,8 @@ public boolean allocateUnassigned(RoutingAllocation allocation) {
328330

329331
if (lastNodeMatched != null) {
330332
// we only check on THROTTLE since we checked before before on NO
331-
if (allocation.deciders().canAllocate(shard, lastNodeMatched, allocation) == AllocationDecider.Decision.THROTTLE) {
333+
Decision decision = allocation.deciders().canAllocate(shard, lastNodeMatched, allocation);
334+
if (decision.type() == Decision.Type.THROTTLE) {
332335
if (logger.isTraceEnabled()) {
333336
logger.debug("[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store with total_size [{}]", shard.index(), shard.id(), shard, lastDiscoNodeMatched, new ByteSizeValue(lastSizeMatched));
334337
}

0 commit comments

Comments
 (0)