Skip to content

Commit 1dae144

Browse files
authored
xds: Fix load reporting when pick first is used for locality-routing. (#11495)
* Determine subchannel's network locality from connected address, instead of assuming that all addresses for a subchannel are in the same locality.
1 parent 421e237 commit 1dae144

File tree

9 files changed

+352
-64
lines changed

9 files changed

+352
-64
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright 2024 The gRPC Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.grpc;
18+
19+
/**
20+
* An internal class. Do not use.
21+
*
22+
* <p>An interface to provide the attributes for address connected by subchannel.
23+
*/
24+
@Internal
25+
public interface InternalSubchannelAddressAttributes {
26+
27+
/**
28+
* Return attributes of the server address connected by sub channel.
29+
*/
30+
public Attributes getConnectedAddressAttributes();
31+
}

api/src/main/java/io/grpc/LoadBalancer.java

+12
Original file line numberDiff line numberDiff line change
@@ -1428,6 +1428,18 @@ public void updateAddresses(List<EquivalentAddressGroup> addrs) {
14281428
public Object getInternalSubchannel() {
14291429
throw new UnsupportedOperationException();
14301430
}
1431+
1432+
/**
1433+
* (Internal use only) returns attributes of the address subchannel is connected to.
1434+
*
1435+
* <p>Warning: this is INTERNAL API, is not supposed to be used by external users, and may
1436+
* change without notice. If you think you must use it, please file an issue and we can consider
1437+
* removing its "internal" status.
1438+
*/
1439+
@Internal
1440+
public Attributes getConnectedAddressAttributes() {
1441+
throw new UnsupportedOperationException();
1442+
}
14311443
}
14321444

14331445
/**

core/src/main/java/io/grpc/internal/InternalSubchannel.java

+10
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ protected void handleNotInUse() {
157157

158158
private Status shutdownReason;
159159

160+
private volatile Attributes connectedAddressAttributes;
161+
160162
InternalSubchannel(List<EquivalentAddressGroup> addressGroups, String authority, String userAgent,
161163
BackoffPolicy.Provider backoffPolicyProvider,
162164
ClientTransportFactory transportFactory, ScheduledExecutorService scheduledExecutor,
@@ -525,6 +527,13 @@ public void run() {
525527
return channelStatsFuture;
526528
}
527529

530+
/**
531+
* Return attributes for server address connected by sub channel.
532+
*/
533+
public Attributes getConnectedAddressAttributes() {
534+
return connectedAddressAttributes;
535+
}
536+
528537
ConnectivityState getState() {
529538
return state.getState();
530539
}
@@ -568,6 +577,7 @@ public void run() {
568577
} else if (pendingTransport == transport) {
569578
activeTransport = transport;
570579
pendingTransport = null;
580+
connectedAddressAttributes = addressIndex.getCurrentEagAttributes();
571581
gotoNonErrorState(READY);
572582
}
573583
}

core/src/main/java/io/grpc/internal/ManagedChannelImpl.java

+5
Original file line numberDiff line numberDiff line change
@@ -2044,6 +2044,11 @@ public void updateAddresses(List<EquivalentAddressGroup> addrs) {
20442044
subchannel.updateAddresses(addrs);
20452045
}
20462046

2047+
@Override
2048+
public Attributes getConnectedAddressAttributes() {
2049+
return subchannel.getConnectedAddressAttributes();
2050+
}
2051+
20472052
private List<EquivalentAddressGroup> stripOverrideAuthorityAttributes(
20482053
List<EquivalentAddressGroup> eags) {
20492054
List<EquivalentAddressGroup> eagsWithoutOverrideAttr = new ArrayList<>();

core/src/test/java/io/grpc/internal/InternalSubchannelTest.java

+26
Original file line numberDiff line numberDiff line change
@@ -1339,6 +1339,32 @@ public void channelzStatContainsTransport() throws Exception {
13391339
assertThat(index.getCurrentAddress()).isSameInstanceAs(addr2);
13401340
}
13411341

1342+
@Test
1343+
public void connectedAddressAttributes_ready() {
1344+
SocketAddress addr = new SocketAddress() {};
1345+
Attributes attr = Attributes.newBuilder().set(Attributes.Key.create("some-key"), "1").build();
1346+
createInternalSubchannel(new EquivalentAddressGroup(Arrays.asList(addr), attr));
1347+
1348+
assertEquals(IDLE, internalSubchannel.getState());
1349+
assertNoCallbackInvoke();
1350+
assertNull(internalSubchannel.obtainActiveTransport());
1351+
assertNull(internalSubchannel.getConnectedAddressAttributes());
1352+
1353+
assertExactCallbackInvokes("onStateChange:CONNECTING");
1354+
assertEquals(CONNECTING, internalSubchannel.getState());
1355+
verify(mockTransportFactory).newClientTransport(
1356+
eq(addr),
1357+
eq(createClientTransportOptions().setEagAttributes(attr)),
1358+
isA(TransportLogger.class));
1359+
assertNull(internalSubchannel.getConnectedAddressAttributes());
1360+
1361+
internalSubchannel.obtainActiveTransport();
1362+
transports.peek().listener.transportReady();
1363+
assertExactCallbackInvokes("onStateChange:READY");
1364+
assertEquals(READY, internalSubchannel.getState());
1365+
assertEquals(attr, internalSubchannel.getConnectedAddressAttributes());
1366+
}
1367+
13421368
/** Create ClientTransportOptions. Should not be reused if it may be mutated. */
13431369
private ClientTransportFactory.ClientTransportOptions createClientTransportOptions() {
13441370
return new ClientTransportFactory.ClientTransportOptions()

util/src/main/java/io/grpc/util/ForwardingSubchannel.java

+6
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,17 @@ public Object getInternalSubchannel() {
7474
return delegate().getInternalSubchannel();
7575
}
7676

77+
7778
@Override
7879
public void updateAddresses(List<EquivalentAddressGroup> addrs) {
7980
delegate().updateAddresses(addrs);
8081
}
8182

83+
@Override
84+
public Attributes getConnectedAddressAttributes() {
85+
return delegate().getConnectedAddressAttributes();
86+
}
87+
8288
@Override
8389
public String toString() {
8490
return MoreObjects.toStringHelper(this).add("delegate", delegate()).toString();

xds/src/main/java/io/grpc/xds/ClusterImplLoadBalancer.java

+103-38
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import io.grpc.ClientStreamTracer;
2828
import io.grpc.ClientStreamTracer.StreamInfo;
2929
import io.grpc.ConnectivityState;
30+
import io.grpc.ConnectivityStateInfo;
3031
import io.grpc.EquivalentAddressGroup;
3132
import io.grpc.InternalLogId;
3233
import io.grpc.LoadBalancer;
@@ -59,6 +60,7 @@
5960
import java.util.Map;
6061
import java.util.Objects;
6162
import java.util.concurrent.atomic.AtomicLong;
63+
import java.util.concurrent.atomic.AtomicReference;
6264
import javax.annotation.Nullable;
6365

6466
/**
@@ -77,10 +79,8 @@ final class ClusterImplLoadBalancer extends LoadBalancer {
7779
Strings.isNullOrEmpty(System.getenv("GRPC_XDS_EXPERIMENTAL_CIRCUIT_BREAKING"))
7880
|| Boolean.parseBoolean(System.getenv("GRPC_XDS_EXPERIMENTAL_CIRCUIT_BREAKING"));
7981

80-
private static final Attributes.Key<ClusterLocalityStats> ATTR_CLUSTER_LOCALITY_STATS =
81-
Attributes.Key.create("io.grpc.xds.ClusterImplLoadBalancer.clusterLocalityStats");
82-
private static final Attributes.Key<String> ATTR_CLUSTER_LOCALITY_NAME =
83-
Attributes.Key.create("io.grpc.xds.ClusterImplLoadBalancer.clusterLocalityName");
82+
private static final Attributes.Key<AtomicReference<ClusterLocality>> ATTR_CLUSTER_LOCALITY =
83+
Attributes.Key.create("io.grpc.xds.ClusterImplLoadBalancer.clusterLocality");
8484

8585
private final XdsLogger logger;
8686
private final Helper helper;
@@ -213,36 +213,45 @@ public void updateBalancingState(ConnectivityState newState, SubchannelPicker ne
213213
@Override
214214
public Subchannel createSubchannel(CreateSubchannelArgs args) {
215215
List<EquivalentAddressGroup> addresses = withAdditionalAttributes(args.getAddresses());
216-
Locality locality = args.getAddresses().get(0).getAttributes().get(
217-
InternalXdsAttributes.ATTR_LOCALITY); // all addresses should be in the same locality
218-
String localityName = args.getAddresses().get(0).getAttributes().get(
219-
InternalXdsAttributes.ATTR_LOCALITY_NAME);
220-
// Endpoint addresses resolved by ClusterResolverLoadBalancer should always contain
221-
// attributes with its locality, including endpoints in LOGICAL_DNS clusters.
222-
// In case of not (which really shouldn't), loads are aggregated under an empty locality.
223-
if (locality == null) {
224-
locality = Locality.create("", "", "");
225-
localityName = "";
226-
}
227-
final ClusterLocalityStats localityStats =
228-
(lrsServerInfo == null)
229-
? null
230-
: xdsClient.addClusterLocalityStats(lrsServerInfo, cluster,
231-
edsServiceName, locality);
232-
216+
// This value for ClusterLocality is not recommended for general use.
217+
// Currently, we extract locality data from the first address, even before the subchannel is
218+
// READY.
219+
// This is mainly to accommodate scenarios where a Load Balancing API (like "pick first")
220+
// might return the subchannel before it is READY. Typically, we wouldn't report load for such
221+
// selections because the channel will disregard the chosen (not-ready) subchannel.
222+
// However, we needed to ensure this case is handled.
223+
ClusterLocality clusterLocality = createClusterLocalityFromAttributes(
224+
args.getAddresses().get(0).getAttributes());
225+
AtomicReference<ClusterLocality> localityAtomicReference = new AtomicReference<>(
226+
clusterLocality);
233227
Attributes attrs = args.getAttributes().toBuilder()
234-
.set(ATTR_CLUSTER_LOCALITY_STATS, localityStats)
235-
.set(ATTR_CLUSTER_LOCALITY_NAME, localityName)
228+
.set(ATTR_CLUSTER_LOCALITY, localityAtomicReference)
236229
.build();
237230
args = args.toBuilder().setAddresses(addresses).setAttributes(attrs).build();
238231
final Subchannel subchannel = delegate().createSubchannel(args);
239232

240233
return new ForwardingSubchannel() {
234+
@Override
235+
public void start(SubchannelStateListener listener) {
236+
delegate().start(new SubchannelStateListener() {
237+
@Override
238+
public void onSubchannelState(ConnectivityStateInfo newState) {
239+
if (newState.getState().equals(ConnectivityState.READY)) {
240+
// Get locality based on the connected address attributes
241+
ClusterLocality updatedClusterLocality = createClusterLocalityFromAttributes(
242+
subchannel.getConnectedAddressAttributes());
243+
ClusterLocality oldClusterLocality = localityAtomicReference
244+
.getAndSet(updatedClusterLocality);
245+
oldClusterLocality.release();
246+
}
247+
listener.onSubchannelState(newState);
248+
}
249+
});
250+
}
251+
241252
@Override
242253
public void shutdown() {
243-
if (localityStats != null) {
244-
localityStats.release();
245-
}
254+
localityAtomicReference.get().release();
246255
delegate().shutdown();
247256
}
248257

@@ -274,6 +283,28 @@ private List<EquivalentAddressGroup> withAdditionalAttributes(
274283
return newAddresses;
275284
}
276285

286+
private ClusterLocality createClusterLocalityFromAttributes(Attributes addressAttributes) {
287+
Locality locality = addressAttributes.get(InternalXdsAttributes.ATTR_LOCALITY);
288+
String localityName = addressAttributes.get(InternalXdsAttributes.ATTR_LOCALITY_NAME);
289+
290+
// Endpoint addresses resolved by ClusterResolverLoadBalancer should always contain
291+
// attributes with its locality, including endpoints in LOGICAL_DNS clusters.
292+
// In case of not (which really shouldn't), loads are aggregated under an empty
293+
// locality.
294+
if (locality == null) {
295+
locality = Locality.create("", "", "");
296+
localityName = "";
297+
}
298+
299+
final ClusterLocalityStats localityStats =
300+
(lrsServerInfo == null)
301+
? null
302+
: xdsClient.addClusterLocalityStats(lrsServerInfo, cluster,
303+
edsServiceName, locality);
304+
305+
return new ClusterLocality(localityStats, localityName);
306+
}
307+
277308
@Override
278309
protected Helper delegate() {
279310
return helper;
@@ -361,18 +392,23 @@ public PickResult pickSubchannel(PickSubchannelArgs args) {
361392
"Cluster max concurrent requests limit exceeded"));
362393
}
363394
}
364-
final ClusterLocalityStats stats =
365-
result.getSubchannel().getAttributes().get(ATTR_CLUSTER_LOCALITY_STATS);
366-
if (stats != null) {
367-
String localityName =
368-
result.getSubchannel().getAttributes().get(ATTR_CLUSTER_LOCALITY_NAME);
369-
args.getPickDetailsConsumer().addOptionalLabel("grpc.lb.locality", localityName);
370-
371-
ClientStreamTracer.Factory tracerFactory = new CountingStreamTracerFactory(
372-
stats, inFlights, result.getStreamTracerFactory());
373-
ClientStreamTracer.Factory orcaTracerFactory = OrcaPerRequestUtil.getInstance()
374-
.newOrcaClientStreamTracerFactory(tracerFactory, new OrcaPerRpcListener(stats));
375-
return PickResult.withSubchannel(result.getSubchannel(), orcaTracerFactory);
395+
final AtomicReference<ClusterLocality> clusterLocality =
396+
result.getSubchannel().getAttributes().get(ATTR_CLUSTER_LOCALITY);
397+
398+
if (clusterLocality != null) {
399+
ClusterLocalityStats stats = clusterLocality.get().getClusterLocalityStats();
400+
if (stats != null) {
401+
String localityName =
402+
result.getSubchannel().getAttributes().get(ATTR_CLUSTER_LOCALITY).get()
403+
.getClusterLocalityName();
404+
args.getPickDetailsConsumer().addOptionalLabel("grpc.lb.locality", localityName);
405+
406+
ClientStreamTracer.Factory tracerFactory = new CountingStreamTracerFactory(
407+
stats, inFlights, result.getStreamTracerFactory());
408+
ClientStreamTracer.Factory orcaTracerFactory = OrcaPerRequestUtil.getInstance()
409+
.newOrcaClientStreamTracerFactory(tracerFactory, new OrcaPerRpcListener(stats));
410+
return PickResult.withSubchannel(result.getSubchannel(), orcaTracerFactory);
411+
}
376412
}
377413
}
378414
return result;
@@ -447,4 +483,33 @@ public void onLoadReport(MetricReport report) {
447483
stats.recordBackendLoadMetricStats(report.getNamedMetrics());
448484
}
449485
}
486+
487+
/**
488+
* Represents the {@link ClusterLocalityStats} and network locality name of a cluster.
489+
*/
490+
static final class ClusterLocality {
491+
private final ClusterLocalityStats clusterLocalityStats;
492+
private final String clusterLocalityName;
493+
494+
@VisibleForTesting
495+
ClusterLocality(ClusterLocalityStats localityStats, String localityName) {
496+
this.clusterLocalityStats = localityStats;
497+
this.clusterLocalityName = localityName;
498+
}
499+
500+
ClusterLocalityStats getClusterLocalityStats() {
501+
return clusterLocalityStats;
502+
}
503+
504+
String getClusterLocalityName() {
505+
return clusterLocalityName;
506+
}
507+
508+
@VisibleForTesting
509+
void release() {
510+
if (clusterLocalityStats != null) {
511+
clusterLocalityStats.release();
512+
}
513+
}
514+
}
450515
}

xds/src/main/java/io/grpc/xds/client/LoadStatsManager2.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ private synchronized void releaseClusterDropCounter(
9191
String cluster, @Nullable String edsServiceName) {
9292
checkState(allDropStats.containsKey(cluster)
9393
&& allDropStats.get(cluster).containsKey(edsServiceName),
94-
"stats for cluster %s, edsServiceName %s not exits", cluster, edsServiceName);
94+
"stats for cluster %s, edsServiceName %s do not exist", cluster, edsServiceName);
9595
ReferenceCounted<ClusterDropStats> ref = allDropStats.get(cluster).get(edsServiceName);
9696
ref.release();
9797
}

0 commit comments

Comments
 (0)