Skip to content

Commit a55d7ce

Browse files
authored
fix: wrong use of getRetryDelayInMillis() / 1000 in documentation and retry loops (#885)
Both the documentation for `TransactionManager` as well as some internal retry loops wrongly used `SpannerException#getRetryDelayInMillis() / 1000` as input for `Thread.sleep(long)`. The retry delay is already in milliseconds and should not be modified. Fixes #874
1 parent b2b7bb2 commit a55d7ce

16 files changed

+186
-93
lines changed

google-cloud-spanner/src/main/java/com/google/cloud/spanner/DatabaseClient.java

+21-21
Original file line numberDiff line numberDiff line change
@@ -321,19 +321,19 @@ CommitResponse writeAtLeastOnceWithOptions(
321321
* <pre>{@code
322322
* long singerId = my_singer_id;
323323
* try (TransactionManager manager = dbClient.transactionManager()) {
324-
* TransactionContext txn = manager.begin();
324+
* TransactionContext transaction = manager.begin();
325325
* while (true) {
326326
* String column = "FirstName";
327-
* Struct row = txn.readRow("Singers", Key.of(singerId), Collections.singleton(column));
327+
* Struct row = transaction.readRow("Singers", Key.of(singerId), Collections.singleton(column));
328328
* String name = row.getString(column);
329-
* txn.buffer(
329+
* transaction.buffer(
330330
* Mutation.newUpdateBuilder("Singers").set(column).to(name.toUpperCase()).build());
331331
* try {
332332
* manager.commit();
333333
* break;
334334
* } catch (AbortedException e) {
335-
* Thread.sleep(e.getRetryDelayInMillis() / 1000);
336-
* txn = manager.resetForRetry();
335+
* Thread.sleep(e.getRetryDelayInMillis());
336+
* transaction = manager.resetForRetry();
337337
* }
338338
* }
339339
* }
@@ -385,19 +385,19 @@ CommitResponse writeAtLeastOnceWithOptions(
385385
* <pre>{@code
386386
* long singerId = 1L;
387387
* try (AsyncTransactionManager manager = client.transactionManagerAsync()) {
388-
* TransactionContextFuture txnFut = manager.beginAsync();
388+
* TransactionContextFuture transactionFuture = manager.beginAsync();
389389
* while (true) {
390390
* String column = "FirstName";
391391
* CommitTimestampFuture commitTimestamp =
392-
* txnFut
392+
* transactionFuture
393393
* .then(
394-
* (txn, __) ->
395-
* txn.readRowAsync(
394+
* (transaction, __) ->
395+
* transaction.readRowAsync(
396396
* "Singers", Key.of(singerId), Collections.singleton(column)))
397397
* .then(
398-
* (txn, row) -> {
398+
* (transaction, row) -> {
399399
* String name = row.getString(column);
400-
* txn.buffer(
400+
* transaction.buffer(
401401
* Mutation.newUpdateBuilder("Singers")
402402
* .set(column)
403403
* .to(name.toUpperCase())
@@ -409,8 +409,8 @@ CommitResponse writeAtLeastOnceWithOptions(
409409
* commitTimestamp.get();
410410
* break;
411411
* } catch (AbortedException e) {
412-
* Thread.sleep(e.getRetryDelayInMillis() / 1000);
413-
* txnFut = manager.resetForRetryAsync();
412+
* Thread.sleep(e.getRetryDelayInMillis());
413+
* transactionFuture = manager.resetForRetryAsync();
414414
* }
415415
* }
416416
* }
@@ -421,26 +421,26 @@ CommitResponse writeAtLeastOnceWithOptions(
421421
* <pre>{@code
422422
* final long singerId = 1L;
423423
* try (AsyncTransactionManager manager = client().transactionManagerAsync()) {
424-
* TransactionContextFuture txn = manager.beginAsync();
424+
* TransactionContextFuture transactionFuture = manager.beginAsync();
425425
* while (true) {
426426
* final String column = "FirstName";
427427
* CommitTimestampFuture commitTimestamp =
428-
* txn.then(
428+
* transactionFuture.then(
429429
* new AsyncTransactionFunction<Void, Struct>() {
430430
* @Override
431-
* public ApiFuture<Struct> apply(TransactionContext txn, Void input)
431+
* public ApiFuture<Struct> apply(TransactionContext transaction, Void input)
432432
* throws Exception {
433-
* return txn.readRowAsync(
433+
* return transaction.readRowAsync(
434434
* "Singers", Key.of(singerId), Collections.singleton(column));
435435
* }
436436
* })
437437
* .then(
438438
* new AsyncTransactionFunction<Struct, Void>() {
439439
* @Override
440-
* public ApiFuture<Void> apply(TransactionContext txn, Struct input)
440+
* public ApiFuture<Void> apply(TransactionContext transaction, Struct input)
441441
* throws Exception {
442442
* String name = input.getString(column);
443-
* txn.buffer(
443+
* transaction.buffer(
444444
* Mutation.newUpdateBuilder("Singers")
445445
* .set(column)
446446
* .to(name.toUpperCase())
@@ -453,8 +453,8 @@ CommitResponse writeAtLeastOnceWithOptions(
453453
* commitTimestamp.get();
454454
* break;
455455
* } catch (AbortedException e) {
456-
* Thread.sleep(e.getRetryDelayInMillis() / 1000);
457-
* txn = manager.resetForRetryAsync();
456+
* Thread.sleep(e.getRetryDelayInMillis());
457+
* transactionFuture = manager.resetForRetryAsync();
458458
* }
459459
* }
460460
* }

google-cloud-spanner/src/main/java/com/google/cloud/spanner/SessionPool.java

+11-3
Original file line numberDiff line numberDiff line change
@@ -816,13 +816,21 @@ private TransactionContext internalBegin() {
816816
}
817817

818818
@Override
819-
public SpannerException handleSessionNotFound(SessionNotFoundException notFound) {
820-
session = sessionPool.replaceSession(notFound, session);
819+
public SpannerException handleSessionNotFound(SessionNotFoundException notFoundException) {
820+
session = sessionPool.replaceSession(notFoundException, session);
821821
PooledSession pooledSession = session.get();
822822
delegate = pooledSession.delegate.transactionManager(options);
823823
restartedAfterSessionNotFound = true;
824+
return createAbortedExceptionWithMinimalRetryDelay(notFoundException);
825+
}
826+
827+
private static SpannerException createAbortedExceptionWithMinimalRetryDelay(
828+
SessionNotFoundException notFoundException) {
824829
return SpannerExceptionFactory.newSpannerException(
825-
ErrorCode.ABORTED, notFound.getMessage(), notFound);
830+
ErrorCode.ABORTED,
831+
notFoundException.getMessage(),
832+
SpannerExceptionFactory.createAbortedExceptionWithRetryDelay(
833+
notFoundException.getMessage(), notFoundException, 0, 1));
826834
}
827835

828836
@Override

google-cloud-spanner/src/main/java/com/google/cloud/spanner/SpannerExceptionFactory.java

+24
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@
2424
import com.google.common.base.Predicate;
2525
import com.google.rpc.ErrorInfo;
2626
import com.google.rpc.ResourceInfo;
27+
import com.google.rpc.RetryInfo;
2728
import io.grpc.Context;
2829
import io.grpc.Metadata;
2930
import io.grpc.Status;
31+
import io.grpc.StatusRuntimeException;
3032
import io.grpc.protobuf.ProtoUtils;
3133
import java.util.concurrent.CancellationException;
3234
import java.util.concurrent.TimeoutException;
@@ -226,6 +228,28 @@ private static ErrorInfo extractErrorInfo(Throwable cause) {
226228
return null;
227229
}
228230

231+
/**
232+
* Creates a {@link StatusRuntimeException} that contains a {@link RetryInfo} with the specified
233+
* retry delay.
234+
*/
235+
static StatusRuntimeException createAbortedExceptionWithRetryDelay(
236+
String message, Throwable cause, long retryDelaySeconds, int retryDelayNanos) {
237+
Metadata.Key<RetryInfo> key = ProtoUtils.keyForProto(RetryInfo.getDefaultInstance());
238+
Metadata trailers = new Metadata();
239+
RetryInfo retryInfo =
240+
RetryInfo.newBuilder()
241+
.setRetryDelay(
242+
com.google.protobuf.Duration.newBuilder()
243+
.setNanos(retryDelayNanos)
244+
.setSeconds(retryDelaySeconds))
245+
.build();
246+
trailers.put(key, retryInfo);
247+
return io.grpc.Status.ABORTED
248+
.withDescription(message)
249+
.withCause(cause)
250+
.asRuntimeException(trailers);
251+
}
252+
229253
static SpannerException newSpannerExceptionPreformatted(
230254
ErrorCode code, @Nullable String message, @Nullable Throwable cause) {
231255
// This is the one place in the codebase that is allowed to call constructors directly.

google-cloud-spanner/src/main/java/com/google/cloud/spanner/TransactionRunnerImpl.java

+29-15
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,10 @@ public void onError(SpannerException e, boolean withBeginTransaction) {
531531
// Simulate an aborted transaction to force a retry with a new transaction.
532532
this.transactionIdFuture.setException(
533533
SpannerExceptionFactory.newSpannerException(
534-
ErrorCode.ABORTED, "Aborted due to failed initial statement", e));
534+
ErrorCode.ABORTED,
535+
"Aborted due to failed initial statement",
536+
SpannerExceptionFactory.createAbortedExceptionWithRetryDelay(
537+
"Aborted due to failed initial statement", e, 0, 1)));
535538
}
536539

537540
if (e.getErrorCode() == ErrorCode.ABORTED) {
@@ -684,6 +687,19 @@ public void run() {
684687
return updateCount;
685688
}
686689

690+
private SpannerException createAbortedExceptionForBatchDml(ExecuteBatchDmlResponse response) {
691+
// Manually construct an AbortedException with a 10ms retry delay for BatchDML responses that
692+
// return an Aborted status (and not an AbortedException).
693+
return newSpannerException(
694+
ErrorCode.fromRpcStatus(response.getStatus()),
695+
response.getStatus().getMessage(),
696+
SpannerExceptionFactory.createAbortedExceptionWithRetryDelay(
697+
response.getStatus().getMessage(),
698+
/* cause = */ null,
699+
/* retryDelaySeconds = */ 0,
700+
/* retryDelayNanos = */ (int) TimeUnit.MILLISECONDS.toNanos(10L)));
701+
}
702+
687703
@Override
688704
public long[] batchUpdate(Iterable<Statement> statements, UpdateOption... options) {
689705
beforeReadOrQuery();
@@ -705,8 +721,7 @@ public long[] batchUpdate(Iterable<Statement> statements, UpdateOption... option
705721
// If one of the DML statements was aborted, we should throw an aborted exception.
706722
// In all other cases, we should throw a BatchUpdateException.
707723
if (response.getStatus().getCode() == Code.ABORTED_VALUE) {
708-
throw newSpannerException(
709-
ErrorCode.fromRpcStatus(response.getStatus()), response.getStatus().getMessage());
724+
throw createAbortedExceptionForBatchDml(response);
710725
} else if (response.getStatus().getCode() != 0) {
711726
throw newSpannerBatchUpdateException(
712727
ErrorCode.fromRpcStatus(response.getStatus()),
@@ -741,25 +756,24 @@ public ApiFuture<long[]> batchUpdateAsync(
741756
response,
742757
new ApiFunction<ExecuteBatchDmlResponse, long[]>() {
743758
@Override
744-
public long[] apply(ExecuteBatchDmlResponse input) {
745-
long[] results = new long[input.getResultSetsCount()];
746-
for (int i = 0; i < input.getResultSetsCount(); ++i) {
747-
results[i] = input.getResultSets(i).getStats().getRowCountExact();
748-
if (input.getResultSets(i).getMetadata().hasTransaction()) {
759+
public long[] apply(ExecuteBatchDmlResponse batchDmlResponse) {
760+
long[] results = new long[batchDmlResponse.getResultSetsCount()];
761+
for (int i = 0; i < batchDmlResponse.getResultSetsCount(); ++i) {
762+
results[i] = batchDmlResponse.getResultSets(i).getStats().getRowCountExact();
763+
if (batchDmlResponse.getResultSets(i).getMetadata().hasTransaction()) {
749764
onTransactionMetadata(
750-
input.getResultSets(i).getMetadata().getTransaction(),
765+
batchDmlResponse.getResultSets(i).getMetadata().getTransaction(),
751766
builder.getTransaction().hasBegin());
752767
}
753768
}
754769
// If one of the DML statements was aborted, we should throw an aborted exception.
755770
// In all other cases, we should throw a BatchUpdateException.
756-
if (input.getStatus().getCode() == Code.ABORTED_VALUE) {
757-
throw newSpannerException(
758-
ErrorCode.fromRpcStatus(input.getStatus()), input.getStatus().getMessage());
759-
} else if (input.getStatus().getCode() != 0) {
771+
if (batchDmlResponse.getStatus().getCode() == Code.ABORTED_VALUE) {
772+
throw createAbortedExceptionForBatchDml(batchDmlResponse);
773+
} else if (batchDmlResponse.getStatus().getCode() != 0) {
760774
throw newSpannerBatchUpdateException(
761-
ErrorCode.fromRpcStatus(input.getStatus()),
762-
input.getStatus().getMessage(),
775+
ErrorCode.fromRpcStatus(batchDmlResponse.getStatus()),
776+
batchDmlResponse.getStatus().getMessage(),
763777
results);
764778
}
765779
return results;

google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/ReadWriteTransaction.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -725,8 +725,11 @@ private void handleAborted(AbortedException aborted) {
725725
logger.fine(toString() + ": Starting internal transaction retry");
726726
while (true) {
727727
// First back off and then restart the transaction.
728+
long delay = aborted.getRetryDelayInMillis();
728729
try {
729-
Thread.sleep(aborted.getRetryDelayInMillis() / 1000);
730+
if (delay > 0L) {
731+
Thread.sleep(delay);
732+
}
730733
} catch (InterruptedException ie) {
731734
Thread.currentThread().interrupt();
732735
throw SpannerExceptionFactory.newSpannerException(

google-cloud-spanner/src/test/java/com/google/cloud/spanner/AsyncTransactionManagerTest.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -1137,7 +1137,7 @@ public ApiFuture<Void> apply(TransactionContext txn, Struct input)
11371137
commitTimestamp.get();
11381138
break;
11391139
} catch (AbortedException e) {
1140-
Thread.sleep(e.getRetryDelayInMillis() / 1000);
1140+
Thread.sleep(e.getRetryDelayInMillis());
11411141
txn = manager.resetForRetryAsync();
11421142
}
11431143
}

google-cloud-spanner/src/test/java/com/google/cloud/spanner/DatabaseClientImplTest.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ public void transactionManagerIsNonBlocking() throws Exception {
662662
txManager.commit();
663663
break;
664664
} catch (AbortedException e) {
665-
Thread.sleep(e.getRetryDelayInMillis() / 1000);
665+
Thread.sleep(e.getRetryDelayInMillis());
666666
tx = txManager.resetForRetry();
667667
}
668668
}
@@ -705,7 +705,7 @@ public CallbackResponse cursorReady(AsyncResultSet resultSet) {
705705
txManager.commit();
706706
break;
707707
} catch (AbortedException e) {
708-
Thread.sleep(e.getRetryDelayInMillis() / 1000);
708+
Thread.sleep(e.getRetryDelayInMillis());
709709
tx = txManager.resetForRetry();
710710
}
711711
}

google-cloud-spanner/src/test/java/com/google/cloud/spanner/MockSpannerServiceImpl.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -1732,7 +1732,7 @@ private void simulateAbort(Session session, ByteString transactionId) {
17321732

17331733
public StatusRuntimeException createAbortedException(ByteString transactionId) {
17341734
RetryInfo retryInfo =
1735-
RetryInfo.newBuilder().setRetryDelay(Duration.newBuilder().setNanos(100).build()).build();
1735+
RetryInfo.newBuilder().setRetryDelay(Duration.newBuilder().setNanos(1).build()).build();
17361736
Metadata.Key<RetryInfo> key =
17371737
Metadata.Key.of(
17381738
retryInfo.getDescriptorForType().getFullName() + Metadata.BINARY_HEADER_SUFFIX,

0 commit comments

Comments
 (0)