
Commit e77767b

mukund-thakur authored and steveloughran committed
HADOOP-16711.
This adds a new option fs.s3a.bucket.probe, range (0-2) to control which probe for a bucket existence to perform on startup. 0: no checks 1: v1 check (as has been performend until now) 2: v2 bucket check, which also incudes a permission check. Default. When set to 0, bucket existence checks won't be done during initialization thus making it faster. When the bucket is not available in S3, or if fs.s3a.endpoint points to the wrong instance of a private S3 store consecutive calls like listing, read, write etc. will fail with an UnknownStoreException. Contributed by: * Mukund Thakur (main patch and tests) * Rajesh Balamohan (v0 list and performance tests) * lqjacklee (HADOOP-15990/v2 list) * Steve Loughran (UnknownStoreException support) modified: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java modified: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java modified: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java modified: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java new file: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java new file: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java modified: hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md modified: hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md modified: hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md modified: hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java new file: hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java modified: hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java modified: hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java modified: hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java modified: hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java modified: hadoop-tools/hadoop-aws/src/test/resources/core-site.xml Change-Id: Ic174f803e655af172d81c1274ed92b51bdceb384
1 parent e3bba5f commit e77767b

File tree: 16 files changed, +484 -28 lines


hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 14 additions & 0 deletions
@@ -481,6 +481,20 @@ private Constants() {
       "fs.s3a.metadatastore.authoritative";
   public static final boolean DEFAULT_METADATASTORE_AUTHORITATIVE = false;
 
+  /**
+   * Bucket validation parameter which can be set by client. This will be
+   * used in {@code S3AFileSystem.initialize(URI, Configuration)}.
+   * Value: {@value}
+   */
+  public static final String S3A_BUCKET_PROBE = "fs.s3a.bucket.probe";
+
+  /**
+   * Default value of bucket validation parameter. An existence of bucket
+   * will be validated using {@code S3AFileSystem.verifyBucketExistsV2()}.
+   * Value: {@value}
+   */
+  public static final int S3A_BUCKET_PROBE_DEFAULT = 2;
+
   /**
    * How long a directory listing in the MS is considered as authoritative.
    */
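
The new constant is read in `S3AFileSystem.initialize()` via `getConf().getInt(S3A_BUCKET_PROBE, S3A_BUCKET_PROBE_DEFAULT)` (see the `doBucketProbing()` hunk below). A minimal client-side sketch, not part of this commit, of how the option would be set before the filesystem is created; the bucket URI is a placeholder:

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class BucketProbeConfigSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // 0 = skip the startup probe, 1 = v1 doesBucketExist, 2 = v2 check (default)
    conf.setInt("fs.s3a.bucket.probe", 0);
    // the probe (or its absence) happens inside initialize(), triggered here
    FileSystem fs = FileSystem.get(new URI("s3a://example-bucket/"), conf);
    System.out.println("Initialized " + fs.getUri() + " without a bucket existence check");
  }
}
```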

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

Lines changed: 59 additions & 9 deletions
@@ -173,6 +173,7 @@
 import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.hasDelegationTokenBinding;
 import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit;
 import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletionIgnoringExceptions;
+import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
 import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
 import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion;
 import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
@@ -392,9 +393,7 @@ public void initialize(URI name, Configuration originalConf)
     initCannedAcls(conf);
 
     // This initiates a probe against S3 for the bucket existing.
-    // It is where all network and authentication configuration issues
-    // surface, and is potentially slow.
-    verifyBucketExists();
+    doBucketProbing();
 
     inputPolicy = S3AInputPolicy.getPolicy(
         conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL));
@@ -463,6 +462,41 @@ public void initialize(URI name, Configuration originalConf)
 
   }
 
+  /**
+   * Test bucket existence in S3.
+   * When the value of {@link Constants#S3A_BUCKET_PROBE} is set to 0,
+   * bucket existence check is not done to improve performance of
+   * S3AFileSystem initialization. When set to 1 or 2, bucket existence check
+   * will be performed which is potentially slow.
+   * If 3 or higher: warn and use the v2 check.
+   * @throws UnknownStoreException the bucket is absent
+   * @throws IOException any other problem talking to S3
+   */
+  @Retries.RetryTranslated
+  private void doBucketProbing() throws IOException {
+    int bucketProbe = getConf()
+        .getInt(S3A_BUCKET_PROBE, S3A_BUCKET_PROBE_DEFAULT);
+    Preconditions.checkArgument(bucketProbe >= 0,
+        "Value of " + S3A_BUCKET_PROBE + " should be >= 0");
+    switch (bucketProbe) {
+    case 0:
+      LOG.debug("skipping check for bucket existence");
+      break;
+    case 1:
+      verifyBucketExists();
+      break;
+    case 2:
+      verifyBucketExistsV2();
+      break;
+    default:
+      // we have no idea what this is, assume it is from a later release.
+      LOG.warn("Unknown bucket probe option {}: {}; falling back to check #2",
+          S3A_BUCKET_PROBE, bucketProbe);
+      verifyBucketExistsV2();
+      break;
+    }
+  }
+
   /**
    * Initialize the thread pool.
    * This must be re-invoked after replacing the S3Client during test
@@ -510,15 +544,31 @@ protected static S3AStorageStatistics createStorageStatistics() {
    * Verify that the bucket exists. This does not check permissions,
    * not even read access.
    * Retry policy: retrying, translated.
-   * @throws FileNotFoundException the bucket is absent
+   * @throws UnknownStoreException the bucket is absent
    * @throws IOException any other problem talking to S3
    */
   @Retries.RetryTranslated
   protected void verifyBucketExists()
-      throws FileNotFoundException, IOException {
+      throws UnknownStoreException, IOException {
     if (!invoker.retry("doesBucketExist", bucket, true,
         () -> s3.doesBucketExist(bucket))) {
-      throw new FileNotFoundException("Bucket " + bucket + " does not exist");
+      throw new UnknownStoreException("Bucket " + bucket + " does not exist");
+    }
+  }
+
+  /**
+   * Verify that the bucket exists. This will correctly throw an exception
+   * when credentials are invalid.
+   * Retry policy: retrying, translated.
+   * @throws UnknownStoreException the bucket is absent
+   * @throws IOException any other problem talking to S3
+   */
+  @Retries.RetryTranslated
+  protected void verifyBucketExistsV2()
+      throws UnknownStoreException, IOException {
+    if (!invoker.retry("doesBucketExistV2", bucket, true,
+        () -> s3.doesBucketExistV2(bucket))) {
+      throw new UnknownStoreException("Bucket " + bucket + " does not exist");
     }
   }
 
@@ -2891,7 +2941,7 @@ S3AFileStatus s3GetFileStatus(final Path path,
     } catch (AmazonServiceException e) {
       // if the response is a 404 error, it just means that there is
       // no file at that path...the remaining checks will be needed.
-      if (e.getStatusCode() != SC_404) {
+      if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) {
         throw translateException("getFileStatus", path, e);
       }
     } catch (AmazonClientException e) {
@@ -2923,7 +2973,7 @@ S3AFileStatus s3GetFileStatus(final Path path,
             meta.getVersionId());
       }
     } catch (AmazonServiceException e) {
-      if (e.getStatusCode() != SC_404) {
+      if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) {
         throw translateException("getFileStatus", newKey, e);
       }
     } catch (AmazonClientException e) {
@@ -2962,7 +3012,7 @@ S3AFileStatus s3GetFileStatus(final Path path,
         return new S3AFileStatus(Tristate.TRUE, path, username);
       }
     } catch (AmazonServiceException e) {
-      if (e.getStatusCode() != SC_404) {
+      if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) {
         throw translateException("getFileStatus", path, e);
       }
     } catch (AmazonClientException e) {
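
With the probe disabled, a missing bucket or a wrong `fs.s3a.endpoint` is no longer reported by `initialize()`; it surfaces on the first real S3 call as an `UnknownStoreException`. A hypothetical caller, not part of this patch (the bucket name is a placeholder), might handle that like this:

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.UnknownStoreException;

public class DeferredBucketCheckSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.setInt("fs.s3a.bucket.probe", 0);
    // initialize() succeeds even if the bucket is absent
    FileSystem fs = FileSystem.get(new URI("s3a://no-such-bucket-example/"), conf);
    try {
      fs.listStatus(new Path("/"));   // first call that actually talks to S3
    } catch (UnknownStoreException e) {
      // translated from the 404/NoSuchBucket response
      System.err.println("Bucket missing or endpoint wrong: " + e);
    }
  }
}
```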

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java

Lines changed: 1 addition & 0 deletions
@@ -188,6 +188,7 @@ protected Map<Class<? extends Exception>, RetryPolicy> createExceptionMap() {
     policyMap.put(AccessDeniedException.class, fail);
     policyMap.put(NoAuthWithAWSException.class, fail);
     policyMap.put(FileNotFoundException.class, fail);
+    policyMap.put(UnknownStoreException.class, fail);
     policyMap.put(InvalidRequestException.class, fail);
 
     // metadata stores should do retries internally when it makes sense
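
Mapping the new exception to `fail` means a missing bucket is never retried. A hypothetical check, not in this commit, assuming the standard Hadoop `RetryPolicy.shouldRetry()` contract:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.S3ARetryPolicy;
import org.apache.hadoop.fs.s3a.UnknownStoreException;
import org.apache.hadoop.io.retry.RetryPolicy;

public class UnknownStoreRetrySketch {
  public static void main(String[] args) throws Exception {
    RetryPolicy policy = new S3ARetryPolicy(new Configuration());
    RetryPolicy.RetryAction action = policy.shouldRetry(
        new UnknownStoreException("Bucket b does not exist"), 0, 0, true);
    System.out.println("Retry decision: " + action.action);   // expected: FAIL
  }
}
```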

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java

Lines changed: 13 additions & 0 deletions
@@ -86,6 +86,7 @@
 
 import static org.apache.commons.lang3.StringUtils.isEmpty;
 import static org.apache.hadoop.fs.s3a.Constants.*;
+import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
 import static org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport.translateDeleteException;
 import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
 
@@ -249,6 +250,18 @@ public static IOException translateException(@Nullable String operation,
 
     // the object isn't there
     case 404:
+      if (isUnknownBucket(ase)) {
+        // this is a missing bucket
+        ioe = new UnknownStoreException(path, ase);
+      } else {
+        // a normal unknown object
+        ioe = new FileNotFoundException(message);
+        ioe.initCause(ase);
+      }
+      break;
+
+    // this also surfaces sometimes and is considered to
+    // be ~ a not found exception.
     case 410:
       ioe = new FileNotFoundException(message);
       ioe.initCause(ase);
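
An illustrative sketch of the new 404 handling, not taken from the commit's own tests: a 404 whose AWS error code is `NoSuchBucket` now translates to `UnknownStoreException`, while an ordinary missing-key 404 still becomes `FileNotFoundException`. The path string below is a placeholder.

```java
import java.io.IOException;

import com.amazonaws.AmazonServiceException;

import org.apache.hadoop.fs.s3a.S3AUtils;
import org.apache.hadoop.fs.s3a.UnknownStoreException;

public class TranslateUnknownBucketSketch {
  public static void main(String[] args) {
    AmazonServiceException ase = new AmazonServiceException("bucket is gone");
    ase.setStatusCode(404);
    ase.setErrorCode("NoSuchBucket");
    IOException ioe = S3AUtils.translateException(
        "getFileStatus", "s3a://example-bucket/key", ase);
    System.out.println(ioe instanceof UnknownStoreException);   // expected: true
  }
}
```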

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * The bucket or other AWS resource is unknown.
+ *
+ * Why not a subclass of FileNotFoundException?
+ * There's too much code which caches an FNFE and infers that the file isn't
+ * there - a missing bucket is far more significant and generally should
+ * not be ignored.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class UnknownStoreException extends IOException {
+
+  /**
+   * Constructor.
+   * @param message message
+   */
+  public UnknownStoreException(final String message) {
+    this(message, null);
+  }
+
+  /**
+   * Constructor.
+   * @param message message
+   * @param cause cause (may be null)
+   */
+  public UnknownStoreException(final String message, Throwable cause) {
+    super(message);
+    if (cause != null) {
+      initCause(cause);
+    }
+  }
+}
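
A short sketch, not part of the commit, of the distinction the class javadoc is making: callers that routinely swallow `FileNotFoundException` will still see a missing bucket, because `UnknownStoreException` extends `IOException` directly.

```java
import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class MissingBucketVsMissingFile {
  // returns null for an absent object, but lets a missing bucket propagate
  static FileStatus statusOrNull(FileSystem fs, Path path) throws IOException {
    try {
      return fs.getFileStatus(path);
    } catch (FileNotFoundException e) {
      return null;   // "file absent" is a routine, ignorable condition
    }
    // an UnknownStoreException (missing bucket) is not caught above and
    // surfaces to the caller, which is the intended behaviour
  }
}
```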

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java

Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.impl;
+
+import com.amazonaws.AmazonServiceException;
+
+import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
+
+/**
+ * Translate from AWS SDK-wrapped exceptions into IOExceptions with
+ * as much information as possible.
+ * The core of the translation logic is in S3AUtils, in
+ * {@code translateException} and nearby; that has grown to be
+ * a large a complex piece of logic, as it ties in with retry/recovery
+ * policies, throttling, etc.
+ *
+ * This class is where future expansion of that code should go so that we have
+ * an isolated place for all the changes..
+ * The existing code las been left in S3AUtils it is to avoid cherry-picking
+ * problems on backports.
+ */
+public class ErrorTranslation {
+
+  /**
+   * Private constructor for utility class.
+   */
+  private ErrorTranslation() {
+  }
+
+  /**
+   * Does this exception indicate that the AWS Bucket was unknown.
+   * @param e exception.
+   * @return true if the status code and error code mean that the
+   * remote bucket is unknown.
+   */
+  public static boolean isUnknownBucket(AmazonServiceException e) {
+    return e.getStatusCode() == SC_404
+        && AwsErrorCodes.E_NO_SUCH_BUCKET.equals(e.getErrorCode());
+  }
+
+  /**
+   * AWS error codes explicitly recognized and processes specially;
+   * kept in their own class for isolation.
+   */
+  public static final class AwsErrorCodes {
+
+    /**
+     * The AWS S3 error code used to recognize when a 404 means the bucket is
+     * unknown.
+     */
+    public static final String E_NO_SUCH_BUCKET = "NoSuchBucket";
+
+    /** private constructor. */
+    private AwsErrorCodes() {
+    }
+  }
+}
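
An illustrative sketch, not part of the commit, of what the predicate does and does not match: only the 404 + `NoSuchBucket` combination is treated as a missing store, while a plain missing-key 404 is left to the existing `FileNotFoundException` path.

```java
import com.amazonaws.AmazonServiceException;

import org.apache.hadoop.fs.s3a.impl.ErrorTranslation;

public class IsUnknownBucketSketch {
  private static AmazonServiceException aws(int status, String errorCode) {
    AmazonServiceException e = new AmazonServiceException("synthetic failure");
    e.setStatusCode(status);
    e.setErrorCode(errorCode);
    return e;
  }

  public static void main(String[] args) {
    System.out.println(ErrorTranslation.isUnknownBucket(aws(404, "NoSuchBucket"))); // true
    System.out.println(ErrorTranslation.isUnknownBucket(aws(404, "NoSuchKey")));    // false
    System.out.println(ErrorTranslation.isUnknownBucket(aws(403, "NoSuchBucket"))); // false
  }
}
```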

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

Lines changed: 20 additions & 0 deletions
@@ -1000,6 +1000,26 @@ options are covered in [Testing](./testing.md).
     converged to Integer.MAX_VALUE milliseconds
   </description>
 </property>
+
+<property>
+  <name>fs.s3a.bucket.probe</name>
+  <value>2</value>
+  <description>
+    The value can be 0, 1 or 2 (default).
+    When set to 0, bucket existence checks won't be done
+    during initialization thus making it faster.
+    Though it should be noted that when the bucket is not available in S3,
+    or if fs.s3a.endpoint points to the wrong instance of a private S3 store
+    consecutive calls like listing, read, write etc. will fail with
+    an UnknownStoreException.
+    When set to 1, the bucket existence check will be done using the
+    V1 API of the S3 protocol which doesn't verify the client's permissions
+    to list or read data in the bucket.
+    When set to 2, the bucket existence check will be done using the
+    V2 API of the S3 protocol which does verify that the
+    client has permission to read the bucket.
+  </description>
+</property>
 ```
 
 ## <a name="retry_and_recovery"></a>Retry and Recovery
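
As background (not something this commit adds), the two probe levels map onto the AWS SDK calls of the same names; the v1 call only checks existence, while the v2 variant also checks that the caller can read the bucket, which is why it is the default. A rough sketch against the SDK directly, with a placeholder bucket name and default credentials:

```java
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;

public class BucketProbeApisSketch {
  public static void main(String[] args) {
    AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
    String bucket = "example-bucket";
    // v1: existence only, no permission check
    System.out.println("doesBucketExist:   " + s3.doesBucketExist(bucket));
    // v2: also verifies the caller can actually access the bucket
    System.out.println("doesBucketExistV2: " + s3.doesBucketExistV2(bucket));
  }
}
```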

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md

Lines changed: 16 additions & 0 deletions
@@ -608,3 +608,19 @@ with HADOOP-15669.
 
 Other options may be added to `fs.s3a.ssl.channel.mode` in the future as
 further SSL optimizations are made.
+
+## Tuning FileSystem Initialization.
+
+When an S3A Filesystem instance is created and initialized, the client
+checks if the bucket provided is valid. This can be slow.
+You can ignore bucket validation by configuring `fs.s3a.bucket.probe` as follows:
+
+```xml
+<property>
+  <name>fs.s3a.bucket.probe</name>
+  <value>0</value>
+</property>
+```
+
+Note: if the bucket does not exist, this issue will surface when operations are performed
+on the filesystem; you will see `UnknownStoreException` stack traces.
