Skip to content

Commit 81dc4a9

Browse files
HADOOP-15663. ABFS: Simplify configuration.
Contributed by Da Zhou.
1 parent df57c6c commit 81dc4a9

24 files changed

+419
-401
lines changed

hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1618,6 +1618,18 @@
16181618
</property>
16191619

16201620
<!-- Azure file system properties -->
1621+
<property>
1622+
<name>fs.AbstractFileSystem.wasb.impl</name>
1623+
<value>org.apache.hadoop.fs.azure.Wasb</value>
1624+
<description>AbstractFileSystem implementation class of wasb://</description>
1625+
</property>
1626+
1627+
<property>
1628+
<name>fs.AbstractFileSystem.wasbs.impl</name>
1629+
<value>org.apache.hadoop.fs.azure.Wasbs</value>
1630+
<description>AbstractFileSystem implementation class of wasbs://</description>
1631+
</property>
1632+
16211633
<property>
16221634
<name>fs.wasb.impl</name>
16231635
<value>org.apache.hadoop.fs.azure.NativeAzureFileSystem</value>

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,6 @@ public AbfsConfiguration(final Configuration configuration) throws IllegalAccess
173173
}
174174
}
175175

176-
public boolean isEmulator() {
177-
return this.getConfiguration().getBoolean(FS_AZURE_EMULATOR_ENABLED, false);
178-
}
179-
180176
public boolean isSecureMode() {
181177
return isSecure;
182178
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
import org.apache.hadoop.fs.azurebfs.services.AuthType;
7878
import org.apache.hadoop.fs.azurebfs.services.ExponentialRetryPolicy;
7979
import org.apache.hadoop.fs.azurebfs.services.SharedKeyCredentials;
80+
import org.apache.hadoop.fs.azurebfs.utils.UriUtils;
8081
import org.apache.hadoop.fs.permission.AclEntry;
8182
import org.apache.hadoop.fs.permission.AclStatus;
8283
import org.apache.hadoop.fs.permission.FsAction;
@@ -86,6 +87,7 @@
8687
import org.slf4j.Logger;
8788
import org.slf4j.LoggerFactory;
8889

90+
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_ABFS_ENDPOINT;
8991
import static org.apache.hadoop.util.Time.now;
9092

9193
/**
@@ -146,7 +148,27 @@ URIBuilder getURIBuilder(final String hostName, boolean isSecure) {
146148

147149
final URIBuilder uriBuilder = new URIBuilder();
148150
uriBuilder.setScheme(scheme);
149-
uriBuilder.setHost(hostName);
151+
152+
// For testing purposes, an IP address and port may be provided to override
153+
// the host specified in the FileSystem URI. Also note that the format of
154+
// the Azure Storage Service URI changes from
155+
// http[s]://[account][domain-suffix]/[filesystem] to
156+
// http[s]://[ip]:[port]/[account]/[filesystem].
157+
String endPoint = abfsConfiguration.getConfiguration().get(AZURE_ABFS_ENDPOINT);
158+
if (endPoint == null || !endPoint.contains(AbfsHttpConstants.COLON)) {
159+
uriBuilder.setHost(hostName);
160+
return uriBuilder;
161+
}
162+
163+
// Split ip and port
164+
String[] data = endPoint.split(AbfsHttpConstants.COLON);
165+
if (data.length != 2) {
166+
throw new RuntimeException(String.format("ABFS endpoint is not set correctly : %s, "
167+
+ "Do not specify scheme when using {IP}:{PORT}", endPoint));
168+
}
169+
uriBuilder.setHost(data[0].trim());
170+
uriBuilder.setPort(Integer.parseInt(data[1].trim()));
171+
uriBuilder.setPath("/" + UriUtils.extractAccountNameFromHostName(hostName));
150172

151173
return uriBuilder;
152174
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,6 @@ public final class ConfigurationKeys {
3737
public static final String AZURE_BACKOFF_INTERVAL = "fs.azure.io.retry.backoff.interval";
3838
public static final String AZURE_MAX_IO_RETRIES = "fs.azure.io.retry.max.retries";
3939

40-
// Remove this and use common azure storage emulator property for public release.
41-
public static final String FS_AZURE_EMULATOR_ENABLED = "fs.azure.abfs.emulator.enabled";
42-
4340
// Read and write buffer sizes defined by the user
4441
public static final String AZURE_WRITE_BUFFER_SIZE = "fs.azure.write.request.size";
4542
public static final String AZURE_READ_BUFFER_SIZE = "fs.azure.read.request.size";
@@ -60,6 +57,8 @@ public final class ConfigurationKeys {
6057
public static final String AZURE_KEY_ACCOUNT_KEYPROVIDER_PREFIX = "fs.azure.account.keyprovider.";
6158
public static final String AZURE_KEY_ACCOUNT_SHELLKEYPROVIDER_SCRIPT = "fs.azure.shellkeyprovider.script";
6259

60+
/** End point of ABFS account: {@value}. */
61+
public static final String AZURE_ABFS_ENDPOINT = "fs.azure.abfs.endpoint";
6362
/** Prefix for auth type properties: {@value}. */
6463
public static final String FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME = "fs.azure.account.auth.type.";
6564
/** Prefix for oauth token provider type: {@value}. */

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,20 +41,21 @@ public static boolean containsAbfsUrl(final String string) {
4141
}
4242

4343
/**
44-
* Extracts the raw account name from account name.
45-
* @param accountName to extract the raw account name.
46-
* @return extracted raw account name.
44+
* Extracts the account name from the host name.
45+
* @param hostName the fully-qualified domain name of the storage service
46+
* endpoint (e.g. {account}.dfs.core.windows.net).
47+
* @return the storage service account name.
4748
*/
48-
public static String extractRawAccountFromAccountName(final String accountName) {
49-
if (accountName == null || accountName.isEmpty()) {
49+
public static String extractAccountNameFromHostName(final String hostName) {
50+
if (hostName == null || hostName.isEmpty()) {
5051
return null;
5152
}
5253

53-
if (!containsAbfsUrl(accountName)) {
54+
if (!containsAbfsUrl(hostName)) {
5455
return null;
5556
}
5657

57-
String[] splitByDot = accountName.split("\\.");
58+
String[] splitByDot = hostName.split("\\.");
5859
if (splitByDot.length == 0) {
5960
return null;
6061
}

hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md

Lines changed: 152 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ For example:
9090
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
9191
<configuration>
9292
<property>
93-
<name>fs.azure.test.account.name</name>
93+
<name>fs.azure.wasb.account.name</name>
9494
<value>{ACCOUNTNAME}.blob.core.windows.net</value>
9595
</property>
9696
<property>
@@ -126,7 +126,7 @@ Overall, to run all the tests using `mvn test`, a sample `azure-auth-keys.xml`
126126
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
127127
<configuration>
128128
<property>
129-
<name>fs.azure.test.account.name</name>
129+
<name>fs.azure.wasb.account.name</name>
130130
<value>{ACCOUNTNAME}.blob.core.windows.net</value>
131131
</property>
132132
<property>
@@ -576,77 +576,172 @@ This will delete the containers; the output log of the test run will
576576
provide the details and summary of the operation.
577577

578578

579-
## Testing ABFS
579+
## Testing the Azure ABFS Client
580580

581-
The ABFS Connector tests share the same account as the wasb tests; this is
582-
needed for cross-connector compatibility tests.
583-
584-
This makes for a somewhat complex set of configuration options.
585-
586-
Here are the settings for an account `ACCOUNTNAME`
581+
Azure Data Lake Storage Gen 2 (ADLS Gen 2) is a set of capabilities dedicated to
582+
big data analytics, built on top of Azure Blob Storage. The ABFS and ABFSS
583+
schemes target the ADLS Gen 2 REST API, and the WASB and WASBS schemes target
584+
the Azure Blob Storage REST API. ADLS Gen 2 offers better performance and
585+
scalability. ADLS Gen 2 also offers authentication and authorization compatible
586+
with the Hadoop Distributed File System permissions model when hierarchical
587+
namespace is enabled for the storage account. Furthermore, the metadata and data
588+
produced by ADLS Gen 2 REST API can be consumed by Blob REST API, and vice versa.
589+
590+
In order to test ABFS, please add the following configuration to your
591+
`src/test/resources/azure-auth-keys.xml` file. Note that the ABFS tests include
592+
compatibility tests which require WASB credentials, in addition to the ABFS
593+
credentials.
587594

588595
```xml
589-
<property>
590-
<name>abfs.account.name</name>
591-
<value>ACCOUNTNAME</value>
592-
</property>
596+
<?xml version="1.0" encoding="UTF-8"?>
597+
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
598+
<configuration xmlns:xi="http://www.w3.org/2001/XInclude">
599+
<property>
600+
<name>fs.azure.abfs.account.name</name>
601+
<value>{ACCOUNT_NAME}.dfs.core.windows.net</value>
602+
</property>
593603

594-
<property>
595-
<name>abfs.account.full.name</name>
596-
<value>${abfs.account.name}.dfs.core.windows.net</value>
597-
</property>
604+
<property>
605+
<name>fs.azure.account.key.{ACCOUNT_NAME}.dfs.core.windows.net</name>
606+
<value>{ACCOUNT_ACCESS_KEY}</value>
607+
</property>
598608

599-
<property>
600-
<name>abfs.account.key</name>
601-
<value>SECRETKEY==</value>
602-
</property>
609+
<property>
610+
<name>fs.azure.wasb.account.name</name>
611+
<value>{ACCOUNT_NAME}.blob.core.windows.net</value>
612+
</property>
613+
614+
<property>
615+
<name>fs.azure.account.key.{ACCOUNT_NAME}.blob.core.windows.net</name>
616+
<value>{ACCOUNT_ACCESS_KEY}</value>
617+
</property>
603618

604-
<property>
605-
<name>fs.azure.account.key.ACCOUNTNAME.dfs.core.windows.net</name>
606-
<value>${abfs.account.key}</value>
607-
</property>
619+
<property>
620+
<name>fs.contract.test.fs.abfs</name>
621+
<value>abfs://{CONTAINER_NAME}@{ACCOUNT_NAME}.dfs.core.windows.net</value>
622+
<description>A file system URI to be used by the contract tests.</description>
623+
</property>
608624

609-
<property>
610-
<name>fs.azure.account.key.ACCOUNTNAME.blob.core.windows.net</name>
611-
<value>${abfs.account.key}</value>
612-
</property>
625+
<property>
626+
<name>fs.contract.test.fs.wasb</name>
627+
<value>wasb://{CONTAINER_NAME}@{ACCOUNT_NAME}.blob.core.windows.net</value>
628+
<description>A file system URI to be used by the contract tests.</description>
629+
</property>
630+
</configuration>
631+
```
613632

614-
<property>
615-
<name>fs.azure.test.account.key.ACCOUNTNAME.dfs.core.windows.net</name>
616-
<value>${abfs.account.key}</value>
617-
</property>
633+
To run OAuth and ACL test cases you must use a storage account with the
634+
hierarchical namespace enabled, and set the following configuration settings:
618635

636+
```xml
637+
<!--=========================== AUTHENTICATION OPTIONS ===================-->
638+
<!--ATTENTION:
639+
TO RUN ABFS & WASB COMPATIBILITY TESTS, YOU MUST SET AUTH TYPE AS SharedKey.
640+
OAUTH IS INTRODUCED TO ABFS ONLY.-->
619641
<property>
620-
<name>fs.azure.test.account.key.ACCOUNTNAME.blob.core.windows.net</name>
621-
<value>${abfs.account.key}</value>
642+
<name>fs.azure.account.auth.type.{YOUR_ABFS_ACCOUNT_NAME}</name>
643+
<value>{AUTH TYPE}</value>
644+
<description>The authorization type can be SharedKey, OAuth, or Custom. The
645+
default is SharedKey.</description>
622646
</property>
623647

624-
<property>
625-
<name>fs.azure.account.key.ACCOUNTNAME</name>
626-
<value>${abfs.account.key}</value>
627-
</property>
648+
<!--============================= FOR OAUTH ===========================-->
649+
<!--IF AUTH TYPE IS SET AS OAUTH, FOLLOW THE STEPS BELOW-->
650+
<!--NOTICE: AAD client and tenant related properties can be obtained through Azure Portal-->
628651

629-
<property>
630-
<name>fs.azure.test.account.key.ACCOUNTNAME</name>
631-
<value>${abfs.account.key}</value>
632-
</property>
652+
<!--1. UNCOMMENT BELOW AND CHOOSE YOUR OAUTH PROVIDER TYPE -->
633653

634-
<property>
635-
<name>fs.azure.test.account.name</name>
636-
<value>${abfs.account.full.name}</value>
637-
</property>
654+
<!--
655+
<property>
656+
<name>fs.azure.account.oauth.provider.type.{ABFS_ACCOUNT_NAME}</name>
657+
<value>org.apache.hadoop.fs.azurebfs.oauth2.{Token Provider Class name}</value>
658+
<description>The fully qualified class name of the token provider.</description>
659+
</property>
660+
-->
638661

639-
<property>
640-
<name>fs.contract.test.fs.abfs</name>
641-
<value>abfs://TESTCONTAINER@ACCOUNTNAME.dfs.core.windows.net</value>
642-
<description>Container for contract tests</description>
643-
</property>
662+
<!--2. UNCOMMENT BELOW AND SET CREDENTIALS ACCORDING TO THE PROVIDER TYPE-->
644663

645-
<property>
646-
<name>fs.contract.test.fs.abfss</name>
647-
<value>abfss://TESTCONTAINER@ACCOUNTNAME.dfs.core.windows.net</value>
648-
<description>Container for contract tests</description>
649-
</property>
664+
<!--2.1. If "ClientCredsTokenProvider" is set as key provider, uncomment below and
665+
set auth endpoint, client id and secret below-->
666+
<!--
667+
<property>
668+
<name>fs.azure.account.oauth2.client.endpoint.{ABFS_ACCOUNT_NAME}</name>
669+
<value>https://login.microsoftonline.com/{TENANTID}/oauth2/token</value>
670+
<description>Token end point, this can be found through Azure portal</description>
671+
</property>
650672
673+
<property>
674+
<name>fs.azure.account.oauth2.client.id.{ABFS_ACCOUNT_NAME}</name>
675+
<value>{client id}</value>
676+
<description>AAD client id.</description>
677+
</property>
678+
679+
<property>
680+
<name>fs.azure.account.oauth2.client.secret.{ABFS_ACCOUNT_NAME}</name>
681+
<value>{client secret}</value>
682+
</property>
683+
-->
684+
685+
<!--2.2. If "UserPasswordTokenProvider" is set as key provider, uncomment below and
686+
set auth endpoint, user name and password-->
687+
<!--
688+
<property>
689+
<name>fs.azure.account.oauth2.client.endpoint.{ABFS_ACCOUNT_NAME}</name>
690+
<value>https://login.microsoftonline.com/{TENANTID}/oauth2/token</value>
691+
<description>Token end point, this can be found through Azure portal</description>
692+
</property>
651693
694+
<property>
695+
<name>fs.azure.account.oauth2.user.name.{ABFS_ACCOUNT_NAME}</name>
696+
<value>{user name}</value>
697+
</property>
698+
699+
<property>
700+
<name>fs.azure.account.oauth2.user.password.{ABFS_ACCOUNT_NAME}</name>
701+
<value>{user password}</value>
702+
</property>
703+
-->
704+
705+
<!--2.3. If "MsiTokenProvider" is set as key provider, uncomment below and
706+
set tenantGuid and client id.-->
707+
<!--
708+
<property>
709+
<name>fs.azure.account.oauth2.msi.tenant.{ABFS_ACCOUNT_NAME}</name>
710+
<value>{tenantGuid}</value>
711+
<description>msi tenantGuid.</description>
712+
</property>
713+
714+
<property>
715+
<name>fs.azure.account.oauth2.client.id.{ABFS_ACCOUNT_NAME}</name>
716+
<value>{client id}</value>
717+
<description>AAD client id.</description>
718+
</property>
719+
-->
720+
721+
<!--2.4. If "RefreshTokenBasedTokenProvider" is set as key provider, uncomment below and
722+
set refresh token and client id.-->
723+
<!--
724+
<property>
725+
<name>fs.azure.account.oauth2.refresh.token.{ABFS_ACCOUNT_NAME}</name>
726+
<value>{refresh token}</value>
727+
<description>refresh token.</description>
728+
</property>
729+
730+
<property>
731+
<name>fs.azure.account.oauth2.client.id.{ABFS_ACCOUNT_NAME}</name>
732+
<value>{client id}</value>
733+
<description>AAD client id.</description>
734+
</property>
735+
-->
652736
```
737+
738+
If running tests against an endpoint that uses the URL format
739+
http[s]://[ip]:[port]/[account]/[filesystem] instead of
740+
http[s]://[account][domain-suffix]/[filesystem], please use the following:
741+
742+
```xml
743+
<property>
744+
<name>fs.azure.abfs.endpoint</name>
745+
<value>{IP}:{PORT}</value>
746+
</property>
747+
```

0 commit comments

Comments
 (0)