Commit 7e67358

HADOOP-19207: [ABFS][FNSOverBlob] Response Handling of Blob Endpoint APIs and Metadata APIs (#7210)
Contributed by Anuj Modi

1 parent: d5b836b
32 files changed, +2351 -421 lines

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java (+41, -4)
@@ -416,6 +416,14 @@ public class AbfsConfiguration{
   private String clientProvidedEncryptionKey;
   private String clientProvidedEncryptionKeySHA;
 
+  /**
+   * Constructor for AbfsConfiguration for specified service type.
+   * @param rawConfig used to initialize the configuration.
+   * @param accountName the name of the azure storage account.
+   * @param fsConfiguredServiceType service type configured for the file system.
+   * @throws IllegalAccessException if the field is not accessible.
+   * @throws IOException if an I/O error occurs.
+   */
   public AbfsConfiguration(final Configuration rawConfig,
       String accountName,
       AbfsServiceType fsConfiguredServiceType)

@@ -445,6 +453,13 @@ public AbfsConfiguration(final Configuration rawConfig,
     }
   }
 
+  /**
+   * Constructor for AbfsConfiguration for default service type i.e. DFS.
+   * @param rawConfig used to initialize the configuration.
+   * @param accountName the name of the azure storage account.
+   * @throws IllegalAccessException if the field is not accessible.
+   * @throws IOException if an I/O error occurs.
+   */
   public AbfsConfiguration(final Configuration rawConfig, String accountName)
       throws IllegalAccessException, IOException {
     this(rawConfig, accountName, AbfsServiceType.DFS);

@@ -470,7 +485,7 @@ public Trilean getIsNamespaceEnabledAccount() {
    * @return the service type.
    */
   public AbfsServiceType getFsConfiguredServiceType() {
-    return getEnum(FS_AZURE_FNS_ACCOUNT_SERVICE_TYPE, fsConfiguredServiceType);
+    return getCaseInsensitiveEnum(FS_AZURE_FNS_ACCOUNT_SERVICE_TYPE, fsConfiguredServiceType);
   }
 
   /**

@@ -479,7 +494,7 @@ public AbfsServiceType getFsConfiguredServiceType() {
    * @return the service type.
    */
   public AbfsServiceType getConfiguredServiceTypeForFNSAccounts() {
-    return getEnum(FS_AZURE_FNS_ACCOUNT_SERVICE_TYPE, null);
+    return getCaseInsensitiveEnum(FS_AZURE_FNS_ACCOUNT_SERVICE_TYPE, null);
  }
 
   /**

@@ -488,7 +503,7 @@ public AbfsServiceType getConfiguredServiceTypeForFNSAccounts() {
    * @return the service type.
    */
   public AbfsServiceType getIngressServiceType() {
-    return getEnum(FS_AZURE_INGRESS_SERVICE_TYPE, getFsConfiguredServiceType());
+    return getCaseInsensitiveEnum(FS_AZURE_INGRESS_SERVICE_TYPE, getFsConfiguredServiceType());
   }
 
   /**

@@ -515,7 +530,7 @@ public void validateConfiguredServiceType(boolean isHNSEnabled)
     }
     if (isHNSEnabled && getConfiguredServiceTypeForFNSAccounts() == AbfsServiceType.BLOB) {
       throw new InvalidConfigurationValueException(
-          FS_AZURE_FNS_ACCOUNT_SERVICE_TYPE, "Cannot be BLOB for HNS Account");
+          FS_AZURE_FNS_ACCOUNT_SERVICE_TYPE, "Service Type Cannot be BLOB for HNS Account");
     } else if (isHNSEnabled && fsConfiguredServiceType == AbfsServiceType.BLOB) {
       throw new InvalidConfigurationValueException(FS_DEFAULT_NAME_KEY,
           "Blob Endpoint Url Cannot be used to initialize filesystem for HNS Account");

@@ -712,6 +727,28 @@ public <T extends Enum<T>> T getEnum(String name, T defaultValue) {
         rawConfig.getEnum(name, defaultValue));
   }
 
+  /**
+   * Returns the account-specific enum value if it exists, then
+   * looks for an account-agnostic value in case-insensitive manner.
+   * @param name Account-agnostic configuration key
+   * @param defaultValue Value returned if none is configured
+   * @param <T> Enum type
+   * @return enum value if one exists, else null
+   */
+  public <T extends Enum<T>> T getCaseInsensitiveEnum(String name, T defaultValue) {
+    String configValue = getString(name, null);
+    if (configValue != null) {
+      for (T enumConstant : defaultValue.getDeclaringClass().getEnumConstants()) { // Step 3: Iterate over enum constants
+        if (enumConstant.name().equalsIgnoreCase(configValue)) {
+          return enumConstant;
+        }
+      }
+      // No match found
+      throw new IllegalArgumentException("No enum constant " + defaultValue.getDeclaringClass().getCanonicalName() + "." + configValue);
+    }
+    return defaultValue;
+  }
+
   /**
    * Returns the account-agnostic enum value if it exists, else
    * return default.
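The new getCaseInsensitiveEnum helper above replaces Configuration.getEnum for the service-type keys so that values such as "blob", "Blob", and "BLOB" all resolve to the same constant. Below is a minimal standalone sketch of the same lookup logic; the ServiceType enum and resolve helper are illustrative stand-ins, not the Hadoop classes.

public class CaseInsensitiveEnumLookup {

  // Illustrative stand-in for AbfsServiceType; not the Hadoop enum.
  enum ServiceType { DFS, BLOB }

  // Mirrors the lookup in getCaseInsensitiveEnum: match ignoring case,
  // fall back to the default when the key is unset, fail fast otherwise.
  static <T extends Enum<T>> T resolve(String configValue, T defaultValue) {
    if (configValue == null) {
      return defaultValue;
    }
    for (T constant : defaultValue.getDeclaringClass().getEnumConstants()) {
      if (constant.name().equalsIgnoreCase(configValue)) {
        return constant;
      }
    }
    throw new IllegalArgumentException("No enum constant "
        + defaultValue.getDeclaringClass().getCanonicalName() + "." + configValue);
  }

  public static void main(String[] args) {
    System.out.println(resolve("blob", ServiceType.DFS)); // BLOB
    System.out.println(resolve("Blob", ServiceType.DFS)); // BLOB
    System.out.println(resolve(null, ServiceType.DFS));   // DFS (default applies)
  }
}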

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java (+29, -23)
@@ -122,6 +122,7 @@
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_IS_HNS_ENABLED;
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BLOCK_UPLOAD_ACTIVE_BLOCKS;
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BLOCK_UPLOAD_BUFFER_DIR;
+import static org.apache.hadoop.fs.azurebfs.constants.FSOperationType.CREATE_FILESYSTEM;
 import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.BLOCK_UPLOAD_ACTIVE_BLOCKS_DEFAULT;
 import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DATA_BLOCKS_BUFFER_DEFAULT;
 import static org.apache.hadoop.fs.azurebfs.constants.InternalConstants.CAPABILITY_SAFE_READAHEAD;

@@ -215,16 +216,16 @@ public void initialize(URI uri, Configuration configuration)
     tracingHeaderFormat = abfsConfiguration.getTracingHeaderFormat();
     this.setWorkingDirectory(this.getHomeDirectory());
 
-    TracingContext tracingContext = new TracingContext(clientCorrelationId,
-        fileSystemId, FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, listener);
+    TracingContext initFSTracingContext = new TracingContext(clientCorrelationId,
+        fileSystemId, FSOperationType.INIT, tracingHeaderFormat, listener);
 
     /*
      * Validate the service type configured in the URI is valid for account type used.
      * HNS Account Cannot have Blob Endpoint URI.
      */
     try {
       abfsConfiguration.validateConfiguredServiceType(
-          tryGetIsNamespaceEnabled(new TracingContext(tracingContext)));
+          tryGetIsNamespaceEnabled(initFSTracingContext));
     } catch (InvalidConfigurationValueException ex) {
       LOG.debug("File system configured with Invalid Service Type", ex);
       throw ex;

@@ -233,34 +234,39 @@ public void initialize(URI uri, Configuration configuration)
       throw new InvalidConfigurationValueException(FS_AZURE_ACCOUNT_IS_HNS_ENABLED, ex);
     }
 
+    /*
+     * Non-hierarchical-namespace account can not have a customer-provided-key(CPK).
+     * Fail initialization of filesystem if the configs are provided. CPK is of
+     * two types: GLOBAL_KEY, and ENCRYPTION_CONTEXT.
+     */
+    try {
+      if ((isEncryptionContextCPK(abfsConfiguration) || isGlobalKeyCPK(
+          abfsConfiguration)) && !tryGetIsNamespaceEnabled(new TracingContext(
+          initFSTracingContext))) {
+        throw new PathIOException(uri.getPath(),
+            CPK_IN_NON_HNS_ACCOUNT_ERROR_MESSAGE);
+      }
+    } catch (InvalidConfigurationValueException ex) {
+      LOG.debug("Non-Hierarchical Namespace Accounts Cannot Have CPK Enabled", ex);
+      throw ex;
+    } catch (AzureBlobFileSystemException ex) {
+      LOG.debug("Failed to determine account type for service type validation", ex);
+      throw new InvalidConfigurationValueException(FS_AZURE_ACCOUNT_IS_HNS_ENABLED, ex);
+    }
+
     // Create the file system if it does not exist.
     if (abfsConfiguration.getCreateRemoteFileSystemDuringInitialization()) {
-      if (this.tryGetFileStatus(new Path(AbfsHttpConstants.ROOT_PATH), tracingContext) == null) {
+      TracingContext createFSTracingContext = new TracingContext(initFSTracingContext);
+      createFSTracingContext.setOperation(CREATE_FILESYSTEM);
+      if (this.tryGetFileStatus(new Path(AbfsHttpConstants.ROOT_PATH), createFSTracingContext) == null) {
         try {
-          this.createFileSystem(tracingContext);
+          this.createFileSystem(createFSTracingContext);
         } catch (AzureBlobFileSystemException ex) {
           checkException(null, ex, AzureServiceErrorCode.FILE_SYSTEM_ALREADY_EXISTS);
         }
       }
     }
 
-    /*
-     * Non-hierarchical-namespace account can not have a customer-provided-key(CPK).
-     * Fail initialization of filesystem if the configs are provided. CPK is of
-     * two types: GLOBAL_KEY, and ENCRYPTION_CONTEXT.
-     */
-    if ((isEncryptionContextCPK(abfsConfiguration) || isGlobalKeyCPK(
-        abfsConfiguration))
-        && !getIsNamespaceEnabled(new TracingContext(tracingContext))) {
-      /*
-       * Close the filesystem gracefully before throwing exception. Graceful close
-       * will ensure that all resources are released properly.
-       */
-      close();
-      throw new PathIOException(uri.getPath(),
-          CPK_IN_NON_HNS_ACCOUNT_ERROR_MESSAGE);
-    }
-
     LOG.trace("Initiate check for delegation token manager");
     if (UserGroupInformation.isSecurityEnabled()) {
       this.delegationTokenEnabled = abfsConfiguration.isDelegationTokenManagerEnabled();

@@ -700,7 +706,7 @@ private void incrementStatistic(AbfsStatistic statistic) {
   private void trailingPeriodCheck(Path path) throws IllegalArgumentException {
     while (!path.isRoot()) {
       String pathToString = path.toString();
-      if (pathToString.length() != 0) {
+      if (!pathToString.isEmpty()) {
         if (pathToString.charAt(pathToString.length() - 1) == '.') {
           throw new IllegalArgumentException(
               "ABFS does not allow files or directories to end with a dot.");

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java (+46, -32)
@@ -150,6 +150,7 @@
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_STAR;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_UNDERSCORE;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DIRECTORY;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FILE;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SINGLE_WHITE_SPACE;

@@ -343,11 +344,13 @@ public void close() throws IOException {
   }
 
   byte[] encodeAttribute(String value) throws UnsupportedEncodingException {
-    return value.getBytes(XMS_PROPERTIES_ENCODING);
+    // DFS Client works with ISO_8859_1 encoding, Blob Works with UTF-8.
+    return getClient().encodeAttribute(value);
   }
 
   String decodeAttribute(byte[] value) throws UnsupportedEncodingException {
-    return new String(value, XMS_PROPERTIES_ENCODING);
+    // DFS Client works with ISO_8859_1 encoding, Blob Works with UTF-8.
+    return getClient().decodeAttribute(value);
   }
 
   private String[] authorityParts(URI uri) throws InvalidUriAuthorityException, InvalidUriException {
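encodeAttribute and decodeAttribute above now delegate to the active client because the DFS endpoint exchanges x-ms-properties values in ISO-8859-1 while the Blob endpoint uses UTF-8. A small JDK-only sketch of why the charset choice matters for non-ASCII attribute values (plain StandardCharsets, not the ABFS client methods):

import java.nio.charset.StandardCharsets;

public class AttributeEncodingDemo {
  public static void main(String[] args) {
    String value = "café";   // non-ASCII metadata value

    // DFS-style encoding: ISO-8859-1, one byte per character for Latin-1 text.
    byte[] dfsBytes = value.getBytes(StandardCharsets.ISO_8859_1);

    // Blob-style encoding: UTF-8, multi-byte for non-ASCII characters.
    byte[] blobBytes = value.getBytes(StandardCharsets.UTF_8);

    System.out.println(dfsBytes.length);   // 4
    System.out.println(blobBytes.length);  // 5

    // Decoding with the wrong charset mangles the value, which is why the
    // store asks the active client to pick the charset instead of hard-coding it.
    System.out.println(new String(blobBytes, StandardCharsets.ISO_8859_1)); // "cafÃ©"
    System.out.println(new String(dfsBytes, StandardCharsets.UTF_8));       // "caf" + replacement char
  }
}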
@@ -485,9 +488,8 @@ public Hashtable<String, String> getFilesystemProperties(
           .getFilesystemProperties(tracingContext);
       perfInfo.registerResult(op.getResult());
 
-      final String xMsProperties = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_PROPERTIES);
-
-      parsedXmsProperties = parseCommaSeparatedXmsProperties(xMsProperties);
+      // Handling difference in request headers formats between DFS and Blob Clients.
+      parsedXmsProperties = getClient().getXMSProperties(op.getResult());
       perfInfo.registerSuccess(true);
 
       return parsedXmsProperties;

@@ -533,10 +535,8 @@ public Hashtable<String, String> getPathStatus(final Path path,
       perfInfo.registerResult(op.getResult());
       contextEncryptionAdapter.destroy();
 
-      final String xMsProperties = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_PROPERTIES);
-
-      parsedXmsProperties = parseCommaSeparatedXmsProperties(xMsProperties);
-
+      // Handling difference in request headers formats between DFS and Blob Clients.
+      parsedXmsProperties = getClient().getXMSProperties(op.getResult());
       perfInfo.registerSuccess(true);
 
       return parsedXmsProperties;

@@ -899,10 +899,8 @@ public AbfsInputStream openFileForRead(Path path,
     } else {
       AbfsHttpOperation op = getClient().getPathStatus(relativePath, false,
           tracingContext, null).getResult();
-      resourceType = op.getResponseHeader(
-          HttpHeaderConfigurations.X_MS_RESOURCE_TYPE);
-      contentLength = Long.parseLong(
-          op.getResponseHeader(HttpHeaderConfigurations.CONTENT_LENGTH));
+      resourceType = getClient().checkIsDir(op) ? DIRECTORY : FILE;
+      contentLength = extractContentLength(op);
       eTag = op.getResponseHeader(HttpHeaderConfigurations.ETAG);
       /*
        * For file created with ENCRYPTION_CONTEXT, client shall receive

@@ -983,17 +981,15 @@ public OutputStream openFileForWrite(final Path path,
           .getPathStatus(relativePath, false, tracingContext, null);
       perfInfo.registerResult(op.getResult());
 
-      final String resourceType = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_RESOURCE_TYPE);
-      final Long contentLength = Long.valueOf(op.getResult().getResponseHeader(HttpHeaderConfigurations.CONTENT_LENGTH));
-
-      if (parseIsDirectory(resourceType)) {
+      if (getClient().checkIsDir(op.getResult())) {
         throw new AbfsRestOperationException(
             AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(),
             AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(),
-            "openFileForRead must be used with files and not directories",
+            "openFileForWrite must be used with files and not directories",
             null);
       }
 
+      final long contentLength = extractContentLength(op.getResult());
       final long offset = overwrite ? 0 : contentLength;
 
       perfInfo.registerSuccess(true);

@@ -1180,8 +1176,8 @@ public FileStatus getFileStatus(final Path path,
         contentLength = 0;
         resourceIsDir = true;
       } else {
-        contentLength = parseContentLength(result.getResponseHeader(HttpHeaderConfigurations.CONTENT_LENGTH));
-        resourceIsDir = parseIsDirectory(result.getResponseHeader(HttpHeaderConfigurations.X_MS_RESOURCE_TYPE));
+        contentLength = extractContentLength(result);
+        resourceIsDir = getClient().checkIsDir(result);
       }
 
       final String transformedOwner = identityTransformer.transformIdentityForGetRequest(

@@ -1256,10 +1252,16 @@ public String listStatus(final Path path, final String startFrom,
         startFrom);
 
     final String relativePath = getRelativePath(path);
+    AbfsClient listingClient = getClient();
 
     if (continuation == null || continuation.isEmpty()) {
       // generate continuation token if a valid startFrom is provided.
       if (startFrom != null && !startFrom.isEmpty()) {
+        /*
+         * Blob Endpoint Does not support startFrom yet. Fallback to DFS Client.
+         * startFrom remains null for all HDFS APIs. This is only for internal use.
+         */
+        listingClient = getClient(AbfsServiceType.DFS);
         continuation = getIsNamespaceEnabled(tracingContext)
             ? generateContinuationTokenForXns(startFrom)
             : generateContinuationTokenForNonXns(relativePath, startFrom);

@@ -1268,11 +1270,11 @@ public String listStatus(final Path path, final String startFrom,
 
     do {
       try (AbfsPerfInfo perfInfo = startTracking("listStatus", "listPath")) {
-        AbfsRestOperation op = getClient().listPath(relativePath, false,
+        AbfsRestOperation op = listingClient.listPath(relativePath, false,
             abfsConfiguration.getListMaxResults(), continuation,
             tracingContext);
         perfInfo.registerResult(op.getResult());
-        continuation = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION);
+        continuation = listingClient.getContinuationFromResponse(op.getResult());
         ListResultSchema retrievedSchema = op.getResult().getListResultSchema();
         if (retrievedSchema == null) {
           throw new AbfsRestOperationException(

@@ -1465,7 +1467,7 @@ public void modifyAclEntries(final Path path, final List<AclEntry> aclSpec,
       final AbfsRestOperation op = getClient()
           .getAclStatus(relativePath, useUpn, tracingContext);
       perfInfoGet.registerResult(op.getResult());
-      final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG);
+      final String eTag = extractEtagHeader(op.getResult());
 
       final Map<String, String> aclEntries = AbfsAclHelper.deserializeAclSpec(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_ACL));
 

@@ -1508,7 +1510,7 @@ public void removeAclEntries(final Path path, final List<AclEntry> aclSpec,
       final AbfsRestOperation op = getClient()
           .getAclStatus(relativePath, isUpnFormat, tracingContext);
       perfInfoGet.registerResult(op.getResult());
-      final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG);
+      final String eTag = extractEtagHeader(op.getResult());
 
       final Map<String, String> aclEntries = AbfsAclHelper.deserializeAclSpec(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_ACL));
 

@@ -1546,7 +1548,7 @@ public void removeDefaultAcl(final Path path, TracingContext tracingContext)
       final AbfsRestOperation op = getClient()
           .getAclStatus(relativePath, tracingContext);
       perfInfoGet.registerResult(op.getResult());
-      final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG);
+      final String eTag = extractEtagHeader(op.getResult());
       final Map<String, String> aclEntries = AbfsAclHelper.deserializeAclSpec(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_ACL));
       final Map<String, String> defaultAclEntries = new HashMap<>();
 

@@ -1590,7 +1592,7 @@ public void removeAcl(final Path path, TracingContext tracingContext)
       final AbfsRestOperation op = getClient()
           .getAclStatus(relativePath, tracingContext);
       perfInfoGet.registerResult(op.getResult());
-      final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG);
+      final String eTag = extractEtagHeader(op.getResult());
 
       final Map<String, String> aclEntries = AbfsAclHelper.deserializeAclSpec(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_ACL));
       final Map<String, String> newAclEntries = new HashMap<>();

@@ -1636,7 +1638,7 @@ public void setAcl(final Path path, final List<AclEntry> aclSpec,
       final AbfsRestOperation op = getClient()
           .getAclStatus(relativePath, isUpnFormat, tracingContext);
       perfInfoGet.registerResult(op.getResult());
-      final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG);
+      final String eTag = extractEtagHeader(op.getResult());
 
       final Map<String, String> getAclEntries = AbfsAclHelper.deserializeAclSpec(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_ACL));
 

@@ -1859,12 +1861,24 @@ public String getRelativePath(final Path path) {
     return relPath;
   }
 
-  private long parseContentLength(final String contentLength) {
-    if (contentLength == null) {
-      return -1;
+  /**
+   * Extracts the content length from the HTTP operation's response headers.
+   *
+   * @param op The AbfsHttpOperation instance from which to extract the content length.
+   *           This operation contains the HTTP response headers.
+   * @return The content length as a long value. If the Content-Length header is
+   *         not present or is empty, returns 0.
+   */
+  private long extractContentLength(AbfsHttpOperation op) {
+    long contentLength;
+    String contentLengthHeader = op.getResponseHeader(
+        HttpHeaderConfigurations.CONTENT_LENGTH);
+    if (!contentLengthHeader.equals(EMPTY_STRING)) {
+      contentLength = Long.parseLong(contentLengthHeader);
+    } else {
+      contentLength = 0;
     }
-
-    return Long.parseLong(contentLength);
+    return contentLength;
   }
 
   private boolean parseIsDirectory(final String resourceType) {
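Most of the changes in this file push endpoint-specific response handling (resource type, x-ms-properties, continuation tokens, content length) behind the client object instead of reading raw headers in the store. A rough sketch of that shape follows, with an illustrative EndpointClient interface rather than the real AbfsClient API, and with the content-length helper folded into the client purely for brevity.

import java.util.HashMap;
import java.util.Map;

public class ClientAbstractionSketch {

  // Illustrative response wrapper: just a bag of response headers for the demo.
  static final class HttpResult {
    private final Map<String, String> headers = new HashMap<>();

    HttpResult with(String name, String value) {
      headers.put(name, value);
      return this;
    }

    String header(String name) {
      return headers.getOrDefault(name, "");
    }
  }

  // The store calls these instead of parsing headers itself; each endpoint
  // client (DFS or Blob) supplies its own interpretation of the response.
  interface EndpointClient {
    boolean checkIsDir(HttpResult result);
    long extractContentLength(HttpResult result);
  }

  // DFS-flavoured interpretation: the x-ms-resource-type header marks directories,
  // and a missing or empty Content-Length is treated as 0 rather than an error.
  static final class DfsStyleClient implements EndpointClient {
    @Override
    public boolean checkIsDir(HttpResult result) {
      return "directory".equalsIgnoreCase(result.header("x-ms-resource-type"));
    }

    @Override
    public long extractContentLength(HttpResult result) {
      String value = result.header("Content-Length");
      return value.isEmpty() ? 0L : Long.parseLong(value);
    }
  }

  public static void main(String[] args) {
    EndpointClient client = new DfsStyleClient();
    HttpResult dir = new HttpResult().with("x-ms-resource-type", "directory");
    HttpResult file = new HttpResult().with("Content-Length", "42");

    System.out.println(client.checkIsDir(dir));            // true
    System.out.println(client.extractContentLength(file)); // 42
    System.out.println(client.extractContentLength(dir));  // 0 (header absent)
  }
}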
