Commit c7e1b66

HADOOP-19232: [ABFS][FNSOverBlob] Implementing Ingress Support with various Fallback Handling (#7272)
Contributed by Anmol Asrani
1 parent a592666 commit c7e1b66

File tree: 65 files changed, +6,429 −376 lines. Only a subset of the changed files is shown below.


hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java (+14 lines)

@@ -1462,6 +1462,20 @@ void setIsNamespaceEnabledAccount(String isNamespaceEnabledAccount) {
     this.isNamespaceEnabledAccount = isNamespaceEnabledAccount;
   }
 
+  /**
+   * Checks if the FixedSASTokenProvider is configured for the current account.
+   *
+   * @return true if the FixedSASTokenProvider is configured, false otherwise.
+   */
+  public boolean isFixedSASTokenProviderConfigured() {
+    try {
+      return getSASTokenProvider() instanceof FixedSASTokenProvider;
+    } catch (AzureBlobFileSystemException e) {
+      LOG.debug("Failed to get SAS token provider", e);
+      return false;
+    }
+  }
+
   private String getTrimmedPasswordString(String key, String defaultValue) throws IOException {
     String value = getPasswordString(key);
     if (StringUtils.isBlank(value)) {
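As context for the new check, here is a minimal sketch (not part of this commit) of a configuration under which isFixedSASTokenProviderConfigured() should return true: SAS auth plus a fixed SAS token and no custom SAS token provider. The account name, endpoint suffix, and token value are placeholders; the keys are the standard ABFS SAS settings.

import org.apache.hadoop.conf.Configuration;

public final class FixedSasConfigSketch {

  public static Configuration fixedSasConfiguration() {
    Configuration conf = new Configuration();
    // SAS authentication for the (placeholder) account.
    conf.set("fs.azure.account.auth.type.myaccount.blob.core.windows.net", "SAS");
    // Fixed account/service SAS token. With no custom SAS token provider set,
    // AbfsConfiguration resolves this to a FixedSASTokenProvider, so the new
    // isFixedSASTokenProviderConfigured() check is expected to return true.
    conf.set("fs.azure.sas.fixed.token.myaccount.blob.core.windows.net", "<sas-token>");
    return conf;
  }

  private FixedSasConfigSketch() {
  }
}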

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java (+28 −1 lines)

@@ -46,6 +46,7 @@
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.classification.VisibleForTesting;
 import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException;
+import org.apache.hadoop.fs.azurebfs.services.AuthType;
 import org.apache.hadoop.fs.impl.BackReference;
 import org.apache.hadoop.security.ProviderUtils;
 import org.apache.hadoop.util.Preconditions;
@@ -122,11 +123,13 @@
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_IS_HNS_ENABLED;
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BLOCK_UPLOAD_ACTIVE_BLOCKS;
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BLOCK_UPLOAD_BUFFER_DIR;
+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SAS_FIXED_TOKEN;
 import static org.apache.hadoop.fs.azurebfs.constants.FSOperationType.CREATE_FILESYSTEM;
 import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.BLOCK_UPLOAD_ACTIVE_BLOCKS_DEFAULT;
 import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DATA_BLOCKS_BUFFER_DEFAULT;
 import static org.apache.hadoop.fs.azurebfs.constants.InternalConstants.CAPABILITY_SAFE_READAHEAD;
 import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.ERR_CREATE_ON_ROOT;
+import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.UNAUTHORIZED_SAS;
 import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
 import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel;
 import static org.apache.hadoop.util.functional.RemoteIterators.filteringRemoteIterator;
@@ -234,6 +237,29 @@ public void initialize(URI uri, Configuration configuration)
       throw new InvalidConfigurationValueException(FS_AZURE_ACCOUNT_IS_HNS_ENABLED, ex);
     }
 
+    /*
+     * Validates if the correct SAS Token provider is configured for non-HNS accounts.
+     * For non-HNS accounts, if the authentication type is set to SAS, only a fixed SAS Token is supported as of now.
+     * A custom SAS Token Provider should not be configured in such cases, as it will override the FixedSASTokenProvider and render it unused.
+     * If the namespace is not enabled and the FixedSASTokenProvider is not configured,
+     * an InvalidConfigurationValueException will be thrown.
+     *
+     * @throws InvalidConfigurationValueException if account is not namespace enabled and FixedSASTokenProvider is not configured.
+     */
+    try {
+      if (abfsConfiguration.getAuthType(abfsConfiguration.getAccountName()) == AuthType.SAS && // Auth type is SAS
+          !tryGetIsNamespaceEnabled(new TracingContext(initFSTracingContext)) && // Account is FNS
+          !abfsConfiguration.isFixedSASTokenProviderConfigured()) { // Fixed SAS Token Provider is not configured
+        throw new InvalidConfigurationValueException(FS_AZURE_SAS_FIXED_TOKEN, UNAUTHORIZED_SAS);
+      }
+    } catch (InvalidConfigurationValueException ex) {
+      LOG.error("File system configured with Invalid SAS Token Provider for FNS Accounts", ex);
+      throw ex;
+    } catch (AzureBlobFileSystemException ex) {
+      LOG.error("Failed to determine account type for auth type validation", ex);
+      throw new InvalidConfigurationValueException(FS_AZURE_ACCOUNT_IS_HNS_ENABLED, ex);
+    }
+
     /*
      * Non-hierarchical-namespace account can not have a customer-provided-key(CPK).
      * Fail initialization of filesystem if the configs are provided. CPK is of
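The SAS validation added above means that, for a flat-namespace (FNS) account with fs.azure.account.auth.type set to SAS, filesystem initialization now fails fast unless a fixed SAS token is supplied. A hypothetical sketch of that failure mode follows; the account, container, endpoint suffix, and provider class are placeholders, the exact message comes from AbfsErrors.UNAUTHORIZED_SAS, and exercising the check requires a reachable FNS storage account.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException;

public final class FnsSasValidationSketch {

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // SAS auth on an FNS (non-HNS) account, but only a custom SAS token
    // provider is configured and no fixed SAS token: the new validation in
    // initialize() is expected to reject this combination.
    conf.set("fs.azure.account.auth.type.myaccount.blob.core.windows.net", "SAS");
    conf.set("fs.azure.sas.token.provider.type.myaccount.blob.core.windows.net",
        "com.example.MySasTokenProvider"); // hypothetical provider class
    try {
      FileSystem.get(new URI("abfss://container@myaccount.blob.core.windows.net/"), conf);
    } catch (InvalidConfigurationValueException e) {
      // Expected: the FixedSASTokenProvider requirement is not met.
      System.out.println("Rejected as expected: " + e.getMessage());
    }
  }

  private FnsSasValidationSketch() {
  }
}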
@@ -266,6 +292,7 @@ public void initialize(URI uri, Configuration configuration)
         }
       }
     }
+    getAbfsStore().updateClientWithNamespaceInfo(new TracingContext(initFSTracingContext));
 
     LOG.trace("Initiate check for delegation token manager");
     if (UserGroupInformation.isSecurityEnabled()) {
@@ -797,7 +824,7 @@ private FileStatus getFileStatus(final Path path,
     Path qualifiedPath = makeQualified(path);
 
     try {
-      return abfsStore.getFileStatus(qualifiedPath, tracingContext);
+      return getAbfsStore().getFileStatus(qualifiedPath, tracingContext);
     } catch (AzureBlobFileSystemException ex) {
       checkException(path, ex);
       return null;

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java (+56 −24 lines)

@@ -293,6 +293,18 @@ public AzureBlobFileSystemStore(
         "abfs-bounded");
   }
 
+  /**
+   * Updates the client with the namespace information.
+   *
+   * @param tracingContext the tracing context to be used for the operation
+   * @throws AzureBlobFileSystemException if an error occurs while updating the client
+   */
+  public void updateClientWithNamespaceInfo(TracingContext tracingContext)
+      throws AzureBlobFileSystemException {
+    boolean isNamespaceEnabled = getIsNamespaceEnabled(tracingContext);
+    AbfsClient.setIsNamespaceEnabled(isNamespaceEnabled);
+  }
+
   /**
    * Checks if the given key in Azure Storage should be stored as a page
    * blob instead of block blob.
@@ -635,14 +647,15 @@ public OutputStream createFile(final Path path,
       final FsPermission permission, final FsPermission umask,
       TracingContext tracingContext) throws IOException {
     try (AbfsPerfInfo perfInfo = startTracking("createFile", "createPath")) {
+      AbfsClient createClient = getClientHandler().getIngressClient();
       boolean isNamespaceEnabled = getIsNamespaceEnabled(tracingContext);
       LOG.debug("createFile filesystem: {} path: {} overwrite: {} permission: {} umask: {} isNamespaceEnabled: {}",
-        getClient().getFileSystem(),
-        path,
-        overwrite,
-        permission,
-        umask,
-        isNamespaceEnabled);
+          createClient.getFileSystem(),
+          path,
+          overwrite,
+          permission,
+          umask,
+          isNamespaceEnabled);
 
       String relativePath = getRelativePath(path);
       boolean isAppendBlob = false;
@@ -660,9 +673,9 @@ public OutputStream createFile(final Path path,
       }
 
       final ContextEncryptionAdapter contextEncryptionAdapter;
-      if (getClient().getEncryptionType() == EncryptionType.ENCRYPTION_CONTEXT) {
+      if (createClient.getEncryptionType() == EncryptionType.ENCRYPTION_CONTEXT) {
         contextEncryptionAdapter = new ContextProviderEncryptionAdapter(
-            getClient().getEncryptionContextProvider(), getRelativePath(path));
+            createClient.getEncryptionContextProvider(), getRelativePath(path));
       } else {
         contextEncryptionAdapter = NoContextEncryptionAdapter.getInstance();
       }
@@ -677,7 +690,7 @@ public OutputStream createFile(final Path path,
         );
 
       } else {
-        op = getClient().createPath(relativePath, true,
+        op = createClient.createPath(relativePath, true,
             overwrite,
             new Permissions(isNamespaceEnabled, permission, umask),
             isAppendBlob,
@@ -689,15 +702,16 @@ public OutputStream createFile(final Path path,
       perfInfo.registerResult(op.getResult()).registerSuccess(true);
 
       AbfsLease lease = maybeCreateLease(relativePath, tracingContext);
-
+      String eTag = extractEtagHeader(op.getResult());
       return new AbfsOutputStream(
           populateAbfsOutputStreamContext(
               isAppendBlob,
               lease,
-              getClient(),
+              getClientHandler(),
              statistics,
              relativePath,
              0,
+              eTag,
              contextEncryptionAdapter,
              tracingContext));
     }
@@ -720,12 +734,12 @@ private AbfsRestOperation conditionalCreateOverwriteFile(final String relativePa
       final ContextEncryptionAdapter contextEncryptionAdapter,
       final TracingContext tracingContext) throws IOException {
     AbfsRestOperation op;
-
+    AbfsClient createClient = getClientHandler().getIngressClient();
     try {
       // Trigger a create with overwrite=false first so that eTag fetch can be
       // avoided for cases when no pre-existing file is present (major portion
       // of create file traffic falls into the case of no pre-existing file).
-      op = getClient().createPath(relativePath, true, false, permissions,
+      op = createClient.createPath(relativePath, true, false, permissions,
           isAppendBlob, null, contextEncryptionAdapter, tracingContext);
 
     } catch (AbfsRestOperationException e) {
@@ -745,12 +759,11 @@ private AbfsRestOperation conditionalCreateOverwriteFile(final String relativePa
       }
     }
 
-    String eTag = op.getResult()
-        .getResponseHeader(HttpHeaderConfigurations.ETAG);
+    String eTag = extractEtagHeader(op.getResult());
 
     try {
       // overwrite only if eTag matches with the file properties fetched befpre
-      op = getClient().createPath(relativePath, true, true, permissions,
+      op = createClient.createPath(relativePath, true, true, permissions,
           isAppendBlob, eTag, contextEncryptionAdapter, tracingContext);
     } catch (AbfsRestOperationException ex) {
       if (ex.getStatusCode() == HttpURLConnection.HTTP_PRECON_FAILED) {
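The conditional create-overwrite flow above hinges on the eTag: create is first attempted with overwrite=false, and only on a conflict is the eTag read and a second, eTag-conditioned create issued, so a parallel writer loses with HTTP 412 rather than silently overwriting. The eTag now captured in createFile and openFileForWrite and handed to the output-stream context presumably serves the same purpose for Blob-endpoint ingress, where Put Block List accepts an If-Match precondition. A standalone sketch of that general Azure Blob mechanism follows; the URL, eTag, and block id are placeholders, a real request also needs authentication (SAS or Authorization header) and an x-ms-version header, and this is not the client code added by this commit.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public final class ConditionalCommitSketch {

  public static void main(String[] args) throws Exception {
    String blobUrl = "https://myaccount.blob.core.windows.net/container/file?comp=blocklist";
    String eTag = "\"0x8DCEXAMPLE\"";
    String body = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        + "<BlockList>\n<Latest>YmxvY2stMDAwMDA=</Latest>\n</BlockList>";

    HttpRequest request = HttpRequest.newBuilder(URI.create(blobUrl))
        // If-Match ties the commit to the eTag observed at create/open time;
        // if a concurrent writer changed the blob, the service answers 412
        // instead of allowing a lost update.
        .header("If-Match", eTag)
        .header("Content-Type", "application/xml")
        .PUT(HttpRequest.BodyPublishers.ofString(body))
        .build();

    HttpResponse<String> response = HttpClient.newHttpClient()
        .send(request, HttpResponse.BodyHandlers.ofString());
    if (response.statusCode() == 412) {
      System.out.println("Precondition failed: blob changed since the eTag was read.");
    }
  }

  private ConditionalCommitSketch() {
  }
}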
@@ -778,22 +791,24 @@ private AbfsRestOperation conditionalCreateOverwriteFile(final String relativePa
    *
    * @param isAppendBlob is Append blob support enabled?
    * @param lease instance of AbfsLease for this AbfsOutputStream.
-   * @param client AbfsClient.
+   * @param clientHandler AbfsClientHandler.
    * @param statistics FileSystem statistics.
    * @param path Path for AbfsOutputStream.
    * @param position Position or offset of the file being opened, set to 0
    *                 when creating a new file, but needs to be set for APPEND
    *                 calls on the same file.
+   * @param eTag eTag of the file.
    * @param tracingContext instance of TracingContext for this AbfsOutputStream.
    * @return AbfsOutputStreamContext instance with the desired parameters.
    */
   private AbfsOutputStreamContext populateAbfsOutputStreamContext(
       boolean isAppendBlob,
       AbfsLease lease,
-      AbfsClient client,
+      AbfsClientHandler clientHandler,
       FileSystem.Statistics statistics,
       String path,
       long position,
+      String eTag,
       ContextEncryptionAdapter contextEncryptionAdapter,
       TracingContext tracingContext) {
     int bufferSize = abfsConfiguration.getWriteBufferSize();
@@ -814,24 +829,38 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext(
         .withEncryptionAdapter(contextEncryptionAdapter)
         .withBlockFactory(getBlockFactory())
         .withBlockOutputActiveBlocks(blockOutputActiveBlocks)
-        .withClient(client)
+        .withClientHandler(clientHandler)
         .withPosition(position)
         .withFsStatistics(statistics)
         .withPath(path)
         .withExecutorService(new SemaphoredDelegatingExecutor(boundedThreadPool,
            blockOutputActiveBlocks, true))
         .withTracingContext(tracingContext)
         .withAbfsBackRef(fsBackRef)
+        .withIngressServiceType(abfsConfiguration.getIngressServiceType())
+        .withDFSToBlobFallbackEnabled(abfsConfiguration.isDfsToBlobFallbackEnabled())
+        .withETag(eTag)
         .build();
   }
 
+  /**
+   * Creates a directory.
+   *
+   * @param path Path of the directory to create.
+   * @param permission Permission of the directory.
+   * @param umask Umask of the directory.
+   * @param tracingContext tracing context
+   *
+   * @throws AzureBlobFileSystemException server error.
+   */
   public void createDirectory(final Path path, final FsPermission permission,
       final FsPermission umask, TracingContext tracingContext)
       throws IOException {
     try (AbfsPerfInfo perfInfo = startTracking("createDirectory", "createPath")) {
+      AbfsClient createClient = getClientHandler().getIngressClient();
       boolean isNamespaceEnabled = getIsNamespaceEnabled(tracingContext);
       LOG.debug("createDirectory filesystem: {} path: {} permission: {} umask: {} isNamespaceEnabled: {}",
-          getClient().getFileSystem(),
+          createClient.getFileSystem(),
           path,
           permission,
           umask,
@@ -841,7 +870,7 @@ public void createDirectory(final Path path, final FsPermission permission,
           !isNamespaceEnabled || abfsConfiguration.isEnabledMkdirOverwrite();
       Permissions permissions = new Permissions(isNamespaceEnabled,
           permission, umask);
-      final AbfsRestOperation op = getClient().createPath(getRelativePath(path),
+      final AbfsRestOperation op = createClient.createPath(getRelativePath(path),
           false, overwrite, permissions, false, null, null, tracingContext);
       perfInfo.registerResult(op.getResult()).registerSuccess(true);
     }
@@ -976,6 +1005,7 @@ public OutputStream openFileForWrite(final Path path,
         overwrite);
 
     String relativePath = getRelativePath(path);
+    AbfsClient writeClient = getClientHandler().getIngressClient();
 
     final AbfsRestOperation op = getClient()
         .getPathStatus(relativePath, false, tracingContext, null);
@@ -1000,8 +1030,9 @@ public OutputStream openFileForWrite(final Path path,
     }
 
     AbfsLease lease = maybeCreateLease(relativePath, tracingContext);
+    final String eTag = extractEtagHeader(op.getResult());
     final ContextEncryptionAdapter contextEncryptionAdapter;
-    if (getClient().getEncryptionType() == EncryptionType.ENCRYPTION_CONTEXT) {
+    if (writeClient.getEncryptionType() == EncryptionType.ENCRYPTION_CONTEXT) {
       final String encryptionContext = op.getResult()
           .getResponseHeader(
               HttpHeaderConfigurations.X_MS_ENCRYPTION_CONTEXT);
@@ -1010,7 +1041,7 @@ public OutputStream openFileForWrite(final Path path,
             "File doesn't have encryptionContext.");
       }
       contextEncryptionAdapter = new ContextProviderEncryptionAdapter(
-          getClient().getEncryptionContextProvider(), getRelativePath(path),
+          writeClient.getEncryptionContextProvider(), getRelativePath(path),
          encryptionContext.getBytes(StandardCharsets.UTF_8));
     } else {
       contextEncryptionAdapter = NoContextEncryptionAdapter.getInstance();
@@ -1020,10 +1051,11 @@ public OutputStream openFileForWrite(final Path path,
         populateAbfsOutputStreamContext(
             isAppendBlob,
             lease,
-            getClient(),
+            getClientHandler(),
             statistics,
             relativePath,
             offset,
+            eTag,
             contextEncryptionAdapter,
             tracingContext));
   }
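Throughout this file the write ("ingress") path now asks getClientHandler().getIngressClient() for the client to use instead of calling getClient() directly, and the output-stream context additionally records the ingress service type and whether DFS-to-Blob fallback is enabled. A simplified, hypothetical sketch of that selection pattern follows; the real AbfsClientHandler in this commit has a richer API, the names below are illustrative only, and the fallback rule shown (use Blob ingress for non-HNS accounts when fallback is enabled) is an assumption, not the commit's exact logic.

/** Illustrative only: which endpoint ingress (write) traffic should target. */
enum IngressServiceType {
  DFS,
  BLOB
}

/**
 * Hypothetical, simplified stand-in for the handler pattern used above:
 * callers ask for "the ingress client" and the handler picks the DFS or
 * Blob client based on configuration and account type.
 */
final class IngressClientSelectorSketch<C> {
  private final C dfsClient;
  private final C blobClient;
  private final IngressServiceType configuredType;
  private final boolean dfsToBlobFallbackEnabled;
  private final boolean namespaceEnabled;

  IngressClientSelectorSketch(C dfsClient, C blobClient,
      IngressServiceType configuredType,
      boolean dfsToBlobFallbackEnabled,
      boolean namespaceEnabled) {
    this.dfsClient = dfsClient;
    this.blobClient = blobClient;
    this.configuredType = configuredType;
    this.dfsToBlobFallbackEnabled = dfsToBlobFallbackEnabled;
    this.namespaceEnabled = namespaceEnabled;
  }

  /** Client that create/append/flush operations should be routed through. */
  C getIngressClient() {
    if (configuredType == IngressServiceType.BLOB) {
      return blobClient;
    }
    if (dfsToBlobFallbackEnabled && !namespaceEnabled) {
      // Configured for DFS, but the account is FNS: fall back to Blob ingress.
      return blobClient;
    }
    return dfsClient;
  }
}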

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java (+31 −1 lines)

@@ -51,6 +51,8 @@ public final class AbfsHttpConstants {
   public static final String DEFAULT_TIMEOUT = "90";
   public static final String APPEND_BLOB_TYPE = "appendblob";
   public static final String LIST = "list";
+  public static final String BLOCK_BLOB_TYPE = "BlockBlob";
+  public static final String APPEND_BLOCK = "appendblock";
 
   //Abfs Http Client Constants for Blob Endpoint APIs.
 
@@ -238,7 +240,7 @@ public static ApiVersion getCurrentVersion() {
   public static final String PUT_BLOCK_LIST = "PutBlockList";
 
   /**
-   * Value that differentiates categories of the http_status.
+   * Value that differentiates categories of the HTTP status.
    * <pre>
    * 100 - 199 : Informational responses
    * 200 - 299 : Successful responses
@@ -249,6 +251,28 @@ public static ApiVersion getCurrentVersion() {
    */
   public static final Integer HTTP_STATUS_CATEGORY_QUOTIENT = 100;
 
+  /**
+   * XML version declaration for the block list.
+   */
+  public static final String XML_VERSION = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>%n";
+
+  /**
+   * Start tag for the block list XML.
+   */
+  public static final String BLOCK_LIST_START_TAG = "<BlockList>%n";
+
+  /**
+   * End tag for the block list XML.
+   */
+  public static final String BLOCK_LIST_END_TAG = "</BlockList>%n";
+
+  /**
+   * Format string for the latest block in the block list XML.
+   * The placeholder will be replaced with the block identifier.
+   */
+  public static final String LATEST_BLOCK_FORMAT = "<Latest>%s</Latest>%n";
+
+
   /**
    * List of configurations that are related to Customer-Provided-Keys.
    * <ol>
@@ -289,6 +313,12 @@ public static ApiVersion getCurrentVersion() {
   public static final String APACHE_IMPL = "Apache";
   public static final String JDK_FALLBACK = "JDK_fallback";
   public static final String KEEP_ALIVE_CACHE_CLOSED = "KeepAliveCache is closed";
+  public static final String DFS_FLUSH = "D";
+  public static final String DFS_APPEND = "D";
+  public static final String BLOB_FLUSH = "B";
+  public static final String BLOB_APPEND = "B";
+  public static final String FALLBACK_FLUSH = "FB";
+  public static final String FALLBACK_APPEND = "FB";
 
   private AbfsHttpConstants() {}
 }
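The new XML constants are format strings (note the %n placeholders), which suggests they are expanded with String.format when composing a Put Block List request body for the Blob endpoint; the DFS_/BLOB_/FALLBACK_ flush and append markers ("D", "B", "FB") look like short tags recording which ingress path handled an operation, presumably for tracing. A small sketch of how such a body could be assembled from the constants; the helper is illustrative and is not the method this commit adds.

import java.util.List;

public final class BlockListXmlSketch {
  // Values copied from the constants added above.
  private static final String XML_VERSION =
      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>%n";
  private static final String BLOCK_LIST_START_TAG = "<BlockList>%n";
  private static final String BLOCK_LIST_END_TAG = "</BlockList>%n";
  private static final String LATEST_BLOCK_FORMAT = "<Latest>%s</Latest>%n";

  /** Builds a Put Block List body listing every block id to commit. */
  public static String buildBlockListBody(List<String> base64BlockIds) {
    StringBuilder xml = new StringBuilder();
    // String.format expands %n to the platform line separator.
    xml.append(String.format(XML_VERSION));
    xml.append(String.format(BLOCK_LIST_START_TAG));
    for (String blockId : base64BlockIds) {
      xml.append(String.format(LATEST_BLOCK_FORMAT, blockId));
    }
    xml.append(String.format(BLOCK_LIST_END_TAG));
    return xml.toString();
  }

  private BlockListXmlSketch() {
  }
}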

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java (+5 lines)

@@ -145,6 +145,11 @@ public final class FileSystemConfigurations {
    */
   public static final int BLOCK_UPLOAD_ACTIVE_BLOCKS_DEFAULT = 20;
 
+  /**
+   * Length of the block ID used for appends.
+   */
+  public static final int BLOCK_ID_LENGTH = 60;
+
   /**
    * Buffer blocks to disk.
    * Capacity is limited to available disk space.
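Azure block blobs require every block id within a blob to be a Base64 string, and keeping all ids the same length simplifies commits, so a fixed BLOCK_ID_LENGTH makes that easy to guarantee. How this commit actually derives its 60-character ids is not shown in this excerpt; the generator below is purely a hypothetical illustration of producing fixed-length, Base64-safe block ids from a stream id and a block index.

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Base64;
import java.util.UUID;

public final class BlockIdSketch {
  /** Matches the new BLOCK_ID_LENGTH constant: 60 Base64 characters. */
  private static final int BLOCK_ID_LENGTH = 60;
  /** 45 raw bytes encode to exactly 60 Base64 characters (no '=' padding). */
  private static final int RAW_LENGTH = BLOCK_ID_LENGTH / 4 * 3;

  /**
   * Derives a fixed-length Base64 block id from a stream id and block index.
   * Padding the raw seed to a constant byte length keeps every encoded id
   * the same length. Illustrative only; not the scheme used by the commit.
   */
  public static String makeBlockId(UUID streamId, long blockIndex) {
    byte[] seed = String.format("%s-%d", streamId, blockIndex)
        .getBytes(StandardCharsets.UTF_8);
    byte[] raw = Arrays.copyOf(seed, RAW_LENGTH); // zero-padded or truncated
    return Base64.getEncoder().encodeToString(raw);
  }

  private BlockIdSketch() {
  }
}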
