151151import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .CHAR_STAR ;
152152import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .CHAR_UNDERSCORE ;
153153import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .DIRECTORY ;
154+ import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .EMPTY_STRING ;
154155import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .FILE ;
155156import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .ROOT_PATH ;
156157import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .SINGLE_WHITE_SPACE ;
162163import static org .apache .hadoop .fs .azurebfs .constants .FileSystemConfigurations .INFINITE_LEASE_DURATION ;
163164import static org .apache .hadoop .fs .azurebfs .constants .FileSystemUriSchemes .ABFS_BLOB_DOMAIN_NAME ;
164165import static org .apache .hadoop .fs .azurebfs .constants .HttpHeaderConfigurations .X_MS_ENCRYPTION_CONTEXT ;
166+ import static org .apache .hadoop .fs .azurebfs .services .AbfsErrors .ERR_OPENFILE_ON_DIRECTORY ;
165167import static org .apache .hadoop .fs .azurebfs .utils .UriUtils .isKeyForDirectorySet ;
166168
167169/**
@@ -553,7 +555,7 @@ public Hashtable<String, String> getPathStatus(final Path path,
553555
554556 /**
555557 * Creates an object of {@link ContextEncryptionAdapter}
556- * from a file path. It calls {@link org.apache.hadoop.fs.azurebfs.services.AbfsClient
558+ * from a file path. It calls {@link org.apache.hadoop.fs.azurebfs.services.AbfsClient
557559 * #getPathStatus(String, boolean, TracingContext, EncryptionAdapter)} method to get
558560 * contextValue (x-ms-encryption-context) from the server. The contextValue is passed
559561 * to the constructor of EncryptionAdapter to create the required object of
@@ -866,6 +868,53 @@ public AbfsInputStream openFileForRead(final Path path,
866868 tracingContext );
867869 }
868870
871+ /**
872+ * Creates the exception that openFileForRead throws when the resolved path is a directory.
873+ *
874+ * @return AbfsRestOperationException carrying the status code and error code of
875+ * {@code AzureServiceErrorCode.PATH_NOT_FOUND}, with {@code ERR_OPENFILE_ON_DIRECTORY} as its message.
876+ */
877+ private AbfsRestOperationException openFileForReadDirectoryException () {
878+ return new AbfsRestOperationException (
879+ AzureServiceErrorCode .PATH_NOT_FOUND .getStatusCode (),
880+ AzureServiceErrorCode .PATH_NOT_FOUND .getErrorCode (),
881+ ERR_OPENFILE_ON_DIRECTORY ,
882+ null ); // NOTE(review): presumably the wrapped cause, absent here - confirm against the constructor
883+ }
884+
885+ /**
886+ * Opens a file for read and returns an {@link AbfsInputStream}.
887+ *
888+ * <p>
889+ * The method decides whether to call the server's GetPathStatus based on:
890+ * <ul>
891+ * <li>the supplied {@code parameters} (if it contains a {@link VersionedFileStatus}
892+ * with a valid encryption context when required),</li>
893+ * <li>the client's encryption type ({@link EncryptionType#ENCRYPTION_CONTEXT}), and</li>
894+ * <li>the configuration flag returned by {@link AbfsConfiguration#shouldRestrictGpsOnOpenFile()}.</li>
895+ * </ul>
896+ * If the encryption type is {@code ENCRYPTION_CONTEXT} the server-supplied
897+ * X-MS-ENCRYPTION-CONTEXT header will be required and used to construct a
898+ * {@link ContextProviderEncryptionAdapter}. If that header is missing a
899+ * {@link PathIOException} is thrown.
900+ * </p>
901+ *
902+ * <p>
903+ * Note: when {@link AbfsConfiguration#shouldRestrictGpsOnOpenFile()} is enabled and the encryption type
904+ * is not {@code ENCRYPTION_CONTEXT}, the implementation skips the GetPathStatus call even if no
905+ * {@link VersionedFileStatus} is supplied. In that case, if the file does not actually exist or the path is a directory, {@code openFileForRead} will not fail immediately.
906+ * The problem will only be detected when the returned stream performs its first read, at which point an appropriate error will be raised.
907+ * </p>
908+ *
909+ * @param path the path to open (may be unqualified)
910+ * @param parameters optional {@link OpenFileParameters} that may include a {@link FileStatus}
911+ * (possibly a {@link VersionedFileStatus}) and other open parameters
912+ * @param statistics filesystem statistics to associate with the returned stream
913+ * @param tracingContext tracing context for remote calls
914+ * @return an {@link AbfsInputStream} for reading the file
915+ * @throws IOException on IO or server errors. A {@link PathIOException} is thrown when
916+ * an expected encryption context header is missing.
917+ */
869918 public AbfsInputStream openFileForRead (Path path ,
870919 final Optional <OpenFileParameters > parameters ,
871920 final FileSystem .Statistics statistics , TracingContext tracingContext )
@@ -878,13 +927,13 @@ public AbfsInputStream openFileForRead(Path path,
878927 FileStatus fileStatus = parameters .map (OpenFileParameters ::getStatus )
879928 .orElse (null );
880929 String relativePath = getRelativePath (path );
881- String resourceType , eTag ;
882- long contentLength ;
930+ String resourceType = EMPTY_STRING , eTag = EMPTY_STRING ;
931+ long contentLength = 0 ;
883932 ContextEncryptionAdapter contextEncryptionAdapter = NoContextEncryptionAdapter .getInstance ();
884933 /*
885934 * GetPathStatus API has to be called in case of:
886- * 1. fileStatus is null or not an object of VersionedFileStatus: as eTag
887- * would not be there in the fileStatus object.
935+ * 1. restrictGpsOnOpenFile config is disabled AND fileStatus is null or not
936+ * an object of VersionedFileStatus: as eTag would not be there in the fileStatus object.
888937 * 2. fileStatus is an object of VersionedFileStatus and the object doesn't
889938 * have encryptionContext field when client's encryptionType is
890939 * ENCRYPTION_CONTEXT.
@@ -908,19 +957,23 @@ public AbfsInputStream openFileForRead(Path path,
908957 getClient ().getEncryptionContextProvider (), getRelativePath (path ),
909958 encryptionContext .getBytes (StandardCharsets .UTF_8 ));
910959 }
911- } else {
960+ }
961+ /*
962+ * If the client's encryption type is ENCRYPTION_CONTEXT, GetPathStatus API has to be called irrespective of whether the
963+ * restrictGpsOnOpenFile config is enabled or not, to get the encryptionContext from the response header. Otherwise it is called only when that config is disabled.
964+ */
965+ else if (getClient ().getEncryptionType () == EncryptionType .ENCRYPTION_CONTEXT
966+ || !getAbfsConfiguration ().shouldRestrictGpsOnOpenFile ()) {
967+
912968 AbfsHttpOperation op = getClient ().getPathStatus (relativePath , false ,
913- tracingContext , null ).getResult ();
914- resourceType = getClient ().checkIsDir (op ) ? DIRECTORY : FILE ;
915- contentLength = extractContentLength (op );
916- eTag = op .getResponseHeader (HttpHeaderConfigurations .ETAG );
969+ tracingContext , null ).getResult ();
917970 /*
918971 * For file created with ENCRYPTION_CONTEXT, client shall receive
919972 * encryptionContext from header field: X_MS_ENCRYPTION_CONTEXT.
920973 */
921974 if (getClient ().getEncryptionType () == EncryptionType .ENCRYPTION_CONTEXT ) {
922975 final String fileEncryptionContext = op .getResponseHeader (
923- HttpHeaderConfigurations .X_MS_ENCRYPTION_CONTEXT );
976+ HttpHeaderConfigurations .X_MS_ENCRYPTION_CONTEXT );
924977 if (fileEncryptionContext == null ) {
925978 LOG .debug ("EncryptionContext missing in GetPathStatus response" );
926979 throw new PathIOException (path .toString (),
@@ -930,14 +983,20 @@ public AbfsInputStream openFileForRead(Path path,
930983 getClient ().getEncryptionContextProvider (), getRelativePath (path ),
931984 fileEncryptionContext .getBytes (StandardCharsets .UTF_8 ));
932985 }
986+ resourceType = getClient ().checkIsDir (op ) ? DIRECTORY : FILE ;
987+ contentLength = extractContentLength (op );
988+ eTag = op .getResponseHeader (HttpHeaderConfigurations .ETAG );
989+ }
990+ /* The only remaining case is:
991+ * - restrictGpsOnOpenFile config is enabled, fileStatus is null or not a VersionedFileStatus, and encryptionType is not ENCRYPTION_CONTEXT.
992+ * In this case, we don't need to call GetPathStatus API; resourceType and eTag stay EMPTY_STRING and contentLength stays 0.
993+ */
994+ else {
995+ // do nothing
933996 }
934997
935998 if (parseIsDirectory (resourceType )) {
936- throw new AbfsRestOperationException (
937- AzureServiceErrorCode .PATH_NOT_FOUND .getStatusCode (),
938- AzureServiceErrorCode .PATH_NOT_FOUND .getErrorCode (),
939- "openFileForRead must be used with files and not directories" ,
940- null );
999+ throw openFileForReadDirectoryException ();
9411000 }
9421001
9431002 perfInfo .registerSuccess (true );
@@ -1003,6 +1062,7 @@ AZURE_FOOTER_READ_BUFFER_SIZE, getAbfsConfiguration().getFooterReadBufferSize())
10031062 .withStreamStatistics (new AbfsInputStreamStatisticsImpl ())
10041063 .withShouldReadBufferSizeAlways (getAbfsConfiguration ().shouldReadBufferSizeAlways ())
10051064 .withReadAheadBlockSize (getAbfsConfiguration ().getReadAheadBlockSize ())
1065+ .shouldRestrictGpsOnOpenFile (getAbfsConfiguration ().shouldRestrictGpsOnOpenFile ())
10061066 .withBufferedPreadDisabled (bufferedPreadDisabled )
10071067 .withEncryptionAdapter (contextEncryptionAdapter )
10081068 .withAbfsBackRef (fsBackRef )
@@ -1855,7 +1915,7 @@ private AbfsClientContext populateAbfsClientContext() {
18551915 .build ();
18561916 }
18571917
1858- public String getRelativePath (final Path path ) {
1918+ public static String getRelativePath (final Path path ) {
18591919 Preconditions .checkNotNull (path , "path" );
18601920 String relPath = path .toUri ().getPath ();
18611921 if (relPath .isEmpty ()) {
0 commit comments