diff --git a/coverage/pom.xml b/coverage/pom.xml index 9d85c05416..0cc67f8bfc 100644 --- a/coverage/pom.xml +++ b/coverage/pom.xml @@ -21,13 +21,13 @@ com.google.cloud.bigdataoss bigdataoss-parent - 3.0.0-SNAPSHOT + 4.0.0-SNAPSHOT coverage coverage - 3.0.0-SNAPSHOT + 4.0.0-SNAPSHOT pom diff --git a/gcs/CHANGES.md b/gcs/CHANGES.md index 3f78dcdb9b..72ae46cbe5 100644 --- a/gcs/CHANGES.md +++ b/gcs/CHANGES.md @@ -1,6 +1,12 @@ # Release Notes ## Next + +1. Upgrade hadoop versionto `3.4.2` +2. Add bidi-support in connector + +1. Add listStatusStartingFrom API. + 1. Add AUTO_RANDOM as new fadvise mode. 1. Add getFileStatusWithHint() API diff --git a/gcs/dependency-reduced-pom.xml b/gcs/dependency-reduced-pom.xml index 0f79d349b2..ff6f3c74a7 100644 --- a/gcs/dependency-reduced-pom.xml +++ b/gcs/dependency-reduced-pom.xml @@ -1,30 +1,14 @@ - - bigdataoss-parent com.google.cloud.bigdataoss - 3.1.4-SNAPSHOT + 3.0.0-SNAPSHOT 4.0.0 gcs-connector gcs-connector - 3.1.4-SNAPSHOT + 3.0.0-SNAPSHOT An implementation of org.apache.hadoop.fs.FileSystem targeting Google Cloud Storage @@ -103,7 +87,6 @@ io.grpc io.opencensus io.opentelemetry - io.opentelemetry.api io.opentelemetry.contrib io.opentelemetry.semconv io.perfmark @@ -225,18 +208,18 @@ org.apache.hadoop hadoop-client-api - 3.3.6 + 3.4.2 provided org.apache.hadoop hadoop-common - 3.3.6 + 3.4.2 tests test - hadoop-shaded-protobuf_3_7 + hadoop-shaded-protobuf_3_25 org.apache.hadoop.thirdparty @@ -264,8 +247,8 @@ commons-net - commons-collections - commons-collections + commons-collections4 + org.apache.commons javax.servlet-api @@ -307,6 +290,10 @@ jersey-json com.github.pjfanning + + jettison + org.codehaus.jettison + jersey-server com.sun.jersey @@ -315,10 +302,6 @@ reload4j ch.qos.reload4j - - commons-beanutils - commons-beanutils - commons-configuration2 org.apache.commons @@ -359,6 +342,14 @@ zookeeper org.apache.zookeeper + + netty-handler + io.netty + + + netty-transport-native-epoll + io.netty + metrics-core 
io.dropwizard.metrics @@ -367,6 +358,10 @@ commons-compress org.apache.commons + + bcprov-jdk18on + org.bouncycastle + kerb-core org.apache.kerby @@ -392,7 +387,7 @@ com.google.cloud.bigdataoss gcsio - 3.1.4-SNAPSHOT + 3.0.0-SNAPSHOT test-jar test @@ -429,7 +424,7 @@ org.apache.hadoop hadoop-client-runtime - 3.3.6 + 3.4.2 test diff --git a/gcs/pom.xml b/gcs/pom.xml index 3bcd15712f..a2669af0fe 100644 --- a/gcs/pom.xml +++ b/gcs/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.bigdataoss bigdataoss-parent - 3.0.0-SNAPSHOT + 4.0.0-SNAPSHOT ../pom.xml @@ -31,7 +31,7 @@ gcs-connector - 3.0.0-SNAPSHOT + 4.0.0-SNAPSHOT diff --git a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatistics.java b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatistics.java index 7120c4e763..8597626024 100644 --- a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatistics.java +++ b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatistics.java @@ -17,6 +17,8 @@ package com.google.cloud.hadoop.fs.gcs; import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.GCS_CONNECTOR_TIME; +import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.STREAM_READ_VECTORED_OPERATIONS; +import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.STREAM_READ_VECTORED_READ_COMBINED_RANGES; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics; import java.util.Arrays; @@ -58,6 +60,10 @@ public void reset() { void increment(GhfsStatistic statistic, long count) { if (statistic == GCS_CONNECTOR_TIME) { Metric.HADOOP_API_TIME.increment(count); + } else if (statistic == STREAM_READ_VECTORED_OPERATIONS) { + Metric.STREAM_READ_VECTORED_COUNT.increment(count); + } else if (statistic == STREAM_READ_VECTORED_READ_COMBINED_RANGES) { + Metric.STREAM_READ_VECTORED_RANGE_COUNT.increment(count); } else if (statistic.getIsHadoopApi()) { Metric.HADOOP_API_COUNT.increment(count); } @@ -104,7 +110,9 @@ private enum Metric { 
GCS_API_COUNT("gcsApiCount"), GCS_API_TIME("gcsApiTime"), BACKOFF_COUNT("backoffCount"), - BACKOFF_TIME("backoffTime"); + BACKOFF_TIME("backoffTime"), + STREAM_READ_VECTORED_COUNT("readVectoredCount"), + STREAM_READ_VECTORED_RANGE_COUNT("readVectoredRangeCount"); private final String metricName; private final ThreadLocalValue metricValue; diff --git a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java index 2d7b5d5a84..b223ea0c51 100644 --- a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java +++ b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java @@ -878,6 +878,40 @@ public FileStatus[] listStatus(Path hadoopPath) throws IOException { }); } + /** + * Gets FileStatus of all files which are lexicographically greater than and equal the provided + * path. It filters out any directory objects present in underneath storage. + * + *

This is an experimental API can change without notice. + */ + public FileStatus[] listStatusStartingFrom(Path startFrom) throws IOException { + return trackDurationWithTracing( + instrumentation, + globalStorageStatistics, + GhfsStatistic.INVOCATION_LIST_STATUS, + startFrom, + traceFactory, + () -> { + checkArgument(startFrom != null, "start offset path must not be null"); + + checkOpen(); + + logger.atFiner().log("listStatusStartingFrom(hadoopPath: %s)", startFrom); + + URI gcsPath = getGcsPath(startFrom); + List status; + + List fileInfos = getGcsFs().listFileInfoStartingFrom(gcsPath, LIST_OPTIONS); + status = new ArrayList<>(fileInfos.size()); + String userName = getUgiUserName(); + for (FileInfo fileInfo : fileInfos) { + status.add(getGoogleHadoopFileStatus(fileInfo, userName)); + } + incrementStatistic(GhfsStatistic.INVOCATION_LIST_STATUS_RESULT_SIZE, status.size()); + return status.toArray(new FileStatus[0]); + }); + } + @Override public boolean mkdirs(Path hadoopPath, FsPermission permission) throws IOException { return trackDurationWithTracing( @@ -944,7 +978,7 @@ public FileStatus getFileStatus(Path hadoopPath) throws IOException { * performance and reduce redundant API calls without compromising performance and API behaviour. * Currently, only "file" type hint is supported. * - *

This is an experimental API can can change without notice. + *

This is an experimental API can change without notice. */ public FileStatus getFileStatusWithHint(Path hadoopPath, Configuration hint) throws IOException { return trackDurationWithTracing( diff --git a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatisticsTest.java b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatisticsTest.java index 3e1dea5d87..5190927324 100644 --- a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatisticsTest.java +++ b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatisticsTest.java @@ -38,6 +38,9 @@ public class GhfsThreadLocalStatisticsTest { private static final String BACKOFF_TIME = "backoffTime"; private static final String HADOOP_API_COUNT = "hadoopApiCount"; private static final String HADOOP_API_TIME = "hadoopApiTime"; + private static final String STREAM_READ_VECTORED_COUNT = "readVectoredCount"; + + private static final String STREAM_READ_VECTORED_RANGE_COUNT = "readVectoredRangeCount"; private static Map typeToNameMapping = Map.of( @@ -62,7 +65,9 @@ private Map getInitMetrics() { HADOOP_API_COUNT, 0L, HADOOP_API_TIME, 0L, GCS_API_COUNT, 0L, - GCS_API_TIME, 0L)); + GCS_API_TIME, 0L, + STREAM_READ_VECTORED_COUNT, 0L, + STREAM_READ_VECTORED_RANGE_COUNT, 0L)); return result; } @@ -121,6 +126,10 @@ private static void runHadoopApiTests( expectedMetrics.merge(HADOOP_API_COUNT, 1L, Long::sum); } else if (ghfsStatistic == GhfsStatistic.GCS_CONNECTOR_TIME) { expectedMetrics.merge(HADOOP_API_TIME, 1L, Long::sum); + } else if (ghfsStatistic == GhfsStatistic.STREAM_READ_VECTORED_OPERATIONS) { + expectedMetrics.merge(STREAM_READ_VECTORED_COUNT, 1L, Long::sum); + } else if (ghfsStatistic == GhfsStatistic.STREAM_READ_VECTORED_READ_COMBINED_RANGES) { + expectedMetrics.merge(STREAM_READ_VECTORED_RANGE_COUNT, 1L, Long::sum); } verify(expectedMetrics, actualMetrics); @@ -133,6 +142,10 @@ private static void runHadoopApiTests( expectedMetrics.merge(HADOOP_API_COUNT, theValue, 
Long::sum); } else if (ghfsStatistic == GhfsStatistic.GCS_CONNECTOR_TIME) { expectedMetrics.merge(HADOOP_API_TIME, theValue, Long::sum); + } else if (ghfsStatistic == GhfsStatistic.STREAM_READ_VECTORED_OPERATIONS) { + expectedMetrics.merge(STREAM_READ_VECTORED_COUNT, theValue, Long::sum); + } else if (ghfsStatistic == GhfsStatistic.STREAM_READ_VECTORED_READ_COMBINED_RANGES) { + expectedMetrics.merge(STREAM_READ_VECTORED_RANGE_COUNT, theValue, Long::sum); } verify(expectedMetrics, actualMetrics); diff --git a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFSInputStreamIntegrationTest.java b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFSInputStreamIntegrationTest.java index a939554557..b53f2e153b 100644 --- a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFSInputStreamIntegrationTest.java +++ b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFSInputStreamIntegrationTest.java @@ -68,6 +68,27 @@ public static void afterClass() { gcsFsIHelper.afterAllTests(); } + @Test + public void testBidiVectoredRead() throws Exception { + URI path = gcsFsIHelper.getUniqueObjectUri(getClass(), "testBidiVectoredRead"); + + String testContent = "test content"; + gcsFsIHelper.writeTextFile(path, testContent); + + List ranges = new ArrayList<>(); + ranges.add(FileRange.createFileRange(0, 5)); + ranges.add(FileRange.createFileRange(5, 6)); + + try (GoogleHadoopFileSystem ghfs = + GoogleHadoopFileSystemIntegrationHelper.createGhfs( + path, GoogleHadoopFileSystemIntegrationHelper.getBidiTestConfiguration()); + GoogleHadoopFSInputStream in = createGhfsInputStream(ghfs, path)) { + + in.readVectored(ranges, ByteBuffer::allocate); + validateVectoredReadResult(ranges, path); + } + } + @Test public void seek_illegalArgument() throws Exception { URI path = gcsFsIHelper.getUniqueObjectUri(getClass(), "seek_illegalArgument"); diff --git a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemIntegrationHelper.java 
b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemIntegrationHelper.java index 4990292777..0a9f66a91c 100644 --- a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemIntegrationHelper.java +++ b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemIntegrationHelper.java @@ -19,6 +19,7 @@ import static com.google.cloud.hadoop.gcsio.testing.TestConfiguration.GCS_TEST_PROJECT_ID; import static com.google.common.base.Preconditions.checkNotNull; +import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemOptions.ClientType; import com.google.cloud.hadoop.gcsio.testing.TestConfiguration; import com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.AuthenticationType; import java.net.URI; @@ -66,5 +67,13 @@ public static Configuration getTestConfig() { return config; } + public static Configuration getBidiTestConfiguration() { + Configuration config = getTestConfig(); + config.setBoolean("fs.gs.bidi.enable", true); + config.setEnum("fs.gs.client.type", ClientType.STORAGE_CLIENT); + config.setBoolean("fs.gs.grpc.enable", true); + return config; + } + private GoogleHadoopFileSystemIntegrationHelper() {} } diff --git a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTestBase.java b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTestBase.java index 5caddc1e15..98ac1a6669 100644 --- a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTestBase.java +++ b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTestBase.java @@ -24,8 +24,12 @@ import static com.google.common.truth.Truth.assertThat; import static com.google.common.truth.Truth.assertWithMessage; -import com.google.cloud.hadoop.gcsio.*; +import com.google.cloud.hadoop.gcsio.GoogleCloudStorage; +import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemOptions.ClientType; +import 
com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions; +import com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics; +import com.google.cloud.hadoop.gcsio.StorageResourceId; import java.io.File; import java.io.IOException; import java.io.Writer; @@ -34,15 +38,20 @@ import java.net.URISyntaxException; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.StorageStatistics; import org.apache.hadoop.fs.permission.FsPermission; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TestName; /** * Abstract base class for test suites targeting variants of GoogleHadoopFileSystem via the Hadoop @@ -74,6 +83,16 @@ protected static Configuration loadConfig(Configuration config, ClientType stora return newConfig; } + @Rule + public TestName name = + new TestName() { + // With parametrization method name will get [index] appended in their name. + @Override + public String getMethodName() { + return super.getMethodName().replaceAll("[\\[,\\],\\s+]", ""); + } + }; + // ----------------------------------------------------------------------------------------- // Tests that vary according to the GHFS variant, but which we want to make sure get tested. 
// ----------------------------------------------------------------------------------------- @@ -543,6 +562,31 @@ public void provideCoverageForUnmodifiedMethods() throws IOException { } } + @Test + public void listStatusStartingFrom_sortedFileStatus() throws Exception { + int fileCount = 10; + List objectPath = new ArrayList<>(); + URI dirObjectURI = new URI(name.getMethodName() + "/"); + for (int i = 0; i < fileCount; i++) { + // create a random path file + Path filePath = + ghfsHelper.castAsHadoopPath(dirObjectURI.resolve(UUID.randomUUID().toString())); + ghfsHelper.writeFile(filePath, UUID.randomUUID().toString(), 1, /* overwrite= */ false); + objectPath.add(filePath); + } + List sortedPaths = objectPath.stream().sorted().collect(Collectors.toList()); + + FileStatus[] fileStatuses = + invokeListStatusStartingFromMethod(ghfsHelper.castAsHadoopPath(dirObjectURI)); + // Can't assert that this is the only object we get in response, other object lexicographically + // higher would also come in response. + // Only thing we can assert strongly is, list would start with the files created in this + // directory. 
+ for (int i = 0; i < fileCount; i++) { + assertThat(fileStatuses[i].getPath()).isEqualTo(sortedPaths.get(i)); + } + } + @Test public void testGetFileStatusWithHint() throws Exception { Path hadoopPath = ghfsHelper.castAsHadoopPath(getTempFilePath()); @@ -602,6 +646,19 @@ private void invokeGetFileStatusWithHint( assertThat(getStatisticValue(GCS_METADATA_REQUEST)).isEqualTo(numTimes); } + private Method getListStatusStartingFromMethod() throws NoSuchMethodException { + return ghfs.getClass().getMethod("listStatusStartingFrom", Path.class); + } + + private FileStatus[] invokeListStatusStartingFromMethod(Path startFrom) throws Exception { + resetStats(); + + FileStatus[] fileStatus = + (FileStatus[]) getListStatusStartingFromMethod().invoke(ghfs, startFrom); + + return fileStatus; + } + private Long getStatisticValue(GoogleCloudStorageStatistics stat) { return getStatistics().getLong(stat.getSymbol()); } diff --git a/gcsio/pom.xml b/gcsio/pom.xml index 4ceeb0d780..fde55d4ee4 100644 --- a/gcsio/pom.xml +++ b/gcsio/pom.xml @@ -21,7 +21,7 @@ com.google.cloud.bigdataoss bigdataoss-parent - 3.0.0-SNAPSHOT + 4.0.0-SNAPSHOT ../pom.xml @@ -31,7 +31,7 @@ gcsio - 3.0.0-SNAPSHOT + 4.0.0-SNAPSHOT diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ForwardingGoogleCloudStorage.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ForwardingGoogleCloudStorage.java index 15260c3140..240ca19c08 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ForwardingGoogleCloudStorage.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ForwardingGoogleCloudStorage.java @@ -203,6 +203,12 @@ public List listObjectInfo( return delegate.listObjectInfo(bucketName, objectNamePrefix, listOptions); } + @Override + public List listObjectInfoStartingFrom( + String bucketName, String startOffset, ListObjectOptions listOptions) throws IOException { + return delegate.listObjectInfoStartingFrom(bucketName, startOffset, listOptions); + } + @Override public ListPage 
listObjectInfoPage( String bucketName, String objectNamePrefix, ListObjectOptions listOptions, String pageToken) diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorage.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorage.java index d5f2698c48..9765648c67 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorage.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorage.java @@ -46,9 +46,15 @@ public interface GoogleCloudStorage { */ String PATH_DELIMITER = "/"; - /** Value indicating all objects should be returned from GCS, no limit. */ + /** + * Value indicating all objects should be returned from GCSFileSystem. No limit i.e. get all + * possible items. Used while listing all files in a directory + */ long MAX_RESULTS_UNLIMITED = -1; + /** Value indicates the maxResult returned by gcs List API. */ + long LIST_MAX_RESULTS = 5000L; + /** The maximum number of objects that can be composed in one operation. */ int MAX_COMPOSE_OBJECTS = 32; @@ -370,6 +376,33 @@ default List listObjectInfo( return listObjectInfo(bucketName, objectNamePrefix, ListObjectOptions.DEFAULT); } + /** + * Lists {@link GoogleCloudStorageItemInfo} of objects contained in the given bucket and their + * name is lexicographically greater than or equal the provided offset. + * + *

Note: As GCS doesn't implement a file system, directory is also treated as an object (if + * it's been created). This API filters out all those directory objects and maintains the order of + * items. This API strictly expects delimiter in listOptions to be not set. + * + *<p>

Consider a bucket with objects: {@code o1}, {@code d1/}, {@code d1/o1}, {@code d1/o2} + * + *

    + *
  • With {@code startOffset == "o1"} , we get: {@code o1} + *
  • With {@code startOffset == "d1/"} , we get: {@code d1/o1} {@code d1/o2} + *
  • With {@code startOffset == "d1/o1"} , we get: {@code d1/o1} {@code d1/o2} + *<p>

    This is an experimental API and can change without notice. + * + * @param bucketName bucket name + * @param startOffset offset string, all objects with name greater than or equal will be listed. + * @return list of objects + * @throws IOException on IO error + */ + default List listObjectInfoStartingFrom( + String bucketName, String startOffset) throws IOException { + return listObjectInfoStartingFrom( + bucketName, startOffset, ListObjectOptions.DEFAULT_USING_START_OFFSET); + } + /** * Lists {@link GoogleCloudStorageItemInfo} of objects contained in the given bucket and whose * names begin with the given prefix. @@ -398,6 +431,31 @@ default List listObjectInfo( List listObjectInfo( String bucketName, String objectNamePrefix, ListObjectOptions listOptions) throws IOException; + /** + * Lists {@link GoogleCloudStorageItemInfo} of objects contained in the given bucket and their + * name is lexicographically greater than or equal the provided offset. + * + *<p>

    Note: As GCS doesn't implement a file system, directory is also treated as an object (if + * it's been created). This API filters out all those directory objects and maintains the order of + * items. This API strictly expects delimiter in listOptions to be not set. + * + *<p>

    Consider a bucket with objects: {@code o1}, {@code d1/}, {@code d1/o1}, {@code d1/o2} + * + *

      + *
    • With {@code startOffset == "o1"} , we get: {@code o1} + *
    • With {@code startOffset == "d1/"} , we get: {@code d1/o1} {@code d1/o2} + *
    • With {@code startOffset == "d1/o1"} , we get: {@code d1/o1} {@code d1/o2} + *<p>

      This is an experimental API and can change without notice. + * + * @param bucketName bucket name + * @param startOffset offset sting, all object with name greater and equal will be listed. + * @param listOptions options to use when listing objects + * @return list of objects + * @throws IOException on IO error + */ + List listObjectInfoStartingFrom( + String bucketName, String startOffset, ListObjectOptions listOptions) throws IOException; + /** * The same semantics as {@link #listObjectInfo}, but returns only result of single list request * (1 page). diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageBidiReadChannel.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageBidiReadChannel.java index e972b43d5e..4234c4d543 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageBidiReadChannel.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageBidiReadChannel.java @@ -193,10 +193,29 @@ public boolean isOpen() { @Override public void close() throws IOException { if (open) { - open = false; logger.atFinest().log("Closing channel for '%s'", resourceId); - if (blobReadSession != null) { - blobReadSession.close(); + try { + if (blobReadSession != null) { + blobReadSession.close(); + } else if (sessionFuture != null) { + try (BlobReadSession readSession = + sessionFuture.get(readOptions.getBidiClientTimeout(), TimeUnit.SECONDS)) { + // The try-with-resources statement ensures the readSession is automatically closed. 
+ } catch (InterruptedException + | ExecutionException + | TimeoutException + | java.util.concurrent.CancellationException e) { + logger.atFine().withCause(e).log( + "Failed to get/close BlobReadSession during close() for '%s'", resourceId); + } + } + } catch (Exception e) { + GoogleCloudStorageEventBus.postOnException(); + throw new IOException( + String.format("Exception occurred while closing channel '%s'", resourceId), e); + } finally { + blobReadSession = null; + open = false; } } } @@ -230,7 +249,7 @@ public void onSuccess(DisposableByteString disposableByteString) { long bytesRead = processBytesAndCompleteRange(disposableByteString, range, allocate); logger.atFiner().log( - "Vectored Read successful for range starting from %d with length %d. Total Bytes Read are: %d within %d ms", + "Vectored Read successful for range starting from %d with length %d.Total Bytes Read are: %d within %d ms", range.getOffset(), range.getLength(), bytesRead, @@ -449,7 +468,11 @@ public void refillInternalBuffer() throws IOException { ByteString byteString = dbs.byteString(); if (!byteString.isEmpty()) { this.bufferStartPosition = position; - this.internalBuffer = byteString.asReadOnlyByteBuffer(); + + // TODO(dhritichopra): This is a temporary fix for mmeory leak, better alterantives need to + // be explored. 
+ byte[] copiedBytes = byteString.toByteArray(); + this.internalBuffer = ByteBuffer.wrap(copiedBytes); } } catch (InterruptedException | ExecutionException | TimeoutException e) { throw new IOException( diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystem.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystem.java index da5a550b4c..70b8b0a2cd 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystem.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystem.java @@ -243,6 +243,17 @@ default List listFileInfo(URI path) throws IOException { return listFileInfo(path, ListFileOptions.DEFAULT); } + /** + * Return all the files which are lexicographically equal or greater than the provided path. This + * is an experimental API and can change without notice. + * + * @param startsFrom Given path. + * @return Information about all the files which satisfies the criteria. + */ + default List listFileInfoStartingFrom(URI startsFrom) throws IOException { + return listFileInfoStartingFrom(startsFrom, ListFileOptions.DEFAULT); + } + /** * If the given path points to a directory then the information about its children is returned, * otherwise information about the given file is returned. @@ -253,6 +264,16 @@ default List listFileInfo(URI path) throws IOException { */ List listFileInfo(URI path, ListFileOptions listOptions) throws IOException; + /** + * Return all the files which are lexicographically equal or greater than the provided path. This + * is an experimental API and can change without notice. + * + * @param startsFrom Given path. + * @return Information about all the files which satisfies the criteria. + */ + List listFileInfoStartingFrom(URI startsFrom, ListFileOptions listOptions) + throws IOException; + /** * Returns the list of folder resources in the prefix. 
It lists all the folder resources * diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemImpl.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemImpl.java index c680ea4f61..0053bfb3e1 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemImpl.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemImpl.java @@ -1054,6 +1054,33 @@ public List listFileInfo(URI path, ListFileOptions listOptions) throws return fileInfos; } + public List listFileInfoStartingFrom(URI startsFrom, ListFileOptions listOptions) + throws IOException { + checkNotNull(startsFrom, "start Offset can't be null"); + logger.atFiner().log("listFileInfoStartingFrom(startsFrom: %s)", startsFrom); + + StorageResourceId startOffsetPathId = + StorageResourceId.fromUriPath(startsFrom, /* allowEmptyObjectName= */ true); + + checkArgument( + !startOffsetPathId.isRoot(), + "provided start offset shouldn't be root but an object path %s", + startsFrom); + + List itemsInfo = + gcs.listObjectInfoStartingFrom( + startOffsetPathId.getBucketName(), + startOffsetPathId.getObjectName(), + updateListObjectOptions( + ListObjectOptions.builder() + .setMaxResults(options.getCloudStorageOptions().getMaxListItemsPerCall()) + .setIncludePrefix(false) + .setDelimiter(null) + .build(), + listOptions)); + return FileInfo.fromItemInfos(itemsInfo); + } + @Override public FileInfo getFileInfo(URI path) throws IOException { checkArgument(path != null, "path must not be null"); diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageImpl.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageImpl.java index 5569de1c52..89b02b8274 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageImpl.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageImpl.java @@ -81,8 +81,15 @@ import 
com.google.common.flogger.GoogleLogger; import com.google.common.io.BaseEncoding; import com.google.common.util.concurrent.ThreadFactoryBuilder; -import com.google.storage.control.v2.*; +import com.google.storage.control.v2.CreateFolderRequest; +import com.google.storage.control.v2.Folder; +import com.google.storage.control.v2.FolderName; +import com.google.storage.control.v2.GetFolderRequest; +import com.google.storage.control.v2.ListFoldersRequest; +import com.google.storage.control.v2.RenameFolderRequest; +import com.google.storage.control.v2.StorageControlClient; import com.google.storage.control.v2.StorageControlClient.ListFoldersPagedResponse; +import com.google.storage.control.v2.StorageControlSettings; import java.io.FileNotFoundException; import java.io.IOException; import java.lang.reflect.Field; @@ -93,6 +100,7 @@ import java.time.Duration; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; @@ -1484,6 +1492,45 @@ private Storage.Objects.Insert prepareEmptyInsert( return insertObject; } + private List listStorageObjects( + String bucketName, String startOffset, ListObjectOptions listOptions) throws IOException { + logger.atFiner().log("listStorageObjects(%s, %s, %s)", bucketName, startOffset, listOptions); + checkArgument( + listOptions.getDelimiter() == null, + "Delimiter shouldn't be used while listing objects starting from an offset"); + + long maxResults = + listOptions.getMaxResults() > 0 ? 
listOptions.getMaxResults() : LIST_MAX_RESULTS; + + Storage.Objects.List listObject = + createListRequest( + bucketName, + /* objectNamePrefix */ null, + startOffset, + listOptions.getFields(), + /* delimiter */ null, + maxResults, + listOptions.isIncludeFoldersAsPrefixes()); + String pageToken = null; + LinkedList listedObjects = new LinkedList<>(); + // paginated call is required because we may filter all the items, if they are "directory" + // Avoid calling another list API as soon as we have some files listed. + int page = 0; + do { + page += 1; + if (pageToken != null) { + logger.atFiner().log( + "listStorageObjects: next page %s, listedObjects: %d", pageToken, listedObjects.size()); + listObject.setPageToken(pageToken); + } + pageToken = + listStorageObjectsFilteredPage( + listObject, listOptions, listedObjects, Integer.toString(page)); + } while (pageToken != null && listedObjects.size() == 0); + + return listedObjects; + } + /** * Helper for both listObjectInfo that executes the actual API calls to get paginated lists, * accumulating the StorageObjects and String prefixes into the params {@code listedObjects} and @@ -1537,6 +1584,7 @@ private void listStorageObjectsAndPrefixes( createListRequest( bucketName, objectNamePrefix, + /* startOffset */ null, listOptions.getFields(), listOptions.getDelimiter(), maxResults, @@ -1557,6 +1605,37 @@ private void listStorageObjectsAndPrefixes( && getMaxRemainingResults(listOptions.getMaxResults(), listedPrefixes, listedObjects) > 0); } + /* + * Helper function to list files which are lexicographically higher than the offset (inclusive). + * It strictly expects no delimiter to be provided. 
+ * It also filters out all the objects which are "directories" + */ + private String listStorageObjectsFilteredPage( + Storage.Objects.List listObject, + ListObjectOptions listOptions, + List listedObjects, + String pageContext) + throws IOException { + logger.atFiner().log("listStorageObjectsPage(%s, %s)", listObject, listOptions); + checkNotNull(listedObjects, "Must provide a non-null container for listedObjects."); + // We don't want any prefixes[] and filter the objects which are "directories" manually. + checkArgument( + listObject.getDelimiter() == null, + "Delimiter shouldn't be set while listing object from an offset"); + + Objects response = executeListCall(listObject, pageContext); + if (response == null || response.getItems() == null) { + return null; + } + /* filter the objects which are directory */ + for (StorageObject object : response.getItems()) { + if (!object.getName().endsWith(PATH_DELIMITER)) { + listedObjects.add(object); + } + } + return response.getNextPageToken(); + } + @Nullable private String listStorageObjectsAndPrefixesPage( Storage.Objects.List listObject, @@ -1573,13 +1652,9 @@ private String listStorageObjectsAndPrefixesPage( // Deduplicate prefixes and items, because if 'includeTrailingDelimiter' set to true // then returned items will contain "prefix objects" too. Set prefixes = new LinkedHashSet<>(listedPrefixes); - Objects items; - try (ITraceOperation op = - TraceOperation.addToExistingTrace( - String.format("gcs.objects.list(page=%s)", pageContext))) { - items = listObject.execute(); - op.annotate("resultSize", items == null ? 
0 : items.size()); + try { + items = executeListCall(listObject, pageContext); } catch (IOException e) { GoogleCloudStorageEventBus.postOnException(); String resource = StringPaths.fromComponents(listObject.getBucket(), listObject.getPrefix()); @@ -1655,9 +1730,21 @@ private String listStorageObjectsAndPrefixesPage( return items.getNextPageToken(); } + private Objects executeListCall(Storage.Objects.List listObject, String pageContext) + throws IOException { + try (ITraceOperation op = + TraceOperation.addToExistingTrace( + String.format("gcs.objects.list(page=%s)", pageContext))) { + Objects items = listObject.execute(); + op.annotate("resultSize", items == null ? 0 : items.size()); + return items; + } + } + private Storage.Objects.List createListRequest( String bucketName, String objectNamePrefix, + String startOffset, String objectFields, String delimiter, long maxResults, @@ -1675,7 +1762,11 @@ private Storage.Objects.List createListRequest( Storage.Objects.List listObject = initializeRequest( - storage.objects().list(bucketName).setPrefix(emptyToNull(objectNamePrefix)), + storage + .objects() + .list(bucketName) + .setStartOffset(emptyToNull(startOffset)) + .setPrefix(emptyToNull(objectNamePrefix)), bucketName); // Set delimiter if supplied. 
@@ -1728,6 +1819,32 @@ public List listObjectInfo( bucketName, objectNamePrefix, listOptions, listedPrefixes, listedObjects); } + @Override + public List listObjectInfoStartingFrom( + String bucketName, String startOffset, ListObjectOptions listOptions) throws IOException { + logger.atFiner().log( + "listObjectInfoStartingFrom(%s, %s, %s)", bucketName, startOffset, listOptions); + checkArgument( + listOptions.getFields() != null && listOptions.getFields().contains("name"), + "Name is a required field for listing files, provided fields %s", + listOptions.getFields()); + try { + List listedObjects = listStorageObjects(bucketName, startOffset, listOptions); + return getGoogleCloudStorageItemInfos( + bucketName, + /* objectNamePrefix= */ null, + listOptions, + Collections.EMPTY_LIST, + listedObjects); + } catch (Exception e) { + throw new IOException( + String.format( + "Having issue while listing files from offset: %s for bucket: %s and options: %s", + startOffset, bucketName, listOptions), + e); + } + } + /** * @see GoogleCloudStorage#listObjectInfoPage(String, String, ListObjectOptions, String) */ @@ -1747,6 +1864,7 @@ public ListPage listObjectInfoPage( createListRequest( bucketName, objectNamePrefix, + /* startOffset */ null, listOptions.getFields(), listOptions.getDelimiter(), listOptions.getMaxResults(), diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ListObjectOptions.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ListObjectOptions.java index d31f52ebac..424c023988 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ListObjectOptions.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ListObjectOptions.java @@ -16,6 +16,7 @@ package com.google.cloud.hadoop.gcsio; +import static com.google.cloud.hadoop.gcsio.GoogleCloudStorage.LIST_MAX_RESULTS; import static com.google.cloud.hadoop.gcsio.GoogleCloudStorage.MAX_RESULTS_UNLIMITED; import static com.google.cloud.hadoop.gcsio.GoogleCloudStorage.PATH_DELIMITER; import 
static com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.OBJECT_FIELDS; @@ -30,6 +31,10 @@ public abstract class ListObjectOptions { /** List all objects in the directory. */ public static final ListObjectOptions DEFAULT = builder().build(); + /** List objects starting from an offset. */ + public static final ListObjectOptions DEFAULT_USING_START_OFFSET = + builder().setDelimiter(null).setMaxResults(LIST_MAX_RESULTS).build(); + /** List all objects with the prefix. */ public static final ListObjectOptions DEFAULT_FLAT_LIST = builder().setDelimiter(null).build(); diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/testing/InMemoryGoogleCloudStorage.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/testing/InMemoryGoogleCloudStorage.java index 85952ea8a3..dfd82dcf85 100644 --- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/testing/InMemoryGoogleCloudStorage.java +++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/testing/InMemoryGoogleCloudStorage.java @@ -514,6 +514,25 @@ private synchronized List listObjectNames( return new ArrayList<>(uniqueNames); } + private synchronized List listObjectNamesStartingFrom( + String bucketName, String startOffset, ListObjectOptions listOptions) { + InMemoryBucketEntry bucketEntry = bucketLookup.get(bucketName); + if (bucketEntry == null) { + return new ArrayList<>(); + } + + Set uniqueNames = new TreeSet<>(); + for (String objectName : bucketEntry.getObjectNames()) { + if (objectName.compareTo(startOffset) >= 0) { + uniqueNames.add(objectName); + } + if (listOptions.getMaxResults() > 0 && uniqueNames.size() >= listOptions.getMaxResults()) { + break; + } + } + return uniqueNames.stream().sorted().collect(Collectors.toList()); + } + @Override public ListPage listObjectInfoPage( String bucketName, String objectNamePrefix, ListObjectOptions listOptions, String pageToken) @@ -544,21 +563,17 @@ public synchronized List listObjectInfo( bucketName, objectNamePrefix, 
listOptions.toBuilder().setMaxResults(MAX_RESULTS_UNLIMITED).build()); - List listedInfo = new ArrayList<>(); - for (String objectName : listedNames) { - GoogleCloudStorageItemInfo itemInfo = - getItemInfo(new StorageResourceId(bucketName, objectName)); - if (itemInfo.exists()) { - listedInfo.add(itemInfo); - } else if (itemInfo.getResourceId().isStorageObject()) { - listedInfo.add( - GoogleCloudStorageItemInfo.createInferredDirectory(itemInfo.getResourceId())); - } - if (listOptions.getMaxResults() > 0 && listedInfo.size() >= listOptions.getMaxResults()) { - break; - } - } - return listedInfo; + return convertToItemInfo(bucketName, listedNames, listOptions); + } + + @Override + public List listObjectInfoStartingFrom( + String bucketName, String startOffset, ListObjectOptions listOptions) throws IOException { + // Since we're just in memory, we can do the naive implementation of just listing names and + // then calling getItemInfo for each. + List listObjectNamesStartingFrom = + listObjectNamesStartingFrom(bucketName, startOffset, listOptions); + return convertToItemInfo(bucketName, listObjectNamesStartingFrom, listOptions); } public void renameHnFolder(URI src, URI dst) throws IOException { @@ -688,4 +703,24 @@ public GoogleCloudStorageItemInfo composeObjects( public Map getStatistics() { throw new UnsupportedOperationException("not implemented"); } + + private List convertToItemInfo( + String bucketName, final List listedNames, ListObjectOptions listOptions) + throws IOException { + List listedInfo = new ArrayList<>(); + for (String objectName : listedNames) { + GoogleCloudStorageItemInfo itemInfo = + getItemInfo(new StorageResourceId(bucketName, objectName)); + if (itemInfo.exists()) { + listedInfo.add(itemInfo); + } else if (itemInfo.getResourceId().isStorageObject()) { + listedInfo.add( + GoogleCloudStorageItemInfo.createInferredDirectory(itemInfo.getResourceId())); + } + if (listOptions.getMaxResults() > 0 && listedInfo.size() >= listOptions.getMaxResults()) 
{ + break; + } + } + return listedInfo; + } } diff --git a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemNewIntegrationTestBase.java b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemNewIntegrationTestBase.java index 5678fc3b4c..aa8d112196 100644 --- a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemNewIntegrationTestBase.java +++ b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemNewIntegrationTestBase.java @@ -21,6 +21,7 @@ import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.deleteRequestString; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.getRequestString; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.listRequestString; +import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.listRequestWithStartOffset; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.listRequestWithTrailingDelimiter; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.moveRequestString; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.uploadRequestString; @@ -30,19 +31,24 @@ import static java.util.concurrent.TimeUnit.MINUTES; import static java.util.stream.Collectors.toList; import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemImpl.PathTypeHint; import com.google.cloud.hadoop.util.RetryHttpInitializer; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; +import java.util.ArrayList; +import java.util.Comparator; import java.util.List; import java.util.UUID; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import 
java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.stream.Collectors; import org.junit.After; import org.junit.Before; import org.junit.Rule; @@ -1032,6 +1038,137 @@ public void listFileInfo_customFields_fails() throws Exception { } } + @Test + public void listFileInfoStartingFrom_customFields_required() throws Exception { + gcsFs = newGcsFs(newGcsFsOptions().build()); + String bucketName = gcsfsIHelper.sharedBucketName1; + URI bucketUri = new URI("gs://" + bucketName + "/"); + String dirObject = getTestResource() + "/"; + String objectName = dirObject + "I_am_file"; + gcsfsIHelper.createObjects(bucketName, objectName); + + List fileInfos = + gcsFs.listFileInfoStartingFrom( + bucketUri.resolve(dirObject), + ListFileOptions.DEFAULT.toBuilder().setFields("bucket,name").build()); + + // Can't assert that this is the only object we get in response, other object lexicographically + // higher would also come in response. + // Only thing we can assert strongly is, list would start with the files created in this + // directory. 
+ assertThat(fileInfos.get(0).getPath()).isEqualTo(bucketUri.resolve(objectName)); + // No item info calls are made with offset based api + assertThat(gcsRequestsTracker.getAllRequestStrings()) + .containsExactly( + listRequestWithStartOffset( + bucketName, dirObject, /* pageToken= */ null, /* objectFields */ "bucket,name")); + } + + @Test + public void listFileInfoStartingFrom_customFields_fail() throws Exception { + gcsFs = newGcsFs(newGcsFsOptions().build()); + String bucketName = gcsfsIHelper.sharedBucketName1; + URI bucketUri = new URI("gs://" + bucketName + "/"); + String dirObject = getTestResource() + "/"; + String objectName = dirObject + "I_am_file"; + gcsfsIHelper.createObjects(bucketName, objectName); + + assertThrows( + IllegalArgumentException.class, + () -> + gcsFs.listFileInfoStartingFrom( + bucketUri.resolve(dirObject), + ListFileOptions.DEFAULT.toBuilder().setFields("bucket").build())); + + assertTrue(gcsRequestsTracker.getAllRequestStrings().isEmpty()); + } + + @Test + public void listFileInfoStartingFrom_emptyResponse() throws Exception { + gcsFs = newGcsFs(newGcsFsOptions().build()); + String bucketName = gcsfsIHelper.sharedBucketName1; + + URI bucketUri = new URI("gs://" + bucketName + "/"); + // lexicographically the highest string of length 4 + String dirObject = "~~~~" + "/"; + gcsfsIHelper.createObjects(bucketName, dirObject); + + List fileInfos = gcsFs.listFileInfoStartingFrom(bucketUri.resolve(dirObject)); + + // Can't asset that this is the only object we get in response, other object lexicographically + // higher would also come in response. + // Only thing we can assert strongly is, list would start with the files created in this + // directory. 
+ assertThat(fileInfos.size()).isEqualTo(0); + + // No item info calls are made with offset based api + assertThat(gcsRequestsTracker.getAllRequestStrings()) + .containsExactly(listRequestWithStartOffset(bucketName, dirObject, /* pageToken= */ null)); + } + + @Test + public void listFileInfoStartingFrom_sortedLexicographically() throws Exception { + gcsFs = newGcsFs(newGcsFsOptions().build()); + String bucketName = gcsfsIHelper.sharedBucketName1; + URI bucketUri = new URI("gs://" + bucketName + "/"); + String dirObject = getTestResource() + "/"; + int filesCount = 50; + List createdURIs = new ArrayList<>(); + for (int i = 0; i < filesCount; i++) { + String objectName = dirObject + UUID.randomUUID(); + gcsfsIHelper.createObjects(bucketName, objectName); + createdURIs.add(bucketUri.resolve(objectName)); + } + List sortedURI = + createdURIs.stream() + .sorted(Comparator.comparing(URI::getPath)) + .collect(Collectors.toList()); + + List fileInfos = gcsFs.listFileInfoStartingFrom(bucketUri.resolve(dirObject)); + + // Can't asset that this is the only object we get in response, other object lexicographically + // higher would also come in response. + // Only thing we can assert strongly is, list would start with the files created in this + // directory. 
+ for (int i = 0; i < filesCount; i++) { + assertThat(getURIs(fileInfos).get(i)).isEqualTo(sortedURI.get(i)); + } + + // No item info calls are made with offset based api + assertThat(gcsRequestsTracker.getAllRequestStrings()) + .containsExactly(listRequestWithStartOffset(bucketName, dirObject, /* pageToken= */ null)); + } + + @Test + public void listFileInfoStartingFrom_filter_directory() throws Exception { + gcsFs = newGcsFs(newGcsFsOptions().build()); + String bucketName = gcsfsIHelper.sharedBucketName1; + URI bucketUri = new URI("gs://" + bucketName + "/"); + String dirObject = getTestResource() + "/"; + String objectName = dirObject + "I_am_file"; + gcsfsIHelper.createObjects(bucketName, dirObject); + gcsfsIHelper.createObjects(bucketName, objectName); + + List fileInfos = gcsFs.listFileInfoStartingFrom(bucketUri.resolve(dirObject)); + + // Can't assert that this is the only object we get in response, other object lexicographically + // higher would also come in response. + // Only thing we can assert strongly is, list would start with the files created in this + // directory. + assertThat(fileInfos.get(0).getPath()).isEqualTo(bucketUri.resolve(objectName)); + // No item info calls are made with offset based api + assertThat(gcsRequestsTracker.getAllRequestStrings()) + .containsExactly(listRequestWithStartOffset(bucketName, dirObject, /* pageToken= */ null)); + + // directory object was filtered + FileInfo dirFileInfo = + gcsFs.getFileInfoWithHint(bucketUri.resolve(dirObject), PathTypeHint.FILE); + // directory as an object do exists. + assertTrue(dirFileInfo.isDirectory()); + // but directory objects are filtered out and only files are returned. 
+ assertThat(getURIs(fileInfos)).doesNotContain(bucketUri.resolve(dirObject)); + } + @Test public void delete_file_sequential() throws Exception { gcsFs = newGcsFs(newGcsFsOptions().setStatusParallelEnabled(false).build()); @@ -1156,6 +1293,7 @@ private void delete_directory_hns_optimization(boolean parallelStatus) throws Ex "bucket,name,generation", /* maxResults= */ 2, /* pageToken= */ null, + /* startOffset */ null, /* includeFoldersAsPrefixes= */ true), getRequestString(bucketName, dirObject + "/"), deleteRequestString(bucketName, dirObject + "/d1/", /* generationId= */ 1)); @@ -1722,6 +1860,10 @@ private static GoogleCloudStorageFileSystemOptions.Builder newGcsFsOptions() { return GoogleCloudStorageFileSystemOptions.builder().setCloudStorageOptions(gcsOptions); } + private static List getURIs(List fileInfos) { + return fileInfos.stream().map(FileInfo::getPath).collect(toList()); + } + protected abstract GoogleCloudStorageFileSystem newGcsFs( GoogleCloudStorageFileSystemOptions gcsfsOptions) throws IOException; } diff --git a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageIntegrationHelper.java b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageIntegrationHelper.java index c8f7ed2511..577861ecc4 100644 --- a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageIntegrationHelper.java +++ b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageIntegrationHelper.java @@ -28,14 +28,10 @@ import com.google.cloud.hadoop.gcsio.integration.GoogleCloudStorageTestHelper.TestBucketHelper; import com.google.common.collect.Iterables; import com.google.common.flogger.GoogleLogger; -import java.io.BufferedReader; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; -import java.net.HttpURLConnection; import java.net.URI; -import java.net.URL; import java.nio.ByteBuffer; import java.nio.channels.SeekableByteChannel; 
import java.nio.channels.WritableByteChannel; @@ -534,27 +530,28 @@ public String createUniqueZonalOrRegionalBucket(String suffix, boolean isBucketZ String zone = DEFAULT_ZONE; // Default zone String region = DEFAULT_REGION; // Default region - try { - URL metadataServerUrl = new URL(METADATA_SERVER_ZONE_URL); - HttpURLConnection connection = (HttpURLConnection) metadataServerUrl.openConnection(); - connection.setRequestProperty(METADATA_FLAVOR_HEADER, METADATA_FLAVOR_VALUE); - connection.setConnectTimeout(METADATA_SERVER_TIMEOUT_MS); // 5-second connection timeout - connection.setReadTimeout(METADATA_SERVER_TIMEOUT_MS); // 5-second read timeout - - try (BufferedReader reader = - new BufferedReader(new InputStreamReader(connection.getInputStream()))) { - String response = reader.readLine(); - // The response is in the format "projects/PROJECT_NUMBER/zones/ZONE" - String[] parts = response.split("/"); - String fullZone = parts[parts.length - 1]; - zone = fullZone; - region = fullZone.substring(0, fullZone.lastIndexOf('-')); - } - } catch (IOException e) { - logger.atWarning().log( - "Falling back to default region (%s) and zone (%s) because metadata server is unreachable.", - region, zone); - } + // try { + // URL metadataServerUrl = new URL(METADATA_SERVER_ZONE_URL); + // HttpURLConnection connection = (HttpURLConnection) metadataServerUrl.openConnection(); + // connection.setRequestProperty(METADATA_FLAVOR_HEADER, METADATA_FLAVOR_VALUE); + // connection.setConnectTimeout(METADATA_SERVER_TIMEOUT_MS); // 5-second connection timeout + // connection.setReadTimeout(METADATA_SERVER_TIMEOUT_MS); // 5-second read timeout + + // try (BufferedReader reader = + // new BufferedReader(new InputStreamReader(connection.getInputStream()))) { + // String response = reader.readLine(); + // // The response is in the format "projects/PROJECT_NUMBER/zones/ZONE" + // String[] parts = response.split("/"); + // String fullZone = parts[parts.length - 1]; + // zone = fullZone; + // region 
= fullZone.substring(0, fullZone.lastIndexOf('-')); + // } + // } catch (IOException e) { + // logger.atWarning().log( + // "Falling back to default region (%s) and zone (%s) because metadata server is + // unreachable.", + // region, zone); + // } CreateBucketOptions zonalBucketOptions = CreateBucketOptions.builder() diff --git a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageNewIntegrationTestBase.java b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageNewIntegrationTestBase.java index 5307a641da..12ec92732b 100644 --- a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageNewIntegrationTestBase.java +++ b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageNewIntegrationTestBase.java @@ -16,6 +16,7 @@ package com.google.cloud.hadoop.gcsio; +import static com.google.cloud.hadoop.gcsio.GoogleCloudStorage.LIST_MAX_RESULTS; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.batchRequestString; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.composeRequestString; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.copyRequestString; @@ -24,6 +25,7 @@ import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.getMediaRequestString; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.getRequestString; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.listRequestString; +import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.listRequestWithStartOffset; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.listRequestWithTrailingDelimiter; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.postRequestString; import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.resumableUploadChunkRequestString; @@ -33,6 +35,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static 
java.util.stream.Collectors.toList; import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; import com.google.cloud.hadoop.util.RetryHttpInitializer; import com.google.common.collect.ImmutableList; @@ -43,12 +46,16 @@ import java.nio.ByteBuffer; import java.nio.channels.Channels; import java.nio.channels.SeekableByteChannel; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.UUID; import java.util.function.BiFunction; import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.util.zip.GZIPOutputStream; import org.junit.After; import org.junit.Before; @@ -75,7 +82,7 @@ public abstract class GoogleCloudStorageNewIntegrationTestBase { @Rule public TestName name = new TestName(); protected TrackingHttpRequestInitializer gcsRequestsTracker; - protected boolean isTracingSupported = false; + protected boolean isTracingSupported = true; private GoogleCloudStorage gcs; @@ -796,6 +803,259 @@ public void listObjectInfo_allMetadataFieldsCorrect() throws Exception { objectId.getBucketName(), testDirName, /* pageToken= */ null)); } + @Test + public void listObjectInfoStartingFrom_negativeMaxResult() throws Exception { + gcs = createGoogleCloudStorage(gcsOptions); + String bucketName = gcsfsIHelper.sharedBucketName1; + + String testDirName = name.getMethodName() + "/"; + + String fileName = testDirName + "I_am_file"; + StorageResourceId fileResource = new StorageResourceId(bucketName, fileName); + + gcsfsIHelper.gcs.createEmptyObject(fileResource); + + int maxResults = -1; + + ListObjectOptions listOptionsLimitResults = + ListObjectOptions.DEFAULT_USING_START_OFFSET.toBuilder().setMaxResults(maxResults).build(); + gcs.listObjectInfoStartingFrom(bucketName, testDirName, listOptionsLimitResults); + + assertThat(gcsRequestsTracker.getAllRequestStrings()) + .containsExactly( + 
listRequestWithStartOffset( + // maxResults is overridden with default value + bucketName, + testDirName, + /* pageToken= */ null, + /* maxResults= */ (int) LIST_MAX_RESULTS)); + } + + @Test + public void listObjectInfoStartingFrom_startOffsetNotObject() throws Exception { + gcs = createGoogleCloudStorage(gcsOptions); + String bucketName = gcsfsIHelper.sharedBucketName1; + + String testDirName = name.getMethodName() + "/"; + + String fileName = testDirName + "I_am_file"; + StorageResourceId fileResource = new StorageResourceId(bucketName, fileName); + + gcsfsIHelper.gcs.createEmptyObject(fileResource); + String startOffset = testDirName.substring(5, testDirName.length() - 1); + + List listedObjects = + gcs.listObjectInfoStartingFrom(bucketName, startOffset); + + verifyListedFilesOrder(listedObjects, startOffset); + + assertThat(gcsRequestsTracker.getAllRequestStrings()) + .containsExactly( + listRequestWithStartOffset(bucketName, startOffset, /* pageToken= */ null)); + } + + @Test + public void listObjectInfoStartingFrom_sortedListFiles() throws Exception { + + gcs = createGoogleCloudStorage(gcsOptions); + String bucketName = gcsfsIHelper.sharedBucketName1; + + String testDirName = name.getMethodName() + "/"; + List resources = new ArrayList<>(); + int filesCount = 50; + for (int i = 0; i < filesCount; i++) { + String uniqueFilename = UUID.randomUUID().toString().replaceAll("-", "").substring(0, 12); + StorageResourceId file = new StorageResourceId(bucketName, testDirName + uniqueFilename); + gcsfsIHelper.gcs.createEmptyObject(file); + resources.add(file); + } + List sortedResources = + resources.stream() + .sorted(Comparator.comparing(StorageResourceId::getObjectName)) + .collect(Collectors.toList()); + assertTrue(sortedResources.size() == resources.size()); + + List listedObjects = + gcs.listObjectInfoStartingFrom(bucketName, testDirName); + verifyListedFilesOrder(listedObjects, testDirName); + // Can't assert that this is the only object we get in response, other 
object lexicographically + // higher would also come in response. + // Only thing we can assert strongly is, list would start with the files created in this + // directory. + for (int i = 0; i < filesCount; i++) { + assertThat(getObjectNames(listedObjects).get(i)) + .isEqualTo(sortedResources.get(i).getObjectName()); + } + + assertThat(gcsRequestsTracker.getAllRequestStrings()) + .containsExactly( + listRequestWithStartOffset(bucketName, testDirName, /* pageToken= */ null)); + } + + @Test + public void listObjectInfoStartingFrom_invalidListOptions() throws Exception { + String bucketName = gcsfsIHelper.sharedBucketName1; + gcs = createGoogleCloudStorage(gcsOptions); + + String dir = name.getMethodName() + "2/"; + + String fileName = dir + "I_am_file"; + StorageResourceId fileResource = new StorageResourceId(bucketName, fileName); + + gcsfsIHelper.gcs.createEmptyObject(fileResource); + + assertThrows( + IOException.class, + () -> + gcs.listObjectInfoStartingFrom( + gcsfsIHelper.sharedBucketName1, dir, ListObjectOptions.DEFAULT)); + } + + @Test + public void listObjectInfoStartingFrom_multipleListCalls() throws Exception { + + String bucketName = gcsfsIHelper.sharedBucketName1; + gcs = createGoogleCloudStorage(gcsOptions); + + String dir1 = name.getMethodName() + "1/"; + StorageResourceId dirResource1 = new StorageResourceId(bucketName, dir1); + + String dir2 = name.getMethodName() + "2/"; + StorageResourceId dirResource2 = new StorageResourceId(bucketName, dir2); + + String fileName = dir2 + "I_am_file"; + StorageResourceId fileResource = new StorageResourceId(bucketName, fileName); + + gcsfsIHelper.gcs.createEmptyObject(dirResource1); + gcsfsIHelper.gcs.createEmptyObject(dirResource2); + gcsfsIHelper.gcs.createEmptyObject(fileResource); + int maxResults = 2; + + ListObjectOptions listOptionsLimitResults = + ListObjectOptions.DEFAULT_USING_START_OFFSET.toBuilder().setMaxResults(maxResults).build(); + + List listedObjects = + gcs.listObjectInfoStartingFrom( + 
gcsfsIHelper.sharedBucketName1, dir1, listOptionsLimitResults); + assertThat(getObjectNames(listedObjects)).doesNotContain(dirResource1); + assertThat(getObjectNames(listedObjects)).doesNotContain(dirResource2); + + // Can't assert that this is the only object we get in response, other object lexicographically + // higher would also come in response. + // Only thing we can assert strongly is, list would start with the files created in this + // directory. + assertThat(getObjectNames(listedObjects).get(0)).isEqualTo(fileResource.getObjectName()); + + assertThat(gcsRequestsTracker.getAllRequestStrings()) + .containsExactly( + listRequestWithStartOffset(bucketName, dir1, /* pageToken= */ null, maxResults), + listRequestWithStartOffset(bucketName, dir1, /* pageToken= */ "token_1", maxResults)); + } + + @Test + public void listObjectInfoStartingFrom_filterDirObjects() throws Exception { + gcs = createGoogleCloudStorage(gcsOptions); + + String testDirName = name.getMethodName() + "/"; + StorageResourceId objectId1 = + new StorageResourceId(gcsfsIHelper.sharedBucketName1, testDirName + "object1"); + String subDirName = testDirName + "subDir/"; + StorageResourceId directoryResource = + new StorageResourceId(gcsfsIHelper.sharedBucketName1, subDirName); + StorageResourceId objectId2 = + new StorageResourceId(gcsfsIHelper.sharedBucketName1, subDirName + "subObject1"); + + gcsfsIHelper.gcs.createEmptyObject(objectId1); + gcsfsIHelper.gcs.createEmptyObject(directoryResource); + gcsfsIHelper.gcs.createEmptyObject(objectId2); + + List listedObjects = + gcs.listObjectInfoStartingFrom(gcsfsIHelper.sharedBucketName1, testDirName); + verifyListedFilesOrder(listedObjects, testDirName); + + assertThat(getObjectNames(listedObjects)).doesNotContain(directoryResource); + + // Can't assert that this is the only object we get in response, other object lexicographically + // higher would also come in response. 
+ // Only thing we can assert strongly is, list would start with the files created in this + // directory. + assertThat(getObjectNames(listedObjects).get(0)).isEqualTo(objectId1.getObjectName()); + assertThat(getObjectNames(listedObjects).get(1)).isEqualTo(objectId2.getObjectName()); + + assertThat(gcsRequestsTracker.getAllRequestStrings()) + .containsExactly( + listRequestWithStartOffset( + objectId1.getBucketName(), testDirName, /* pageToken= */ null)); + } + + @Test + public void listObjectInfoStartingFrom_listSubDirFiles() throws Exception { + + gcs = createGoogleCloudStorage(gcsOptions); + + String testDirName = name.getMethodName() + "/"; + StorageResourceId objectId1 = + new StorageResourceId(gcsfsIHelper.sharedBucketName1, testDirName + "object1"); + String subDirName = testDirName + "subDir/"; + StorageResourceId objectId2 = + new StorageResourceId(gcsfsIHelper.sharedBucketName1, subDirName + "subObject1"); + + gcsfsIHelper.gcs.createEmptyObject(objectId2); + gcsfsIHelper.gcs.createEmptyObject(objectId1); + + List listedObjects = + gcs.listObjectInfoStartingFrom(gcsfsIHelper.sharedBucketName1, testDirName); + verifyListedFilesOrder(listedObjects, testDirName); + // Can't asset that this is the only object we get in response, other object lexicographically + // higher would also come in response. + // Only thing we can assert strongly is, list would start with the files created in this + // directory. 
+ assertThat(getObjectNames(listedObjects).get(0)).isEqualTo(objectId1.getObjectName()); + assertThat(getObjectNames(listedObjects).get(1)).isEqualTo(objectId2.getObjectName()); + + assertThat(gcsRequestsTracker.getAllRequestStrings()) + .containsExactly( + listRequestWithStartOffset( + objectId1.getBucketName(), testDirName, /* pageToken= */ null)); + } + + @Test + public void listObjectInfoStartingFrom_allMetadataFieldsCorrect() throws Exception { + gcs = createGoogleCloudStorage(gcsOptions); + + String testDirName = name.getMethodName() + "/"; + StorageResourceId objectId2 = + new StorageResourceId(gcsfsIHelper.sharedBucketName1, testDirName + "object2"); + StorageResourceId objectId1 = + new StorageResourceId(gcsfsIHelper.sharedBucketName1, testDirName + "object1"); + + // Create gzipped file so Content-Encoding will be not null + CreateObjectOptions createOptions = + GZIP_CREATE_OPTIONS.toBuilder() + .setMetadata(ImmutableMap.of("test-key", "val".getBytes(UTF_8))) + .build(); + + gcsfsIHelper.gcs.createEmptyObject(objectId2, createOptions); + gcsfsIHelper.gcs.createEmptyObject(objectId1, createOptions); + + List listedObjects = + gcs.listObjectInfoStartingFrom(gcsfsIHelper.sharedBucketName1, testDirName); + + verifyListedFilesOrder(listedObjects, testDirName); + // Can't asset that this is the only object we get in response, other object lexicographically + // higher would also come in response. + // Only thing we can assert strongly is, list would start with the files created in this + // directory. 
+ assertThat(getObjectNames(listedObjects).get(0)).isEqualTo(objectId1.getObjectName()); + assertThat(getObjectNames(listedObjects).get(1)).isEqualTo(objectId2.getObjectName()); + + assertObjectFields(objectId1, listedObjects.get(0)); + assertThat(gcsRequestsTracker.getAllRequestStrings()) + .containsExactly( + listRequestWithStartOffset( + objectId1.getBucketName(), testDirName, /* pageToken= */ null)); + } + @Test public void getItemInfo_allMetadataFieldsCorrect() throws Exception { gcs = createGoogleCloudStorage(gcsOptions); @@ -1377,6 +1637,26 @@ private String createObjectsInTestDirWithoutSubdirs(String bucketName, String... return testDir; } + private void verifyListedFilesOrder( + List listedObjects, String startOffset) { + // provided item list is sorted + if (listedObjects.size() > 1) { + assertTrue( + IntStream.range(0, listedObjects.size() - 1) + .allMatch( + i -> + listedObjects + .get(i) + .getObjectName() + .compareTo(listedObjects.get(i + 1).getObjectName()) + <= 0)); + } + + assertTrue( + IntStream.range(0, listedObjects.size() - 1) + .allMatch(i -> listedObjects.get(i).getObjectName().compareTo(startOffset) >= 0)); + } + protected abstract GoogleCloudStorage createGoogleCloudStorage(GoogleCloudStorageOptions options) throws IOException; diff --git a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/TrackingHttpRequestInitializer.java b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/TrackingHttpRequestInitializer.java index 3a2f6658d3..4810d2e45b 100644 --- a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/TrackingHttpRequestInitializer.java +++ b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/TrackingHttpRequestInitializer.java @@ -514,6 +514,48 @@ public static String listRequestWithTrailingDelimiter( bucket, /* includeTrailingDelimiter= */ true, prefix, objectFields, maxResults, pageToken); } + public static String listRequestWithStartOffset( + String bucket, String startOffset, String pageToken, int maxResults) { + return 
listRequestString( + bucket, + /* flatList */ true, + /* includeTrailingDelimiter */ null, + /* prefix */ null, + OBJECT_FIELDS, + maxResults, + pageToken, + startOffset, + /* includeFoldersAsPrefixes= */ false); + } + + public static String listRequestWithStartOffset( + String bucket, String startOffset, String pageToken) { + return listRequestString( + bucket, + /* flatList */ true, + /* includeTrailingDelimiter */ null, + /* prefix */ null, + OBJECT_FIELDS, + GoogleCloudStorageOptions.DEFAULT.getMaxListItemsPerCall(), + pageToken, + startOffset, + /* includeFoldersAsPrefixes= */ false); + } + + public static String listRequestWithStartOffset( + String bucket, String startOffset, String pageToken, String fields) { + return listRequestString( + bucket, + /* flatList */ true, + /* includeTrailingDelimiter */ null, + /* prefix */ null, + fields, + GoogleCloudStorageOptions.DEFAULT.getMaxListItemsPerCall(), + pageToken, + startOffset, + /* includeFoldersAsPrefixes= */ false); + } + public static String listRequestString( String bucket, Boolean includeTrailingDelimiter, @@ -544,6 +586,7 @@ public static String listRequestString( objectFields, maxResults, pageToken, + /* startOffset */ null, /* includeFoldersAsPrefixes= */ false); } @@ -562,6 +605,7 @@ public static String listRequestString( objectFields, GoogleCloudStorageOptions.DEFAULT.getMaxListItemsPerCall(), pageToken, + /* startOffset */ null, /* includeFoldersAsPrefixes= */ false); } @@ -573,6 +617,7 @@ public static String listRequestString( String objectFields, int maxResults, String pageToken, + String startOffset, Boolean includeFoldersAsPrefixes) { String baseUrl = String.format(LIST_REQUEST_FORMAT, bucket); @@ -591,6 +636,8 @@ public static String listRequestString( addIfNotnull(params, "pageToken", pageToken); addIfNotnull(params, "prefix", prefix); + addIfNotnull(params, "startOffset", startOffset); + return baseUrl + "?" 
+ String.join("&", params); } diff --git a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/integration/GoogleCloudStorageImplTest.java b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/integration/GoogleCloudStorageImplTest.java index dd0a23a65a..efaddf7c7c 100644 --- a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/integration/GoogleCloudStorageImplTest.java +++ b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/integration/GoogleCloudStorageImplTest.java @@ -539,6 +539,36 @@ public void conflictingWrites_noOverwrite_lastFails() throws IOException { trackingGcs2.delegate.close(); } + @Test + public void listObjectInfoStartingFrom_lexicographicalOrdrer() throws IOException { + String testDirectory = name.getMethodName(); + + TrackingStorageWrapper trackingGcs = + newTrackingGoogleCloudStorage(GCS_OPTIONS); + StorageResourceId resourceId3 = new StorageResourceId(testBucket, testDirectory + "/object3"); + StorageResourceId resourceId2 = new StorageResourceId(testBucket, testDirectory + "/object2"); + StorageResourceId resourceId1 = new StorageResourceId(testBucket, testDirectory + "/object1"); + + trackingGcs.delegate.createEmptyObject(resourceId3); + trackingGcs.delegate.createEmptyObject(resourceId2); + trackingGcs.delegate.createEmptyObject(resourceId1); + + // Verify that directory object not listed + List listedItems = + helperGcs.listObjectInfoStartingFrom(testBucket, testDirectory + "/"); + assertThat(listedItems.stream().map(GoogleCloudStorageItemInfo::getResourceId).toArray()) + .asList() + .containsExactlyElementsIn( + ImmutableList.builder() + .add(resourceId1) + .add(resourceId2) + .add(resourceId3) + .build() + .toArray()) + .inOrder(); + trackingGcs.delegate.close(); + } + @Test public void create_doesNotRepairImplicitDirectories() throws IOException { String testDirectory = name.getMethodName(); diff --git a/pom.xml b/pom.xml index 7beae6bbec..060b4d9a66 100644 --- a/pom.xml +++ b/pom.xml @@ -25,7 +25,7 @@ com.google.cloud.bigdataoss 
bigdataoss-parent - 3.0.0-SNAPSHOT + 4.0.0-SNAPSHOT pom https://github.com/GoogleCloudDataproc/hadoop-connectors @@ -68,7 +68,7 @@ true - 3.0.0-SNAPSHOT + 4.0.0-SNAPSHOT 1.10.4 26.69.0 diff --git a/util-hadoop/pom.xml b/util-hadoop/pom.xml index e336520381..afd7f5491f 100644 --- a/util-hadoop/pom.xml +++ b/util-hadoop/pom.xml @@ -21,14 +21,14 @@ com.google.cloud.bigdataoss bigdataoss-parent - 3.0.0-SNAPSHOT + 4.0.0-SNAPSHOT ../pom.xml util-hadoop util-hadoop - 3.0.0-SNAPSHOT + 4.0.0-SNAPSHOT diff --git a/util/pom.xml b/util/pom.xml index 8b41bdbbe2..50b179a9dd 100644 --- a/util/pom.xml +++ b/util/pom.xml @@ -21,14 +21,14 @@ com.google.cloud.bigdataoss bigdataoss-parent - 3.0.0-SNAPSHOT + 4.0.0-SNAPSHOT ../pom.xml util util - 3.0.0-SNAPSHOT + 4.0.0-SNAPSHOT