diff --git a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatistics.java b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatistics.java
index 7120c4e763..8597626024 100644
--- a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatistics.java
+++ b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatistics.java
@@ -17,6 +17,8 @@
package com.google.cloud.hadoop.fs.gcs;
import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.GCS_CONNECTOR_TIME;
+import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.STREAM_READ_VECTORED_OPERATIONS;
+import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.STREAM_READ_VECTORED_READ_COMBINED_RANGES;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics;
import java.util.Arrays;
@@ -58,6 +60,10 @@ public void reset() {
void increment(GhfsStatistic statistic, long count) {
if (statistic == GCS_CONNECTOR_TIME) {
Metric.HADOOP_API_TIME.increment(count);
+ } else if (statistic == STREAM_READ_VECTORED_OPERATIONS) {
+ Metric.STREAM_READ_VECTORED_COUNT.increment(count);
+ } else if (statistic == STREAM_READ_VECTORED_READ_COMBINED_RANGES) {
+ Metric.STREAM_READ_VECTORED_RANGE_COUNT.increment(count);
} else if (statistic.getIsHadoopApi()) {
Metric.HADOOP_API_COUNT.increment(count);
}
@@ -104,7 +110,9 @@ private enum Metric {
GCS_API_COUNT("gcsApiCount"),
GCS_API_TIME("gcsApiTime"),
BACKOFF_COUNT("backoffCount"),
- BACKOFF_TIME("backoffTime");
+ BACKOFF_TIME("backoffTime"),
+ STREAM_READ_VECTORED_COUNT("readVectoredCount"),
+ STREAM_READ_VECTORED_RANGE_COUNT("readVectoredRangeCount");
private final String metricName;
private final ThreadLocalValue metricValue;
diff --git a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java
index 2d7b5d5a84..b223ea0c51 100644
--- a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java
+++ b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java
@@ -878,6 +878,40 @@ public FileStatus[] listStatus(Path hadoopPath) throws IOException {
});
}
+ /**
+ * Gets FileStatus of all files which are lexicographically greater than or equal to the provided
+ * path. It filters out any directory objects present in the underlying storage.
+ *
+ * This is an experimental API and can change without notice.
+ */
+ public FileStatus[] listStatusStartingFrom(Path startFrom) throws IOException {
+ return trackDurationWithTracing(
+ instrumentation,
+ globalStorageStatistics,
+ GhfsStatistic.INVOCATION_LIST_STATUS,
+ startFrom,
+ traceFactory,
+ () -> {
+ checkArgument(startFrom != null, "start offset path must not be null");
+
+ checkOpen();
+
+ logger.atFiner().log("listStatusStartingFrom(hadoopPath: %s)", startFrom);
+
+ URI gcsPath = getGcsPath(startFrom);
+ List status;
+
+ List fileInfos = getGcsFs().listFileInfoStartingFrom(gcsPath, LIST_OPTIONS);
+ status = new ArrayList<>(fileInfos.size());
+ String userName = getUgiUserName();
+ for (FileInfo fileInfo : fileInfos) {
+ status.add(getGoogleHadoopFileStatus(fileInfo, userName));
+ }
+ incrementStatistic(GhfsStatistic.INVOCATION_LIST_STATUS_RESULT_SIZE, status.size());
+ return status.toArray(new FileStatus[0]);
+ });
+ }
+
@Override
public boolean mkdirs(Path hadoopPath, FsPermission permission) throws IOException {
return trackDurationWithTracing(
@@ -944,7 +978,7 @@ public FileStatus getFileStatus(Path hadoopPath) throws IOException {
* performance and reduce redundant API calls without compromising performance and API behaviour.
* Currently, only "file" type hint is supported.
*
- * This is an experimental API can can change without notice.
+ * This is an experimental API and can change without notice.
*/
public FileStatus getFileStatusWithHint(Path hadoopPath, Configuration hint) throws IOException {
return trackDurationWithTracing(
diff --git a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatisticsTest.java b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatisticsTest.java
index 3e1dea5d87..5190927324 100644
--- a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatisticsTest.java
+++ b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GhfsThreadLocalStatisticsTest.java
@@ -38,6 +38,9 @@ public class GhfsThreadLocalStatisticsTest {
private static final String BACKOFF_TIME = "backoffTime";
private static final String HADOOP_API_COUNT = "hadoopApiCount";
private static final String HADOOP_API_TIME = "hadoopApiTime";
+ private static final String STREAM_READ_VECTORED_COUNT = "readVectoredCount";
+
+ private static final String STREAM_READ_VECTORED_RANGE_COUNT = "readVectoredRangeCount";
private static Map typeToNameMapping =
Map.of(
@@ -62,7 +65,9 @@ private Map getInitMetrics() {
HADOOP_API_COUNT, 0L,
HADOOP_API_TIME, 0L,
GCS_API_COUNT, 0L,
- GCS_API_TIME, 0L));
+ GCS_API_TIME, 0L,
+ STREAM_READ_VECTORED_COUNT, 0L,
+ STREAM_READ_VECTORED_RANGE_COUNT, 0L));
return result;
}
@@ -121,6 +126,10 @@ private static void runHadoopApiTests(
expectedMetrics.merge(HADOOP_API_COUNT, 1L, Long::sum);
} else if (ghfsStatistic == GhfsStatistic.GCS_CONNECTOR_TIME) {
expectedMetrics.merge(HADOOP_API_TIME, 1L, Long::sum);
+ } else if (ghfsStatistic == GhfsStatistic.STREAM_READ_VECTORED_OPERATIONS) {
+ expectedMetrics.merge(STREAM_READ_VECTORED_COUNT, 1L, Long::sum);
+ } else if (ghfsStatistic == GhfsStatistic.STREAM_READ_VECTORED_READ_COMBINED_RANGES) {
+ expectedMetrics.merge(STREAM_READ_VECTORED_RANGE_COUNT, 1L, Long::sum);
}
verify(expectedMetrics, actualMetrics);
@@ -133,6 +142,10 @@ private static void runHadoopApiTests(
expectedMetrics.merge(HADOOP_API_COUNT, theValue, Long::sum);
} else if (ghfsStatistic == GhfsStatistic.GCS_CONNECTOR_TIME) {
expectedMetrics.merge(HADOOP_API_TIME, theValue, Long::sum);
+ } else if (ghfsStatistic == GhfsStatistic.STREAM_READ_VECTORED_OPERATIONS) {
+ expectedMetrics.merge(STREAM_READ_VECTORED_COUNT, theValue, Long::sum);
+ } else if (ghfsStatistic == GhfsStatistic.STREAM_READ_VECTORED_READ_COMBINED_RANGES) {
+ expectedMetrics.merge(STREAM_READ_VECTORED_RANGE_COUNT, theValue, Long::sum);
}
verify(expectedMetrics, actualMetrics);
diff --git a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFSInputStreamIntegrationTest.java b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFSInputStreamIntegrationTest.java
index a939554557..b53f2e153b 100644
--- a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFSInputStreamIntegrationTest.java
+++ b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFSInputStreamIntegrationTest.java
@@ -68,6 +68,27 @@ public static void afterClass() {
gcsFsIHelper.afterAllTests();
}
+ @Test
+ public void testBidiVectoredRead() throws Exception {
+ URI path = gcsFsIHelper.getUniqueObjectUri(getClass(), "testBidiVectoredRead");
+
+ String testContent = "test content";
+ gcsFsIHelper.writeTextFile(path, testContent);
+
+ List ranges = new ArrayList<>();
+ ranges.add(FileRange.createFileRange(0, 5));
+ ranges.add(FileRange.createFileRange(5, 6));
+
+ try (GoogleHadoopFileSystem ghfs =
+ GoogleHadoopFileSystemIntegrationHelper.createGhfs(
+ path, GoogleHadoopFileSystemIntegrationHelper.getBidiTestConfiguration());
+ GoogleHadoopFSInputStream in = createGhfsInputStream(ghfs, path)) {
+
+ in.readVectored(ranges, ByteBuffer::allocate);
+ validateVectoredReadResult(ranges, path);
+ }
+ }
+
@Test
public void seek_illegalArgument() throws Exception {
URI path = gcsFsIHelper.getUniqueObjectUri(getClass(), "seek_illegalArgument");
diff --git a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemIntegrationHelper.java b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemIntegrationHelper.java
index 4990292777..0a9f66a91c 100644
--- a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemIntegrationHelper.java
+++ b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemIntegrationHelper.java
@@ -19,6 +19,7 @@
import static com.google.cloud.hadoop.gcsio.testing.TestConfiguration.GCS_TEST_PROJECT_ID;
import static com.google.common.base.Preconditions.checkNotNull;
+import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemOptions.ClientType;
import com.google.cloud.hadoop.gcsio.testing.TestConfiguration;
import com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.AuthenticationType;
import java.net.URI;
@@ -66,5 +67,13 @@ public static Configuration getTestConfig() {
return config;
}
+ public static Configuration getBidiTestConfiguration() {
+ Configuration config = getTestConfig();
+ config.setBoolean("fs.gs.bidi.enable", true);
+ config.setEnum("fs.gs.client.type", ClientType.STORAGE_CLIENT);
+ config.setBoolean("fs.gs.grpc.enable", true);
+ return config;
+ }
+
private GoogleHadoopFileSystemIntegrationHelper() {}
}
diff --git a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTestBase.java b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTestBase.java
index 5caddc1e15..98ac1a6669 100644
--- a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTestBase.java
+++ b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTestBase.java
@@ -24,8 +24,12 @@
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth.assertWithMessage;
-import com.google.cloud.hadoop.gcsio.*;
+import com.google.cloud.hadoop.gcsio.GoogleCloudStorage;
+import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemOptions.ClientType;
+import com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions;
+import com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics;
+import com.google.cloud.hadoop.gcsio.StorageResourceId;
import java.io.File;
import java.io.IOException;
import java.io.Writer;
@@ -34,15 +38,20 @@
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Paths;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.UUID;
+import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.permission.FsPermission;
+import org.junit.Rule;
import org.junit.Test;
+import org.junit.rules.TestName;
/**
* Abstract base class for test suites targeting variants of GoogleHadoopFileSystem via the Hadoop
@@ -74,6 +83,16 @@ protected static Configuration loadConfig(Configuration config, ClientType stora
return newConfig;
}
+ @Rule
+ public TestName name =
+ new TestName() {
+ // With parametrization, the method name will get [index] appended to it.
+ @Override
+ public String getMethodName() {
+ return super.getMethodName().replaceAll("[\\[,\\],\\s+]", "");
+ }
+ };
+
// -----------------------------------------------------------------------------------------
// Tests that vary according to the GHFS variant, but which we want to make sure get tested.
// -----------------------------------------------------------------------------------------
@@ -543,6 +562,31 @@ public void provideCoverageForUnmodifiedMethods() throws IOException {
}
}
+ @Test
+ public void listStatusStartingFrom_sortedFileStatus() throws Exception {
+ int fileCount = 10;
+ List objectPath = new ArrayList<>();
+ URI dirObjectURI = new URI(name.getMethodName() + "/");
+ for (int i = 0; i < fileCount; i++) {
+ // create a random path file
+ Path filePath =
+ ghfsHelper.castAsHadoopPath(dirObjectURI.resolve(UUID.randomUUID().toString()));
+ ghfsHelper.writeFile(filePath, UUID.randomUUID().toString(), 1, /* overwrite= */ false);
+ objectPath.add(filePath);
+ }
+ List sortedPaths = objectPath.stream().sorted().collect(Collectors.toList());
+
+ FileStatus[] fileStatuses =
+ invokeListStatusStartingFromMethod(ghfsHelper.castAsHadoopPath(dirObjectURI));
+ // Can't assert that these are the only objects in the response; other objects that are
+ // lexicographically higher would also be included.
+ // The only strong assertion we can make is that the list starts with the files created in
+ // this directory.
+ for (int i = 0; i < fileCount; i++) {
+ assertThat(fileStatuses[i].getPath()).isEqualTo(sortedPaths.get(i));
+ }
+ }
+
@Test
public void testGetFileStatusWithHint() throws Exception {
Path hadoopPath = ghfsHelper.castAsHadoopPath(getTempFilePath());
@@ -602,6 +646,19 @@ private void invokeGetFileStatusWithHint(
assertThat(getStatisticValue(GCS_METADATA_REQUEST)).isEqualTo(numTimes);
}
+ private Method getListStatusStartingFromMethod() throws NoSuchMethodException {
+ return ghfs.getClass().getMethod("listStatusStartingFrom", Path.class);
+ }
+
+ private FileStatus[] invokeListStatusStartingFromMethod(Path startFrom) throws Exception {
+ resetStats();
+
+ FileStatus[] fileStatus =
+ (FileStatus[]) getListStatusStartingFromMethod().invoke(ghfs, startFrom);
+
+ return fileStatus;
+ }
+
private Long getStatisticValue(GoogleCloudStorageStatistics stat) {
return getStatistics().getLong(stat.getSymbol());
}
diff --git a/gcsio/pom.xml b/gcsio/pom.xml
index 4ceeb0d780..fde55d4ee4 100644
--- a/gcsio/pom.xml
+++ b/gcsio/pom.xml
@@ -21,7 +21,7 @@
com.google.cloud.bigdataoss
bigdataoss-parent
- 3.0.0-SNAPSHOT
+ 4.0.0-SNAPSHOT
../pom.xml
@@ -31,7 +31,7 @@
gcsio
- 3.0.0-SNAPSHOT
+ 4.0.0-SNAPSHOT
diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ForwardingGoogleCloudStorage.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ForwardingGoogleCloudStorage.java
index 15260c3140..240ca19c08 100644
--- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ForwardingGoogleCloudStorage.java
+++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ForwardingGoogleCloudStorage.java
@@ -203,6 +203,12 @@ public List listObjectInfo(
return delegate.listObjectInfo(bucketName, objectNamePrefix, listOptions);
}
+ @Override
+ public List listObjectInfoStartingFrom(
+ String bucketName, String startOffset, ListObjectOptions listOptions) throws IOException {
+ return delegate.listObjectInfoStartingFrom(bucketName, startOffset, listOptions);
+ }
+
@Override
public ListPage listObjectInfoPage(
String bucketName, String objectNamePrefix, ListObjectOptions listOptions, String pageToken)
diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorage.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorage.java
index d5f2698c48..9765648c67 100644
--- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorage.java
+++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorage.java
@@ -46,9 +46,15 @@ public interface GoogleCloudStorage {
*/
String PATH_DELIMITER = "/";
- /** Value indicating all objects should be returned from GCS, no limit. */
+ /**
+ * Value indicating all objects should be returned from GCSFileSystem. No limit, i.e. get all
+ * possible items. Used when listing all files in a directory.
+ */
long MAX_RESULTS_UNLIMITED = -1;
+ /** Default maximum number of results requested per GCS List API call. */
+ long LIST_MAX_RESULTS = 5000L;
+
/** The maximum number of objects that can be composed in one operation. */
int MAX_COMPOSE_OBJECTS = 32;
@@ -370,6 +376,33 @@ default List listObjectInfo(
return listObjectInfo(bucketName, objectNamePrefix, ListObjectOptions.DEFAULT);
}
+ /**
+ * Lists {@link GoogleCloudStorageItemInfo} of objects contained in the given bucket whose
+ * names are lexicographically greater than or equal to the provided offset.
+ *
+ * Note: As GCS doesn't implement a file system, a directory is also treated as an object (if
+ * it's been created). This API filters out all those directory objects and maintains the order
+ * of items. This API strictly expects the delimiter in listOptions to not be set.
+ *
+ * <p>Consider a bucket with objects: {@code o1}, {@code d1/}, {@code d1/o1}, {@code d1/o2}
+ *
+ *
+ * With {@code startOffset == "o1"} , we get: {@code o1}
+ * With {@code startOffset == "d1/"} , we get: {@code d1/o1} {@code d1/o2}
+ * With {@code startOffset == "d1/o1"} , we get: {@code d1/o1} {@code d1/o2}
+ * This is an experimental API and can change without notice.
+ *
+ * @param bucketName bucket name
+ * @param startOffset offset string; all objects with names greater than or equal will be listed.
+ * @return list of objects
+ * @throws IOException on IO error
+ */
+ default List listObjectInfoStartingFrom(
+ String bucketName, String startOffset) throws IOException {
+ return listObjectInfoStartingFrom(
+ bucketName, startOffset, ListObjectOptions.DEFAULT_USING_START_OFFSET);
+ }
+
/**
* Lists {@link GoogleCloudStorageItemInfo} of objects contained in the given bucket and whose
* names begin with the given prefix.
@@ -398,6 +431,31 @@ default List listObjectInfo(
List listObjectInfo(
String bucketName, String objectNamePrefix, ListObjectOptions listOptions) throws IOException;
+ /**
+ * Lists {@link GoogleCloudStorageItemInfo} of objects contained in the given bucket whose
+ * names are lexicographically greater than or equal to the provided offset.
+ *
+ * Note: As GCS doesn't implement a file system, a directory is also treated as an object (if
+ * it's been created). This API filters out all those directory objects and maintains the order
+ * of items. This API strictly expects the delimiter in listOptions to not be set.
+ *
+ * <p>Consider a bucket with objects: {@code o1}, {@code d1/}, {@code d1/o1}, {@code d1/o2}
+ *
+ *
+ * With {@code startOffset == "o1"} , we get: {@code o1}
+ * With {@code startOffset == "d1/"} , we get: {@code d1/o1} {@code d1/o2}
+ * With {@code startOffset == "d1/o1"} , we get: {@code d1/o1} {@code d1/o2}
+ * This is an experimental API and can change without notice.
+ *
+ * @param bucketName bucket name
+ * @param startOffset offset string; all objects with names greater than or equal will be listed.
+ * @param listOptions options to use when listing objects
+ * @return list of objects
+ * @throws IOException on IO error
+ */
+ List listObjectInfoStartingFrom(
+ String bucketName, String startOffset, ListObjectOptions listOptions) throws IOException;
+
/**
* The same semantics as {@link #listObjectInfo}, but returns only result of single list request
* (1 page).
diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageBidiReadChannel.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageBidiReadChannel.java
index e972b43d5e..4234c4d543 100644
--- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageBidiReadChannel.java
+++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageBidiReadChannel.java
@@ -193,10 +193,29 @@ public boolean isOpen() {
@Override
public void close() throws IOException {
if (open) {
- open = false;
logger.atFinest().log("Closing channel for '%s'", resourceId);
- if (blobReadSession != null) {
- blobReadSession.close();
+ try {
+ if (blobReadSession != null) {
+ blobReadSession.close();
+ } else if (sessionFuture != null) {
+ try (BlobReadSession readSession =
+ sessionFuture.get(readOptions.getBidiClientTimeout(), TimeUnit.SECONDS)) {
+ // The try-with-resources statement ensures the readSession is automatically closed.
+ } catch (InterruptedException
+ | ExecutionException
+ | TimeoutException
+ | java.util.concurrent.CancellationException e) {
+ logger.atFine().withCause(e).log(
+ "Failed to get/close BlobReadSession during close() for '%s'", resourceId);
+ }
+ }
+ } catch (Exception e) {
+ GoogleCloudStorageEventBus.postOnException();
+ throw new IOException(
+ String.format("Exception occurred while closing channel '%s'", resourceId), e);
+ } finally {
+ blobReadSession = null;
+ open = false;
}
}
}
@@ -230,7 +249,7 @@ public void onSuccess(DisposableByteString disposableByteString) {
long bytesRead =
processBytesAndCompleteRange(disposableByteString, range, allocate);
logger.atFiner().log(
- "Vectored Read successful for range starting from %d with length %d. Total Bytes Read are: %d within %d ms",
+ "Vectored Read successful for range starting from %d with length %d. Total Bytes Read are: %d within %d ms",
range.getOffset(),
range.getLength(),
bytesRead,
@@ -449,7 +468,11 @@ public void refillInternalBuffer() throws IOException {
ByteString byteString = dbs.byteString();
if (!byteString.isEmpty()) {
this.bufferStartPosition = position;
- this.internalBuffer = byteString.asReadOnlyByteBuffer();
+
+ // TODO(dhritichopra): This is a temporary fix for a memory leak; better alternatives need to
+ // be explored.
+ byte[] copiedBytes = byteString.toByteArray();
+ this.internalBuffer = ByteBuffer.wrap(copiedBytes);
}
} catch (InterruptedException | ExecutionException | TimeoutException e) {
throw new IOException(
diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystem.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystem.java
index da5a550b4c..70b8b0a2cd 100644
--- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystem.java
+++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystem.java
@@ -243,6 +243,17 @@ default List listFileInfo(URI path) throws IOException {
return listFileInfo(path, ListFileOptions.DEFAULT);
}
+ /**
+ * Return all the files which are lexicographically equal to or greater than the provided path.
+ * This is an experimental API and can change without notice.
+ *
+ * @param startsFrom Given path.
+ * @return Information about all the files which satisfy the criteria.
+ */
+ default List listFileInfoStartingFrom(URI startsFrom) throws IOException {
+ return listFileInfoStartingFrom(startsFrom, ListFileOptions.DEFAULT);
+ }
+
/**
* If the given path points to a directory then the information about its children is returned,
* otherwise information about the given file is returned.
@@ -253,6 +264,16 @@ default List listFileInfo(URI path) throws IOException {
*/
List listFileInfo(URI path, ListFileOptions listOptions) throws IOException;
+ /**
+ * Return all the files which are lexicographically equal to or greater than the provided path.
+ * This is an experimental API and can change without notice.
+ *
+ * @param startsFrom Given path.
+ * @return Information about all the files which satisfy the criteria.
+ */
+ List listFileInfoStartingFrom(URI startsFrom, ListFileOptions listOptions)
+ throws IOException;
+
/**
* Returns the list of folder resources in the prefix. It lists all the folder resources
*
diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemImpl.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemImpl.java
index c680ea4f61..0053bfb3e1 100644
--- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemImpl.java
+++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemImpl.java
@@ -1054,6 +1054,33 @@ public List listFileInfo(URI path, ListFileOptions listOptions) throws
return fileInfos;
}
+ public List listFileInfoStartingFrom(URI startsFrom, ListFileOptions listOptions)
+ throws IOException {
+ checkNotNull(startsFrom, "start Offset can't be null");
+ logger.atFiner().log("listFileInfoStartingFrom(startsFrom: %s)", startsFrom);
+
+ StorageResourceId startOffsetPathId =
+ StorageResourceId.fromUriPath(startsFrom, /* allowEmptyObjectName= */ true);
+
+ checkArgument(
+ !startOffsetPathId.isRoot(),
+ "provided start offset shouldn't be root but an object path %s",
+ startsFrom);
+
+ List itemsInfo =
+ gcs.listObjectInfoStartingFrom(
+ startOffsetPathId.getBucketName(),
+ startOffsetPathId.getObjectName(),
+ updateListObjectOptions(
+ ListObjectOptions.builder()
+ .setMaxResults(options.getCloudStorageOptions().getMaxListItemsPerCall())
+ .setIncludePrefix(false)
+ .setDelimiter(null)
+ .build(),
+ listOptions));
+ return FileInfo.fromItemInfos(itemsInfo);
+ }
+
@Override
public FileInfo getFileInfo(URI path) throws IOException {
checkArgument(path != null, "path must not be null");
diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageImpl.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageImpl.java
index 5569de1c52..89b02b8274 100644
--- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageImpl.java
+++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageImpl.java
@@ -81,8 +81,15 @@
import com.google.common.flogger.GoogleLogger;
import com.google.common.io.BaseEncoding;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
-import com.google.storage.control.v2.*;
+import com.google.storage.control.v2.CreateFolderRequest;
+import com.google.storage.control.v2.Folder;
+import com.google.storage.control.v2.FolderName;
+import com.google.storage.control.v2.GetFolderRequest;
+import com.google.storage.control.v2.ListFoldersRequest;
+import com.google.storage.control.v2.RenameFolderRequest;
+import com.google.storage.control.v2.StorageControlClient;
import com.google.storage.control.v2.StorageControlClient.ListFoldersPagedResponse;
+import com.google.storage.control.v2.StorageControlSettings;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Field;
@@ -93,6 +100,7 @@
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
@@ -1484,6 +1492,45 @@ private Storage.Objects.Insert prepareEmptyInsert(
return insertObject;
}
+ private List listStorageObjects(
+ String bucketName, String startOffset, ListObjectOptions listOptions) throws IOException {
+ logger.atFiner().log("listStorageObjects(%s, %s, %s)", bucketName, startOffset, listOptions);
+ checkArgument(
+ listOptions.getDelimiter() == null,
+ "Delimiter shouldn't be used while listing objects starting from an offset");
+
+ long maxResults =
+ listOptions.getMaxResults() > 0 ? listOptions.getMaxResults() : LIST_MAX_RESULTS;
+
+ Storage.Objects.List listObject =
+ createListRequest(
+ bucketName,
+ /* objectNamePrefix */ null,
+ startOffset,
+ listOptions.getFields(),
+ /* delimiter */ null,
+ maxResults,
+ listOptions.isIncludeFoldersAsPrefixes());
+ String pageToken = null;
+ LinkedList listedObjects = new LinkedList<>();
+ // Pagination is required because an entire page may be filtered out if all of its items are
+ // "directory" objects. Stop fetching further pages as soon as some files have been listed.
+ int page = 0;
+ do {
+ page += 1;
+ if (pageToken != null) {
+ logger.atFiner().log(
+ "listStorageObjects: next page %s, listedObjects: %d", pageToken, listedObjects.size());
+ listObject.setPageToken(pageToken);
+ }
+ pageToken =
+ listStorageObjectsFilteredPage(
+ listObject, listOptions, listedObjects, Integer.toString(page));
+ } while (pageToken != null && listedObjects.size() == 0);
+
+ return listedObjects;
+ }
+
/**
* Helper for both listObjectInfo that executes the actual API calls to get paginated lists,
* accumulating the StorageObjects and String prefixes into the params {@code listedObjects} and
@@ -1537,6 +1584,7 @@ private void listStorageObjectsAndPrefixes(
createListRequest(
bucketName,
objectNamePrefix,
+ /* startOffset */ null,
listOptions.getFields(),
listOptions.getDelimiter(),
maxResults,
@@ -1557,6 +1605,37 @@ private void listStorageObjectsAndPrefixes(
&& getMaxRemainingResults(listOptions.getMaxResults(), listedPrefixes, listedObjects) > 0);
}
+ /*
+ * Helper function to list files whose names are lexicographically greater than or equal to the offset.
+ * It strictly expects no delimiter to be provided.
+ * It also filters out all the objects which are "directories"
+ */
+ private String listStorageObjectsFilteredPage(
+ Storage.Objects.List listObject,
+ ListObjectOptions listOptions,
+ List listedObjects,
+ String pageContext)
+ throws IOException {
+ logger.atFiner().log("listStorageObjectsPage(%s, %s)", listObject, listOptions);
+ checkNotNull(listedObjects, "Must provide a non-null container for listedObjects.");
+ // We don't want any prefixes[] and filter the objects which are "directories" manually.
+ checkArgument(
+ listObject.getDelimiter() == null,
+ "Delimiter shouldn't be set while listing object from an offset");
+
+ Objects response = executeListCall(listObject, pageContext);
+ if (response == null || response.getItems() == null) {
+ return null;
+ }
+ /* filter the objects which are directory */
+ for (StorageObject object : response.getItems()) {
+ if (!object.getName().endsWith(PATH_DELIMITER)) {
+ listedObjects.add(object);
+ }
+ }
+ return response.getNextPageToken();
+ }
+
@Nullable
private String listStorageObjectsAndPrefixesPage(
Storage.Objects.List listObject,
@@ -1573,13 +1652,9 @@ private String listStorageObjectsAndPrefixesPage(
// Deduplicate prefixes and items, because if 'includeTrailingDelimiter' set to true
// then returned items will contain "prefix objects" too.
Set prefixes = new LinkedHashSet<>(listedPrefixes);
-
Objects items;
- try (ITraceOperation op =
- TraceOperation.addToExistingTrace(
- String.format("gcs.objects.list(page=%s)", pageContext))) {
- items = listObject.execute();
- op.annotate("resultSize", items == null ? 0 : items.size());
+ try {
+ items = executeListCall(listObject, pageContext);
} catch (IOException e) {
GoogleCloudStorageEventBus.postOnException();
String resource = StringPaths.fromComponents(listObject.getBucket(), listObject.getPrefix());
@@ -1655,9 +1730,21 @@ private String listStorageObjectsAndPrefixesPage(
return items.getNextPageToken();
}
+ private Objects executeListCall(Storage.Objects.List listObject, String pageContext)
+ throws IOException {
+ try (ITraceOperation op =
+ TraceOperation.addToExistingTrace(
+ String.format("gcs.objects.list(page=%s)", pageContext))) {
+ Objects items = listObject.execute();
+ op.annotate("resultSize", items == null ? 0 : items.size());
+ return items;
+ }
+ }
+
private Storage.Objects.List createListRequest(
String bucketName,
String objectNamePrefix,
+ String startOffset,
String objectFields,
String delimiter,
long maxResults,
@@ -1675,7 +1762,11 @@ private Storage.Objects.List createListRequest(
Storage.Objects.List listObject =
initializeRequest(
- storage.objects().list(bucketName).setPrefix(emptyToNull(objectNamePrefix)),
+ storage
+ .objects()
+ .list(bucketName)
+ .setStartOffset(emptyToNull(startOffset))
+ .setPrefix(emptyToNull(objectNamePrefix)),
bucketName);
// Set delimiter if supplied.
@@ -1728,6 +1819,32 @@ public List listObjectInfo(
bucketName, objectNamePrefix, listOptions, listedPrefixes, listedObjects);
}
+ @Override
+ public List listObjectInfoStartingFrom(
+ String bucketName, String startOffset, ListObjectOptions listOptions) throws IOException {
+ logger.atFiner().log(
+ "listObjectInfoStartingFrom(%s, %s, %s)", bucketName, startOffset, listOptions);
+ checkArgument(
+ listOptions.getFields() != null && listOptions.getFields().contains("name"),
+ "Name is a required field for listing files, provided fields %s",
+ listOptions.getFields());
+ try {
+ List listedObjects = listStorageObjects(bucketName, startOffset, listOptions);
+ return getGoogleCloudStorageItemInfos(
+ bucketName,
+ /* objectNamePrefix= */ null,
+ listOptions,
+ Collections.EMPTY_LIST,
+ listedObjects);
+ } catch (Exception e) {
+ throw new IOException(
+ String.format(
+ "Having issue while listing files from offset: %s for bucket: %s and options: %s",
+ startOffset, bucketName, listOptions),
+ e);
+ }
+ }
+
/**
* @see GoogleCloudStorage#listObjectInfoPage(String, String, ListObjectOptions, String)
*/
@@ -1747,6 +1864,7 @@ public ListPage listObjectInfoPage(
createListRequest(
bucketName,
objectNamePrefix,
+ /* startOffset */ null,
listOptions.getFields(),
listOptions.getDelimiter(),
listOptions.getMaxResults(),
diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ListObjectOptions.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ListObjectOptions.java
index d31f52ebac..424c023988 100644
--- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ListObjectOptions.java
+++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/ListObjectOptions.java
@@ -16,6 +16,7 @@
package com.google.cloud.hadoop.gcsio;
+import static com.google.cloud.hadoop.gcsio.GoogleCloudStorage.LIST_MAX_RESULTS;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorage.MAX_RESULTS_UNLIMITED;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorage.PATH_DELIMITER;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.OBJECT_FIELDS;
@@ -30,6 +31,10 @@ public abstract class ListObjectOptions {
/** List all objects in the directory. */
public static final ListObjectOptions DEFAULT = builder().build();
+ /** List objects starting from an offset. */
+ public static final ListObjectOptions DEFAULT_USING_START_OFFSET =
+ builder().setDelimiter(null).setMaxResults(LIST_MAX_RESULTS).build();
+
/** List all objects with the prefix. */
public static final ListObjectOptions DEFAULT_FLAT_LIST = builder().setDelimiter(null).build();
diff --git a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/testing/InMemoryGoogleCloudStorage.java b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/testing/InMemoryGoogleCloudStorage.java
index 85952ea8a3..dfd82dcf85 100644
--- a/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/testing/InMemoryGoogleCloudStorage.java
+++ b/gcsio/src/main/java/com/google/cloud/hadoop/gcsio/testing/InMemoryGoogleCloudStorage.java
@@ -514,6 +514,25 @@ private synchronized List listObjectNames(
return new ArrayList<>(uniqueNames);
}
+ private synchronized List listObjectNamesStartingFrom(
+ String bucketName, String startOffset, ListObjectOptions listOptions) {
+ InMemoryBucketEntry bucketEntry = bucketLookup.get(bucketName);
+ if (bucketEntry == null) {
+ return new ArrayList<>();
+ }
+
+ Set uniqueNames = new TreeSet<>();
+ for (String objectName : bucketEntry.getObjectNames()) {
+ if (objectName.compareTo(startOffset) >= 0) {
+ uniqueNames.add(objectName);
+ }
+ if (listOptions.getMaxResults() > 0 && uniqueNames.size() >= listOptions.getMaxResults()) {
+ break;
+ }
+ }
+ return uniqueNames.stream().sorted().collect(Collectors.toList());
+ }
+
@Override
public ListPage listObjectInfoPage(
String bucketName, String objectNamePrefix, ListObjectOptions listOptions, String pageToken)
@@ -544,21 +563,17 @@ public synchronized List listObjectInfo(
bucketName,
objectNamePrefix,
listOptions.toBuilder().setMaxResults(MAX_RESULTS_UNLIMITED).build());
- List listedInfo = new ArrayList<>();
- for (String objectName : listedNames) {
- GoogleCloudStorageItemInfo itemInfo =
- getItemInfo(new StorageResourceId(bucketName, objectName));
- if (itemInfo.exists()) {
- listedInfo.add(itemInfo);
- } else if (itemInfo.getResourceId().isStorageObject()) {
- listedInfo.add(
- GoogleCloudStorageItemInfo.createInferredDirectory(itemInfo.getResourceId()));
- }
- if (listOptions.getMaxResults() > 0 && listedInfo.size() >= listOptions.getMaxResults()) {
- break;
- }
- }
- return listedInfo;
+ return convertToItemInfo(bucketName, listedNames, listOptions);
+ }
+
+ @Override
+ public List listObjectInfoStartingFrom(
+ String bucketName, String startOffset, ListObjectOptions listOptions) throws IOException {
+ // Since we're just in memory, we can do the naive implementation of just listing names and
+ // then calling getItemInfo for each.
+ List listObjectNamesStartingFrom =
+ listObjectNamesStartingFrom(bucketName, startOffset, listOptions);
+ return convertToItemInfo(bucketName, listObjectNamesStartingFrom, listOptions);
}
public void renameHnFolder(URI src, URI dst) throws IOException {
@@ -688,4 +703,24 @@ public GoogleCloudStorageItemInfo composeObjects(
public Map getStatistics() {
throw new UnsupportedOperationException("not implemented");
}
+
+ private List convertToItemInfo(
+ String bucketName, final List listedNames, ListObjectOptions listOptions)
+ throws IOException {
+ List listedInfo = new ArrayList<>();
+ for (String objectName : listedNames) {
+ GoogleCloudStorageItemInfo itemInfo =
+ getItemInfo(new StorageResourceId(bucketName, objectName));
+ if (itemInfo.exists()) {
+ listedInfo.add(itemInfo);
+ } else if (itemInfo.getResourceId().isStorageObject()) {
+ listedInfo.add(
+ GoogleCloudStorageItemInfo.createInferredDirectory(itemInfo.getResourceId()));
+ }
+ if (listOptions.getMaxResults() > 0 && listedInfo.size() >= listOptions.getMaxResults()) {
+ break;
+ }
+ }
+ return listedInfo;
+ }
}
diff --git a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemNewIntegrationTestBase.java b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemNewIntegrationTestBase.java
index 5678fc3b4c..aa8d112196 100644
--- a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemNewIntegrationTestBase.java
+++ b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageFileSystemNewIntegrationTestBase.java
@@ -21,6 +21,7 @@
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.deleteRequestString;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.getRequestString;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.listRequestString;
+import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.listRequestWithStartOffset;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.listRequestWithTrailingDelimiter;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.moveRequestString;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.uploadRequestString;
@@ -30,19 +31,24 @@
import static java.util.concurrent.TimeUnit.MINUTES;
import static java.util.stream.Collectors.toList;
import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemImpl.PathTypeHint;
import com.google.cloud.hadoop.util.RetryHttpInitializer;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
+import java.util.ArrayList;
+import java.util.Comparator;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
+import java.util.stream.Collectors;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
@@ -1032,6 +1038,137 @@ public void listFileInfo_customFields_fails() throws Exception {
}
}
+ @Test
+ public void listFileInfoStartingFrom_customFields_required() throws Exception {
+ gcsFs = newGcsFs(newGcsFsOptions().build());
+ String bucketName = gcsfsIHelper.sharedBucketName1;
+ URI bucketUri = new URI("gs://" + bucketName + "/");
+ String dirObject = getTestResource() + "/";
+ String objectName = dirObject + "I_am_file";
+ gcsfsIHelper.createObjects(bucketName, objectName);
+
+ List fileInfos =
+ gcsFs.listFileInfoStartingFrom(
+ bucketUri.resolve(dirObject),
+ ListFileOptions.DEFAULT.toBuilder().setFields("bucket,name").build());
+
+ // Can't assert that this is the only object we get in response, other objects lexicographically
+ // higher would also come in response.
+ // Only thing we can assert strongly is, list would start with the files created in this
+ // directory.
+ assertThat(fileInfos.get(0).getPath()).isEqualTo(bucketUri.resolve(objectName));
+ // No item info calls are made with offset based api
+ assertThat(gcsRequestsTracker.getAllRequestStrings())
+ .containsExactly(
+ listRequestWithStartOffset(
+ bucketName, dirObject, /* pageToken= */ null, /* objectFields */ "bucket,name"));
+ }
+
+ @Test
+ public void listFileInfoStartingFrom_customFields_fail() throws Exception {
+ gcsFs = newGcsFs(newGcsFsOptions().build());
+ String bucketName = gcsfsIHelper.sharedBucketName1;
+ URI bucketUri = new URI("gs://" + bucketName + "/");
+ String dirObject = getTestResource() + "/";
+ String objectName = dirObject + "I_am_file";
+ gcsfsIHelper.createObjects(bucketName, objectName);
+
+ assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ gcsFs.listFileInfoStartingFrom(
+ bucketUri.resolve(dirObject),
+ ListFileOptions.DEFAULT.toBuilder().setFields("bucket").build()));
+
+ assertTrue(gcsRequestsTracker.getAllRequestStrings().isEmpty());
+ }
+
+ @Test
+ public void listFileInfoStartingFrom_emptyResponse() throws Exception {
+ gcsFs = newGcsFs(newGcsFsOptions().build());
+ String bucketName = gcsfsIHelper.sharedBucketName1;
+
+ URI bucketUri = new URI("gs://" + bucketName + "/");
+ // lexicographically the highest string of length 4
+ String dirObject = "~~~~" + "/";
+ gcsfsIHelper.createObjects(bucketName, dirObject);
+
+ List fileInfos = gcsFs.listFileInfoStartingFrom(bucketUri.resolve(dirObject));
+
+ // Can't assert that this is the only object we get in response, other objects lexicographically
+ // higher would also come in response.
+ // Only thing we can assert strongly is, list would start with the files created in this
+ // directory.
+ assertThat(fileInfos.size()).isEqualTo(0);
+
+ // No item info calls are made with offset based api
+ assertThat(gcsRequestsTracker.getAllRequestStrings())
+ .containsExactly(listRequestWithStartOffset(bucketName, dirObject, /* pageToken= */ null));
+ }
+
+ @Test
+ public void listFileInfoStartingFrom_sortedLexicographically() throws Exception {
+ gcsFs = newGcsFs(newGcsFsOptions().build());
+ String bucketName = gcsfsIHelper.sharedBucketName1;
+ URI bucketUri = new URI("gs://" + bucketName + "/");
+ String dirObject = getTestResource() + "/";
+ int filesCount = 50;
+ List createdURIs = new ArrayList<>();
+ for (int i = 0; i < filesCount; i++) {
+ String objectName = dirObject + UUID.randomUUID();
+ gcsfsIHelper.createObjects(bucketName, objectName);
+ createdURIs.add(bucketUri.resolve(objectName));
+ }
+ List sortedURI =
+ createdURIs.stream()
+ .sorted(Comparator.comparing(URI::getPath))
+ .collect(Collectors.toList());
+
+ List fileInfos = gcsFs.listFileInfoStartingFrom(bucketUri.resolve(dirObject));
+
+ // Can't assert that this is the only object we get in response, other objects lexicographically
+ // higher would also come in response.
+ // Only thing we can assert strongly is, list would start with the files created in this
+ // directory.
+ for (int i = 0; i < filesCount; i++) {
+ assertThat(getURIs(fileInfos).get(i)).isEqualTo(sortedURI.get(i));
+ }
+
+ // No item info calls are made with offset based api
+ assertThat(gcsRequestsTracker.getAllRequestStrings())
+ .containsExactly(listRequestWithStartOffset(bucketName, dirObject, /* pageToken= */ null));
+ }
+
+ @Test
+ public void listFileInfoStartingFrom_filter_directory() throws Exception {
+ gcsFs = newGcsFs(newGcsFsOptions().build());
+ String bucketName = gcsfsIHelper.sharedBucketName1;
+ URI bucketUri = new URI("gs://" + bucketName + "/");
+ String dirObject = getTestResource() + "/";
+ String objectName = dirObject + "I_am_file";
+ gcsfsIHelper.createObjects(bucketName, dirObject);
+ gcsfsIHelper.createObjects(bucketName, objectName);
+
+ List fileInfos = gcsFs.listFileInfoStartingFrom(bucketUri.resolve(dirObject));
+
+ // Can't assert that this is the only object we get in response, other object lexicographically
+ // higher would also come in response.
+ // Only thing we can assert strongly is, list would start with the files created in this
+ // directory.
+ assertThat(fileInfos.get(0).getPath()).isEqualTo(bucketUri.resolve(objectName));
+ // No item info calls are made with offset based api
+ assertThat(gcsRequestsTracker.getAllRequestStrings())
+ .containsExactly(listRequestWithStartOffset(bucketName, dirObject, /* pageToken= */ null));
+
+ // directory object was filtered
+ FileInfo dirFileInfo =
+ gcsFs.getFileInfoWithHint(bucketUri.resolve(dirObject), PathTypeHint.FILE);
+ // directory as an object do exists.
+ assertTrue(dirFileInfo.isDirectory());
+ // but directory objects are filtered out and only files are returned.
+ assertThat(getURIs(fileInfos)).doesNotContain(bucketUri.resolve(dirObject));
+ }
+
@Test
public void delete_file_sequential() throws Exception {
gcsFs = newGcsFs(newGcsFsOptions().setStatusParallelEnabled(false).build());
@@ -1156,6 +1293,7 @@ private void delete_directory_hns_optimization(boolean parallelStatus) throws Ex
"bucket,name,generation",
/* maxResults= */ 2,
/* pageToken= */ null,
+ /* startOffset */ null,
/* includeFoldersAsPrefixes= */ true),
getRequestString(bucketName, dirObject + "/"),
deleteRequestString(bucketName, dirObject + "/d1/", /* generationId= */ 1));
@@ -1722,6 +1860,10 @@ private static GoogleCloudStorageFileSystemOptions.Builder newGcsFsOptions() {
return GoogleCloudStorageFileSystemOptions.builder().setCloudStorageOptions(gcsOptions);
}
+ private static List getURIs(List fileInfos) {
+ return fileInfos.stream().map(FileInfo::getPath).collect(toList());
+ }
+
protected abstract GoogleCloudStorageFileSystem newGcsFs(
GoogleCloudStorageFileSystemOptions gcsfsOptions) throws IOException;
}
diff --git a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageIntegrationHelper.java b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageIntegrationHelper.java
index c8f7ed2511..577861ecc4 100644
--- a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageIntegrationHelper.java
+++ b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageIntegrationHelper.java
@@ -28,14 +28,10 @@
import com.google.cloud.hadoop.gcsio.integration.GoogleCloudStorageTestHelper.TestBucketHelper;
import com.google.common.collect.Iterables;
import com.google.common.flogger.GoogleLogger;
-import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
-import java.net.HttpURLConnection;
import java.net.URI;
-import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.channels.WritableByteChannel;
@@ -534,27 +530,28 @@ public String createUniqueZonalOrRegionalBucket(String suffix, boolean isBucketZ
String zone = DEFAULT_ZONE; // Default zone
String region = DEFAULT_REGION; // Default region
- try {
- URL metadataServerUrl = new URL(METADATA_SERVER_ZONE_URL);
- HttpURLConnection connection = (HttpURLConnection) metadataServerUrl.openConnection();
- connection.setRequestProperty(METADATA_FLAVOR_HEADER, METADATA_FLAVOR_VALUE);
- connection.setConnectTimeout(METADATA_SERVER_TIMEOUT_MS); // 5-second connection timeout
- connection.setReadTimeout(METADATA_SERVER_TIMEOUT_MS); // 5-second read timeout
-
- try (BufferedReader reader =
- new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
- String response = reader.readLine();
- // The response is in the format "projects/PROJECT_NUMBER/zones/ZONE"
- String[] parts = response.split("/");
- String fullZone = parts[parts.length - 1];
- zone = fullZone;
- region = fullZone.substring(0, fullZone.lastIndexOf('-'));
- }
- } catch (IOException e) {
- logger.atWarning().log(
- "Falling back to default region (%s) and zone (%s) because metadata server is unreachable.",
- region, zone);
- }
+ // try {
+ // URL metadataServerUrl = new URL(METADATA_SERVER_ZONE_URL);
+ // HttpURLConnection connection = (HttpURLConnection) metadataServerUrl.openConnection();
+ // connection.setRequestProperty(METADATA_FLAVOR_HEADER, METADATA_FLAVOR_VALUE);
+ // connection.setConnectTimeout(METADATA_SERVER_TIMEOUT_MS); // 5-second connection timeout
+ // connection.setReadTimeout(METADATA_SERVER_TIMEOUT_MS); // 5-second read timeout
+
+ // try (BufferedReader reader =
+ // new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
+ // String response = reader.readLine();
+ // // The response is in the format "projects/PROJECT_NUMBER/zones/ZONE"
+ // String[] parts = response.split("/");
+ // String fullZone = parts[parts.length - 1];
+ // zone = fullZone;
+ // region = fullZone.substring(0, fullZone.lastIndexOf('-'));
+ // }
+ // } catch (IOException e) {
+ // logger.atWarning().log(
+ // "Falling back to default region (%s) and zone (%s) because metadata server is
+ // unreachable.",
+ // region, zone);
+ // }
CreateBucketOptions zonalBucketOptions =
CreateBucketOptions.builder()
diff --git a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageNewIntegrationTestBase.java b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageNewIntegrationTestBase.java
index 5307a641da..12ec92732b 100644
--- a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageNewIntegrationTestBase.java
+++ b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/GoogleCloudStorageNewIntegrationTestBase.java
@@ -16,6 +16,7 @@
package com.google.cloud.hadoop.gcsio;
+import static com.google.cloud.hadoop.gcsio.GoogleCloudStorage.LIST_MAX_RESULTS;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.batchRequestString;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.composeRequestString;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.copyRequestString;
@@ -24,6 +25,7 @@
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.getMediaRequestString;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.getRequestString;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.listRequestString;
+import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.listRequestWithStartOffset;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.listRequestWithTrailingDelimiter;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.postRequestString;
import static com.google.cloud.hadoop.gcsio.TrackingHttpRequestInitializer.resumableUploadChunkRequestString;
@@ -33,6 +35,7 @@
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.stream.Collectors.toList;
import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
import com.google.cloud.hadoop.util.RetryHttpInitializer;
import com.google.common.collect.ImmutableList;
@@ -43,12 +46,16 @@
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.SeekableByteChannel;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.function.BiFunction;
import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
import java.util.zip.GZIPOutputStream;
import org.junit.After;
import org.junit.Before;
@@ -75,7 +82,7 @@ public abstract class GoogleCloudStorageNewIntegrationTestBase {
@Rule public TestName name = new TestName();
protected TrackingHttpRequestInitializer gcsRequestsTracker;
- protected boolean isTracingSupported = false;
+ protected boolean isTracingSupported = true;
private GoogleCloudStorage gcs;
@@ -796,6 +803,259 @@ public void listObjectInfo_allMetadataFieldsCorrect() throws Exception {
objectId.getBucketName(), testDirName, /* pageToken= */ null));
}
+ @Test
+ public void listObjectInfoStartingFrom_negativeMaxResult() throws Exception {
+ gcs = createGoogleCloudStorage(gcsOptions);
+ String bucketName = gcsfsIHelper.sharedBucketName1;
+
+ String testDirName = name.getMethodName() + "/";
+
+ String fileName = testDirName + "I_am_file";
+ StorageResourceId fileResource = new StorageResourceId(bucketName, fileName);
+
+ gcsfsIHelper.gcs.createEmptyObject(fileResource);
+
+ int maxResults = -1;
+
+ ListObjectOptions listOptionsLimitResults =
+ ListObjectOptions.DEFAULT_USING_START_OFFSET.toBuilder().setMaxResults(maxResults).build();
+ gcs.listObjectInfoStartingFrom(bucketName, testDirName, listOptionsLimitResults);
+
+ assertThat(gcsRequestsTracker.getAllRequestStrings())
+ .containsExactly(
+ listRequestWithStartOffset(
+ // maxResults is overridden with the default value
+ bucketName,
+ testDirName,
+ /* pageToken= */ null,
+ /* maxResults= */ (int) LIST_MAX_RESULTS));
+ }
+
+ @Test
+ public void listObjectInfoStartingFrom_startOffsetNotObject() throws Exception {
+ gcs = createGoogleCloudStorage(gcsOptions);
+ String bucketName = gcsfsIHelper.sharedBucketName1;
+
+ String testDirName = name.getMethodName() + "/";
+
+ String fileName = testDirName + "I_am_file";
+ StorageResourceId fileResource = new StorageResourceId(bucketName, fileName);
+
+ gcsfsIHelper.gcs.createEmptyObject(fileResource);
+ String startOffset = testDirName.substring(5, testDirName.length() - 1);
+
+ List listedObjects =
+ gcs.listObjectInfoStartingFrom(bucketName, startOffset);
+
+ verifyListedFilesOrder(listedObjects, startOffset);
+
+ assertThat(gcsRequestsTracker.getAllRequestStrings())
+ .containsExactly(
+ listRequestWithStartOffset(bucketName, startOffset, /* pageToken= */ null));
+ }
+
+ @Test
+ public void listObjectInfoStartingFrom_sortedListFiles() throws Exception {
+
+ gcs = createGoogleCloudStorage(gcsOptions);
+ String bucketName = gcsfsIHelper.sharedBucketName1;
+
+ String testDirName = name.getMethodName() + "/";
+ List resources = new ArrayList<>();
+ int filesCount = 50;
+ for (int i = 0; i < filesCount; i++) {
+ String uniqueFilename = UUID.randomUUID().toString().replaceAll("-", "").substring(0, 12);
+ StorageResourceId file = new StorageResourceId(bucketName, testDirName + uniqueFilename);
+ gcsfsIHelper.gcs.createEmptyObject(file);
+ resources.add(file);
+ }
+ List sortedResources =
+ resources.stream()
+ .sorted(Comparator.comparing(StorageResourceId::getObjectName))
+ .collect(Collectors.toList());
+ assertTrue(sortedResources.size() == resources.size());
+
+ List listedObjects =
+ gcs.listObjectInfoStartingFrom(bucketName, testDirName);
+ verifyListedFilesOrder(listedObjects, testDirName);
+ // Can't assert that this is the only object we get in response, other objects lexicographically
+ // higher would also come in response.
+ // Only thing we can assert strongly is, list would start with the files created in this
+ // directory.
+ for (int i = 0; i < filesCount; i++) {
+ assertThat(getObjectNames(listedObjects).get(i))
+ .isEqualTo(sortedResources.get(i).getObjectName());
+ }
+
+ assertThat(gcsRequestsTracker.getAllRequestStrings())
+ .containsExactly(
+ listRequestWithStartOffset(bucketName, testDirName, /* pageToken= */ null));
+ }
+
+ @Test
+ public void listObjectInfoStartingFrom_invalidListOptions() throws Exception {
+ String bucketName = gcsfsIHelper.sharedBucketName1;
+ gcs = createGoogleCloudStorage(gcsOptions);
+
+ String dir = name.getMethodName() + "2/";
+
+ String fileName = dir + "I_am_file";
+ StorageResourceId fileResource = new StorageResourceId(bucketName, fileName);
+
+ gcsfsIHelper.gcs.createEmptyObject(fileResource);
+
+ assertThrows(
+ IOException.class,
+ () ->
+ gcs.listObjectInfoStartingFrom(
+ gcsfsIHelper.sharedBucketName1, dir, ListObjectOptions.DEFAULT));
+ }
+
+ @Test
+ public void listObjectInfoStartingFrom_multipleListCalls() throws Exception {
+
+ String bucketName = gcsfsIHelper.sharedBucketName1;
+ gcs = createGoogleCloudStorage(gcsOptions);
+
+ String dir1 = name.getMethodName() + "1/";
+ StorageResourceId dirResource1 = new StorageResourceId(bucketName, dir1);
+
+ String dir2 = name.getMethodName() + "2/";
+ StorageResourceId dirResource2 = new StorageResourceId(bucketName, dir2);
+
+ String fileName = dir2 + "I_am_file";
+ StorageResourceId fileResource = new StorageResourceId(bucketName, fileName);
+
+ gcsfsIHelper.gcs.createEmptyObject(dirResource1);
+ gcsfsIHelper.gcs.createEmptyObject(dirResource2);
+ gcsfsIHelper.gcs.createEmptyObject(fileResource);
+ int maxResults = 2;
+
+ ListObjectOptions listOptionsLimitResults =
+ ListObjectOptions.DEFAULT_USING_START_OFFSET.toBuilder().setMaxResults(maxResults).build();
+
+ List listedObjects =
+ gcs.listObjectInfoStartingFrom(
+ gcsfsIHelper.sharedBucketName1, dir1, listOptionsLimitResults);
+ assertThat(getObjectNames(listedObjects)).doesNotContain(dirResource1);
+ assertThat(getObjectNames(listedObjects)).doesNotContain(dirResource2);
+
+ // Can't assert that this is the only object we get in response, other objects lexicographically
+ // higher would also come in response.
+ // Only thing we can assert strongly is, list would start with the files created in this
+ // directory.
+ assertThat(getObjectNames(listedObjects).get(0)).isEqualTo(fileResource.getObjectName());
+
+ assertThat(gcsRequestsTracker.getAllRequestStrings())
+ .containsExactly(
+ listRequestWithStartOffset(bucketName, dir1, /* pageToken= */ null, maxResults),
+ listRequestWithStartOffset(bucketName, dir1, /* pageToken= */ "token_1", maxResults));
+ }
+
+ @Test
+ public void listObjectInfoStartingFrom_filterDirObjects() throws Exception {
+ gcs = createGoogleCloudStorage(gcsOptions);
+
+ String testDirName = name.getMethodName() + "/";
+ StorageResourceId objectId1 =
+ new StorageResourceId(gcsfsIHelper.sharedBucketName1, testDirName + "object1");
+ String subDirName = testDirName + "subDir/";
+ StorageResourceId directoryResource =
+ new StorageResourceId(gcsfsIHelper.sharedBucketName1, subDirName);
+ StorageResourceId objectId2 =
+ new StorageResourceId(gcsfsIHelper.sharedBucketName1, subDirName + "subObject1");
+
+ gcsfsIHelper.gcs.createEmptyObject(objectId1);
+ gcsfsIHelper.gcs.createEmptyObject(directoryResource);
+ gcsfsIHelper.gcs.createEmptyObject(objectId2);
+
+ List listedObjects =
+ gcs.listObjectInfoStartingFrom(gcsfsIHelper.sharedBucketName1, testDirName);
+ verifyListedFilesOrder(listedObjects, testDirName);
+
+ assertThat(getObjectNames(listedObjects)).doesNotContain(directoryResource);
+
+ // Can't assert that this is the only object we get in response, other objects lexicographically
+ // higher would also come in response.
+ // Only thing we can assert strongly is, list would start with the files created in this
+ // directory.
+ assertThat(getObjectNames(listedObjects).get(0)).isEqualTo(objectId1.getObjectName());
+ assertThat(getObjectNames(listedObjects).get(1)).isEqualTo(objectId2.getObjectName());
+
+ assertThat(gcsRequestsTracker.getAllRequestStrings())
+ .containsExactly(
+ listRequestWithStartOffset(
+ objectId1.getBucketName(), testDirName, /* pageToken= */ null));
+ }
+
+ @Test
+ public void listObjectInfoStartingFrom_listSubDirFiles() throws Exception {
+
+ gcs = createGoogleCloudStorage(gcsOptions);
+
+ String testDirName = name.getMethodName() + "/";
+ StorageResourceId objectId1 =
+ new StorageResourceId(gcsfsIHelper.sharedBucketName1, testDirName + "object1");
+ String subDirName = testDirName + "subDir/";
+ StorageResourceId objectId2 =
+ new StorageResourceId(gcsfsIHelper.sharedBucketName1, subDirName + "subObject1");
+
+ gcsfsIHelper.gcs.createEmptyObject(objectId2);
+ gcsfsIHelper.gcs.createEmptyObject(objectId1);
+
+ List listedObjects =
+ gcs.listObjectInfoStartingFrom(gcsfsIHelper.sharedBucketName1, testDirName);
+ verifyListedFilesOrder(listedObjects, testDirName);
+ // Can't assert that this is the only object we get in response, other objects lexicographically
+ // higher would also come in response.
+ // Only thing we can assert strongly is, list would start with the files created in this
+ // directory.
+ assertThat(getObjectNames(listedObjects).get(0)).isEqualTo(objectId1.getObjectName());
+ assertThat(getObjectNames(listedObjects).get(1)).isEqualTo(objectId2.getObjectName());
+
+ assertThat(gcsRequestsTracker.getAllRequestStrings())
+ .containsExactly(
+ listRequestWithStartOffset(
+ objectId1.getBucketName(), testDirName, /* pageToken= */ null));
+ }
+
+ @Test
+ public void listObjectInfoStartingFrom_allMetadataFieldsCorrect() throws Exception {
+ gcs = createGoogleCloudStorage(gcsOptions);
+
+ String testDirName = name.getMethodName() + "/";
+ StorageResourceId objectId2 =
+ new StorageResourceId(gcsfsIHelper.sharedBucketName1, testDirName + "object2");
+ StorageResourceId objectId1 =
+ new StorageResourceId(gcsfsIHelper.sharedBucketName1, testDirName + "object1");
+
+ // Create gzipped file so Content-Encoding will be not null
+ CreateObjectOptions createOptions =
+ GZIP_CREATE_OPTIONS.toBuilder()
+ .setMetadata(ImmutableMap.of("test-key", "val".getBytes(UTF_8)))
+ .build();
+
+ gcsfsIHelper.gcs.createEmptyObject(objectId2, createOptions);
+ gcsfsIHelper.gcs.createEmptyObject(objectId1, createOptions);
+
+ List listedObjects =
+ gcs.listObjectInfoStartingFrom(gcsfsIHelper.sharedBucketName1, testDirName);
+
+ verifyListedFilesOrder(listedObjects, testDirName);
+ // Can't assert that this is the only object we get in response, other objects lexicographically
+ // higher would also come in response.
+ // Only thing we can assert strongly is, list would start with the files created in this
+ // directory.
+ assertThat(getObjectNames(listedObjects).get(0)).isEqualTo(objectId1.getObjectName());
+ assertThat(getObjectNames(listedObjects).get(1)).isEqualTo(objectId2.getObjectName());
+
+ assertObjectFields(objectId1, listedObjects.get(0));
+ assertThat(gcsRequestsTracker.getAllRequestStrings())
+ .containsExactly(
+ listRequestWithStartOffset(
+ objectId1.getBucketName(), testDirName, /* pageToken= */ null));
+ }
+
@Test
public void getItemInfo_allMetadataFieldsCorrect() throws Exception {
gcs = createGoogleCloudStorage(gcsOptions);
@@ -1377,6 +1637,26 @@ private String createObjectsInTestDirWithoutSubdirs(String bucketName, String...
return testDir;
}
+ private void verifyListedFilesOrder(
+ List listedObjects, String startOffset) {
+ // provided item list is sorted
+ if (listedObjects.size() > 1) {
+ assertTrue(
+ IntStream.range(0, listedObjects.size() - 1)
+ .allMatch(
+ i ->
+ listedObjects
+ .get(i)
+ .getObjectName()
+ .compareTo(listedObjects.get(i + 1).getObjectName())
+ <= 0));
+ }
+
+ assertTrue(
+ IntStream.range(0, listedObjects.size() - 1)
+ .allMatch(i -> listedObjects.get(i).getObjectName().compareTo(startOffset) >= 0));
+ }
+
protected abstract GoogleCloudStorage createGoogleCloudStorage(GoogleCloudStorageOptions options)
throws IOException;
diff --git a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/TrackingHttpRequestInitializer.java b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/TrackingHttpRequestInitializer.java
index 3a2f6658d3..4810d2e45b 100644
--- a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/TrackingHttpRequestInitializer.java
+++ b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/TrackingHttpRequestInitializer.java
@@ -514,6 +514,48 @@ public static String listRequestWithTrailingDelimiter(
bucket, /* includeTrailingDelimiter= */ true, prefix, objectFields, maxResults, pageToken);
}
+ public static String listRequestWithStartOffset(
+ String bucket, String startOffset, String pageToken, int maxResults) {
+ return listRequestString(
+ bucket,
+ /* flatList */ true,
+ /* includeTrailingDelimiter */ null,
+ /* prefix */ null,
+ OBJECT_FIELDS,
+ maxResults,
+ pageToken,
+ startOffset,
+ /* includeFoldersAsPrefixes= */ false);
+ }
+
+ public static String listRequestWithStartOffset(
+ String bucket, String startOffset, String pageToken) {
+ return listRequestString(
+ bucket,
+ /* flatList */ true,
+ /* includeTrailingDelimiter */ null,
+ /* prefix */ null,
+ OBJECT_FIELDS,
+ GoogleCloudStorageOptions.DEFAULT.getMaxListItemsPerCall(),
+ pageToken,
+ startOffset,
+ /* includeFoldersAsPrefixes= */ false);
+ }
+
+ public static String listRequestWithStartOffset(
+ String bucket, String startOffset, String pageToken, String fields) {
+ return listRequestString(
+ bucket,
+ /* flatList */ true,
+ /* includeTrailingDelimiter */ null,
+ /* prefix */ null,
+ fields,
+ GoogleCloudStorageOptions.DEFAULT.getMaxListItemsPerCall(),
+ pageToken,
+ startOffset,
+ /* includeFoldersAsPrefixes= */ false);
+ }
+
public static String listRequestString(
String bucket,
Boolean includeTrailingDelimiter,
@@ -544,6 +586,7 @@ public static String listRequestString(
objectFields,
maxResults,
pageToken,
+ /* startOffset */ null,
/* includeFoldersAsPrefixes= */ false);
}
@@ -562,6 +605,7 @@ public static String listRequestString(
objectFields,
GoogleCloudStorageOptions.DEFAULT.getMaxListItemsPerCall(),
pageToken,
+ /* startOffset */ null,
/* includeFoldersAsPrefixes= */ false);
}
@@ -573,6 +617,7 @@ public static String listRequestString(
String objectFields,
int maxResults,
String pageToken,
+ String startOffset,
Boolean includeFoldersAsPrefixes) {
String baseUrl = String.format(LIST_REQUEST_FORMAT, bucket);
@@ -591,6 +636,8 @@ public static String listRequestString(
addIfNotnull(params, "pageToken", pageToken);
addIfNotnull(params, "prefix", prefix);
+ addIfNotnull(params, "startOffset", startOffset);
+
return baseUrl + "?" + String.join("&", params);
}
diff --git a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/integration/GoogleCloudStorageImplTest.java b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/integration/GoogleCloudStorageImplTest.java
index dd0a23a65a..efaddf7c7c 100644
--- a/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/integration/GoogleCloudStorageImplTest.java
+++ b/gcsio/src/test/java/com/google/cloud/hadoop/gcsio/integration/GoogleCloudStorageImplTest.java
@@ -539,6 +539,36 @@ public void conflictingWrites_noOverwrite_lastFails() throws IOException {
trackingGcs2.delegate.close();
}
+ @Test
+  public void listObjectInfoStartingFrom_lexicographicalOrder() throws IOException {
+ String testDirectory = name.getMethodName();
+
+ TrackingStorageWrapper trackingGcs =
+ newTrackingGoogleCloudStorage(GCS_OPTIONS);
+ StorageResourceId resourceId3 = new StorageResourceId(testBucket, testDirectory + "/object3");
+ StorageResourceId resourceId2 = new StorageResourceId(testBucket, testDirectory + "/object2");
+ StorageResourceId resourceId1 = new StorageResourceId(testBucket, testDirectory + "/object1");
+
+ trackingGcs.delegate.createEmptyObject(resourceId3);
+ trackingGcs.delegate.createEmptyObject(resourceId2);
+ trackingGcs.delegate.createEmptyObject(resourceId1);
+
+    // Verify that objects are returned in lexicographical order regardless of creation order
+    List<GoogleCloudStorageItemInfo> listedItems =
+ helperGcs.listObjectInfoStartingFrom(testBucket, testDirectory + "/");
+ assertThat(listedItems.stream().map(GoogleCloudStorageItemInfo::getResourceId).toArray())
+ .asList()
+ .containsExactlyElementsIn(
+            ImmutableList.<StorageResourceId>builder()
+ .add(resourceId1)
+ .add(resourceId2)
+ .add(resourceId3)
+ .build()
+ .toArray())
+ .inOrder();
+ trackingGcs.delegate.close();
+ }
+
@Test
public void create_doesNotRepairImplicitDirectories() throws IOException {
String testDirectory = name.getMethodName();
diff --git a/pom.xml b/pom.xml
index 7beae6bbec..060b4d9a66 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   <groupId>com.google.cloud.bigdataoss</groupId>
   <artifactId>bigdataoss-parent</artifactId>
-  <version>3.0.0-SNAPSHOT</version>
+  <version>4.0.0-SNAPSHOT</version>
   <packaging>pom</packaging>
   <url>https://github.com/GoogleCloudDataproc/hadoop-connectors</url>
@@ -68,7 +68,7 @@
true
- 3.0.0-SNAPSHOT
+ 4.0.0-SNAPSHOT
1.10.4
26.69.0
diff --git a/util-hadoop/pom.xml b/util-hadoop/pom.xml
index e336520381..afd7f5491f 100644
--- a/util-hadoop/pom.xml
+++ b/util-hadoop/pom.xml
@@ -21,14 +21,14 @@
     <groupId>com.google.cloud.bigdataoss</groupId>
     <artifactId>bigdataoss-parent</artifactId>
-    <version>3.0.0-SNAPSHOT</version>
+    <version>4.0.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

   <artifactId>util-hadoop</artifactId>
   <name>util-hadoop</name>
-  <version>3.0.0-SNAPSHOT</version>
+  <version>4.0.0-SNAPSHOT</version>
diff --git a/util/pom.xml b/util/pom.xml
index 8b41bdbbe2..50b179a9dd 100644
--- a/util/pom.xml
+++ b/util/pom.xml
@@ -21,14 +21,14 @@
     <groupId>com.google.cloud.bigdataoss</groupId>
     <artifactId>bigdataoss-parent</artifactId>
-    <version>3.0.0-SNAPSHOT</version>
+    <version>4.0.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

   <artifactId>util</artifactId>
   <name>util</name>
-  <version>3.0.0-SNAPSHOT</version>
+  <version>4.0.0-SNAPSHOT</version>