Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,34 @@ public FileStatus[] listStatus(Path hadoopPath) throws IOException {
});
}

/**
 * Lists file statuses starting from the given path.
 *
 * <p>The path is converted to a GCS path, the underlying GCS file system is asked for the file
 * infos starting from it, and every returned {@link FileInfo} is converted to a {@link
 * FileStatus} for the current UGI user. The number of returned entries is recorded under {@link
 * GhfsStatistic#INVOCATION_LIST_STATUS_RESULT_SIZE}.
 *
 * @param startFrom path used as the start offset of the listing; must not be null.
 * @return the statuses of the listed files, in the order returned by the GCS file system.
 * @throws IllegalArgumentException if {@code startFrom} is null.
 * @throws IOException if the listing fails.
 */
public FileStatus[] listStatusStartingFrom(Path startFrom) throws IOException {
  return trackDurationWithTracing(
      instrumentation,
      globalStorageStatistics,
      GhfsStatistic.INVOCATION_LIST_STATUS,
      startFrom,
      traceFactory,
      () -> {
        checkArgument(startFrom != null, "start offset path must not be null");
        checkOpen();
        logger.atFiner().log("listStatusStartingFrom(hadoopPath: %s)", startFrom);

        URI startOffsetUri = getGcsPath(startFrom);
        List<FileInfo> listedInfos =
            getGcsFs().listFileInfoStartingFrom(startOffsetUri, LIST_OPTIONS);

        // Convert every listed item into a Hadoop file status owned by the current user.
        List<FileStatus> statuses = new ArrayList<>(listedInfos.size());
        String ugiUser = getUgiUserName();
        for (FileInfo info : listedInfos) {
          statuses.add(getGoogleHadoopFileStatus(info, ugiUser));
        }

        incrementStatistic(GhfsStatistic.INVOCATION_LIST_STATUS_RESULT_SIZE, statuses.size());
        return statuses.toArray(new FileStatus[0]);
      });
}

@Override
public boolean mkdirs(Path hadoopPath, FsPermission permission) throws IOException {
return trackDurationWithTracing(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,16 @@ public void listStatus_throwsExceptionWhenHadoopPathNull() throws IOException {
assertThat(exception).hasMessageThat().startsWith("hadoopPath must not be null");
}

// TODO: this case is reportedly already covered by the unit tests; consider removing it here.
/** Verifies that a null start offset path is rejected with an IllegalArgumentException. */
@Test
public void listStatusStartingFrom_throwsExceptionWhenHadoopPathNull() throws IOException {
  GoogleHadoopFileSystem fs = createInMemoryGoogleHadoopFileSystem();
  Path nullPath = null;

  IllegalArgumentException thrown =
      assertThrows(IllegalArgumentException.class, () -> fs.listStatusStartingFrom(nullPath));

  assertThat(thrown).hasMessageThat().startsWith("start offset path must not be null");
}

@Test
public void setWorkingDirectory_throwsExceptionWhenHadoopPathNull() throws IOException {
GoogleHadoopFileSystem myGhfs = createInMemoryGoogleHadoopFileSystem();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,12 @@
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth.assertWithMessage;

import com.google.cloud.hadoop.gcsio.*;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorage;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemOptions.ClientType;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics;
import com.google.cloud.hadoop.gcsio.StorageResourceId;
import java.io.File;
import java.io.IOException;
import java.io.Writer;
Expand All @@ -34,15 +38,20 @@
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.permission.FsPermission;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;

/**
* Abstract base class for test suites targeting variants of GoogleHadoopFileSystem via the Hadoop
Expand Down Expand Up @@ -74,6 +83,16 @@ protected static Configuration loadConfig(Configuration config, ClientType stora
return newConfig;
}

/**
 * JUnit rule exposing the current test method name with every '[', ',' and ']' character
 * removed.
 */
@Rule
public TestName name =
    new TestName() {
      // With parameterization, method names get an "[index]" suffix appended; strip the
      // bracket (and comma) characters from the reported name.
      @Override
      public String getMethodName() {
        String rawMethodName = super.getMethodName();
        return rawMethodName.replaceAll("[\\[,\\]]", "");
      }
    };

// -----------------------------------------------------------------------------------------
// Tests that vary according to the GHFS variant, but which we want to make sure get tested.
// -----------------------------------------------------------------------------------------
Expand Down Expand Up @@ -543,6 +562,31 @@ public void provideCoverageForUnmodifiedMethods() throws IOException {
}
}

/**
 * Verifies that listing from a directory start offset returns the files created under that
 * directory first, in sorted path order.
 */
@Test
public void listStatusStartingFrom_sortedFileStatus() throws Exception {
  int fileCount = 10;
  List<Path> objectPaths = new ArrayList<>(fileCount);
  URI dirObjectURI = new URI(name.getMethodName() + "/");
  for (int i = 0; i < fileCount; i++) {
    // Create a file with a random name under the test directory.
    Path filePath =
        ghfsHelper.castAsHadoopPath(dirObjectURI.resolve(UUID.randomUUID().toString()));
    ghfsHelper.writeFile(filePath, UUID.randomUUID().toString(), 1, /* overwrite= */ false);
    objectPaths.add(filePath);
  }
  List<Path> sortedPaths = objectPaths.stream().sorted().collect(Collectors.toList());

  FileStatus[] fileStatuses =
      invokeListStatusStartingFromMethod(ghfsHelper.castAsHadoopPath(dirObjectURI));

  // Guard the indexed access below: a short listing should fail as a readable assertion
  // rather than as an ArrayIndexOutOfBoundsException.
  assertWithMessage("number of file statuses returned by listStatusStartingFrom")
      .that(fileStatuses.length)
      .isAtLeast(fileCount);

  // Can't assert that these are the only objects we get in the response: other objects that
  // are lexicographically higher would also come back.
  // The only thing we can assert strongly is that the list starts with the files created in
  // this directory.
  for (int i = 0; i < fileCount; i++) {
    assertThat(fileStatuses[i].getPath()).isEqualTo(sortedPaths.get(i));
  }
}

@Test
public void testGetFileStatusWithHint() throws Exception {
Path hadoopPath = ghfsHelper.castAsHadoopPath(getTempFilePath());
Expand Down Expand Up @@ -602,6 +646,19 @@ private void invokeGetFileStatusWithHint(
assertThat(getStatisticValue(GCS_METADATA_REQUEST)).isEqualTo(numTimes);
}

/**
 * Reflectively looks up the public {@code listStatusStartingFrom(Path)} method on the runtime
 * class of {@code ghfs}.
 *
 * @throws NoSuchMethodException if that class exposes no such method.
 */
private Method getListStatusStartingFromMethod() throws NoSuchMethodException {
  Class<?> ghfsClass = ghfs.getClass();
  return ghfsClass.getMethod("listStatusStartingFrom", Path.class);
}

/**
 * Resets the statistics and then reflectively invokes {@code listStatusStartingFrom} on {@code
 * ghfs} with the given start path.
 *
 * @param startFrom path passed to the invoked method as the start offset.
 * @return the file statuses returned by the invoked method.
 */
private FileStatus[] invokeListStatusStartingFromMethod(Path startFrom) throws Exception {
  resetStats();

  Object listing = getListStatusStartingFromMethod().invoke(ghfs, startFrom);
  return (FileStatus[]) listing;
}

private Long getStatisticValue(GoogleCloudStorageStatistics stat) {
return getStatistics().getLong(stat.getSymbol());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,12 @@ public List<GoogleCloudStorageItemInfo> listObjectInfo(
return delegate.listObjectInfo(bucketName, objectNamePrefix, listOptions);
}

/** Forwards the start-offset listing request, unchanged, to {@code delegate}. */
@Override
public List<GoogleCloudStorageItemInfo> listObjectInfoStartingFrom(
    String bucketName, String startOffset, ListObjectOptions listOptions) throws IOException {
  List<GoogleCloudStorageItemInfo> listedItems =
      delegate.listObjectInfoStartingFrom(bucketName, startOffset, listOptions);
  return listedItems;
}

@Override
public ListPage<GoogleCloudStorageItemInfo> listObjectInfoPage(
String bucketName, String objectNamePrefix, ListObjectOptions listOptions, String pageToken)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,15 @@ public interface GoogleCloudStorage {
*/
String PATH_DELIMITER = "/";

/**
 * Value indicating that all objects should be returned from GCS, with no limit, i.e. get all
 * possible items. Used while listing all files in a directory.
 */
long MAX_RESULTS_UNLIMITED = -1;

/** Value indicating the maximum number of results returned by the GCS list API. */
long LIST_MAX_RESULTS = 5000L;

/** The maximum number of objects that can be composed in one operation. */
int MAX_COMPOSE_OBJECTS = 32;

Expand Down Expand Up @@ -351,6 +357,12 @@ default List<GoogleCloudStorageItemInfo> listObjectInfo(
return listObjectInfo(bucketName, objectNamePrefix, ListObjectOptions.DEFAULT);
}

/**
 * Convenience overload of {@link #listObjectInfoStartingFrom(String, String, ListObjectOptions)}
 * that uses {@link ListObjectOptions#DEFAULT_USING_START_OFFSET} as the list options.
 */
default List<GoogleCloudStorageItemInfo> listObjectInfoStartingFrom(
    String bucketName, String startOffset) throws IOException {
  ListObjectOptions defaultOptions = ListObjectOptions.DEFAULT_USING_START_OFFSET;
  return listObjectInfoStartingFrom(bucketName, startOffset, defaultOptions);
}

/**
* Lists {@link GoogleCloudStorageItemInfo} of objects contained in the given bucket and whose
* names begin with the given prefix.
Expand Down Expand Up @@ -379,6 +391,9 @@ default List<GoogleCloudStorageItemInfo> listObjectInfo(
List<GoogleCloudStorageItemInfo> listObjectInfo(
String bucketName, String objectNamePrefix, ListObjectOptions listOptions) throws IOException;

/**
 * Variant of object listing that takes a start offset instead of an object name prefix.
 *
 * <p>NOTE(review): presumably returns the objects in the bucket whose names are
 * lexicographically equal to or greater than {@code startOffset} - confirm against the
 * implementations.
 *
 * @param bucketName name of the bucket to list.
 * @param startOffset offset at which the listing starts.
 * @param listOptions options for this listing request.
 * @return item infos of the listed objects.
 */
List<GoogleCloudStorageItemInfo> listObjectInfoStartingFrom(
String bucketName, String startOffset, ListObjectOptions listOptions) throws IOException;

/**
* The same semantics as {@link #listObjectInfo}, but returns only result of single list request
* (1 page).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,10 @@ default List<FileInfo> listFileInfo(URI path) throws IOException {
return listFileInfo(path, ListFileOptions.DEFAULT);
}

/**
 * Convenience overload of {@link #listFileInfoStartingFrom(URI, ListFileOptions)} that uses
 * {@link ListFileOptions#DEFAULT} as the list options.
 */
default List<FileInfo> listFileInfoStartingFrom(URI startsFrom) throws IOException {
  ListFileOptions defaultOptions = ListFileOptions.DEFAULT;
  return listFileInfoStartingFrom(startsFrom, defaultOptions);
}

/**
* If the given path points to a directory then the information about its children is returned,
* otherwise information about the given file is returned.
Expand All @@ -253,6 +257,15 @@ default List<FileInfo> listFileInfo(URI path) throws IOException {
*/
List<FileInfo> listFileInfo(URI path, ListFileOptions listOptions) throws IOException;

/**
 * Returns all the files that are lexicographically equal to or higher than the given path.
 *
 * @param startsFrom path at which the listing starts.
 * @param listOptions options for this listing request.
 * @return information about the listed files.
 */
List<FileInfo> listFileInfoStartingFrom(URI startsFrom, ListFileOptions listOptions)
throws IOException;

/**
* Returns the list of folder resources in the prefix. It lists all the folder resources
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,33 @@ public List<FileInfo> listFileInfo(URI path, ListFileOptions listOptions) throws
return fileInfos;
}

public List<FileInfo> listFileInfoStartingFrom(URI startsFrom, ListFileOptions listOptions)
throws IOException {
checkNotNull(startsFrom, "start Offset can't be null");
logger.atFiner().log("listFileInfoStartingFrom(startsFrom: %s)", startsFrom);

StorageResourceId startOffsetPathId =
StorageResourceId.fromUriPath(startsFrom, /* allowEmptyObjectName= */ true);

checkArgument(
!startOffsetPathId.isRoot(),
"provided start offset shouldn't be root but an object path %s",
startsFrom);

List<GoogleCloudStorageItemInfo> itemsInfo =
gcs.listObjectInfoStartingFrom(
startOffsetPathId.getBucketName(),
startOffsetPathId.getObjectName(),
updateListObjectOptions(
ListObjectOptions.builder()
.setMaxResults(options.getCloudStorageOptions().getMaxListItemsPerCall())
.setIncludePrefix(false)
.setDelimiter(null)
.build(),
listOptions));
return FileInfo.fromItemInfos(itemsInfo);
}

@Override
public FileInfo getFileInfo(URI path) throws IOException {
checkArgument(path != null, "path must not be null");
Expand Down
Loading