Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions coverage/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@
<parent>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>bigdataoss-parent</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>4.0.0-SNAPSHOT</version>
</parent>

<name>coverage</name>

<artifactId>coverage</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>4.0.0-SNAPSHOT</version>
<packaging>pom</packaging>

<profiles>
Expand Down
6 changes: 6 additions & 0 deletions gcs/CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# Release Notes

## Next

1. Upgrade hadoop version to `3.4.2`
1. Add bidi-support in connector

1. Add listStatusStartingFrom API.

1. Add AUTO_RANDOM as new fadvise mode.

1. Add getFileStatusWithHint() API
Expand Down
55 changes: 25 additions & 30 deletions gcs/dependency-reduced-pom.xml
Original file line number Diff line number Diff line change
@@ -1,30 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright 2025 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<artifactId>bigdataoss-parent</artifactId>
<groupId>com.google.cloud.bigdataoss</groupId>
<version>3.1.4-SNAPSHOT</version>
<version>3.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>gcs-connector</artifactId>
<name>gcs-connector</name>
<version>3.1.4-SNAPSHOT</version>
<version>3.0.0-SNAPSHOT</version>
<description>An implementation of org.apache.hadoop.fs.FileSystem targeting Google Cloud Storage</description>
<build>
<plugins>
Expand Down Expand Up @@ -103,7 +87,6 @@
<include>io.grpc</include>
<include>io.opencensus</include>
<include>io.opentelemetry</include>
<include>io.opentelemetry.api</include>
<include>io.opentelemetry.contrib</include>
<include>io.opentelemetry.semconv</include>
<include>io.perfmark</include>
Expand Down Expand Up @@ -225,18 +208,18 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-api</artifactId>
<version>3.3.6</version>
<version>3.4.2</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.3.6</version>
<version>3.4.2</version>
<classifier>tests</classifier>
<scope>test</scope>
<exclusions>
<exclusion>
<artifactId>hadoop-shaded-protobuf_3_7</artifactId>
<artifactId>hadoop-shaded-protobuf_3_25</artifactId>
<groupId>org.apache.hadoop.thirdparty</groupId>
</exclusion>
<exclusion>
Expand Down Expand Up @@ -264,8 +247,8 @@
<groupId>commons-net</groupId>
</exclusion>
<exclusion>
<artifactId>commons-collections</artifactId>
<groupId>commons-collections</groupId>
<artifactId>commons-collections4</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>javax.servlet-api</artifactId>
Expand Down Expand Up @@ -307,6 +290,10 @@
<artifactId>jersey-json</artifactId>
<groupId>com.github.pjfanning</groupId>
</exclusion>
<exclusion>
<artifactId>jettison</artifactId>
<groupId>org.codehaus.jettison</groupId>
</exclusion>
<exclusion>
<artifactId>jersey-server</artifactId>
<groupId>com.sun.jersey</groupId>
Expand All @@ -315,10 +302,6 @@
<artifactId>reload4j</artifactId>
<groupId>ch.qos.reload4j</groupId>
</exclusion>
<exclusion>
<artifactId>commons-beanutils</artifactId>
<groupId>commons-beanutils</groupId>
</exclusion>
<exclusion>
<artifactId>commons-configuration2</artifactId>
<groupId>org.apache.commons</groupId>
Expand Down Expand Up @@ -359,6 +342,14 @@
<artifactId>zookeeper</artifactId>
<groupId>org.apache.zookeeper</groupId>
</exclusion>
<exclusion>
<artifactId>netty-handler</artifactId>
<groupId>io.netty</groupId>
</exclusion>
<exclusion>
<artifactId>netty-transport-native-epoll</artifactId>
<groupId>io.netty</groupId>
</exclusion>
<exclusion>
<artifactId>metrics-core</artifactId>
<groupId>io.dropwizard.metrics</groupId>
Expand All @@ -367,6 +358,10 @@
<artifactId>commons-compress</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>bcprov-jdk18on</artifactId>
<groupId>org.bouncycastle</groupId>
</exclusion>
<exclusion>
<artifactId>kerb-core</artifactId>
<groupId>org.apache.kerby</groupId>
Expand All @@ -392,7 +387,7 @@
<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>gcsio</artifactId>
<version>3.1.4-SNAPSHOT</version>
<version>3.0.0-SNAPSHOT</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
Expand Down Expand Up @@ -429,7 +424,7 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-runtime</artifactId>
<version>3.3.6</version>
<version>3.4.2</version>
<scope>test</scope>
</dependency>
<dependency>
Expand Down
4 changes: 2 additions & 2 deletions gcs/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<parent>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>bigdataoss-parent</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>4.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand All @@ -31,7 +31,7 @@
</description>

<artifactId>gcs-connector</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>4.0.0-SNAPSHOT</version>

<profiles>
<profile>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package com.google.cloud.hadoop.fs.gcs;

import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.GCS_CONNECTOR_TIME;
import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.STREAM_READ_VECTORED_OPERATIONS;
import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.STREAM_READ_VECTORED_READ_COMBINED_RANGES;

import com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics;
import java.util.Arrays;
Expand Down Expand Up @@ -58,6 +60,10 @@ public void reset() {
void increment(GhfsStatistic statistic, long count) {
if (statistic == GCS_CONNECTOR_TIME) {
Metric.HADOOP_API_TIME.increment(count);
} else if (statistic == STREAM_READ_VECTORED_OPERATIONS) {
Metric.STREAM_READ_VECTORED_COUNT.increment(count);
} else if (statistic == STREAM_READ_VECTORED_READ_COMBINED_RANGES) {
Metric.STREAM_READ_VECTORED_RANGE_COUNT.increment(count);
} else if (statistic.getIsHadoopApi()) {
Metric.HADOOP_API_COUNT.increment(count);
}
Expand Down Expand Up @@ -104,7 +110,9 @@ private enum Metric {
GCS_API_COUNT("gcsApiCount"),
GCS_API_TIME("gcsApiTime"),
BACKOFF_COUNT("backoffCount"),
BACKOFF_TIME("backoffTime");
BACKOFF_TIME("backoffTime"),
STREAM_READ_VECTORED_COUNT("readVectoredCount"),
STREAM_READ_VECTORED_RANGE_COUNT("readVectoredRangeCount");

private final String metricName;
private final ThreadLocalValue metricValue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,40 @@ public FileStatus[] listStatus(Path hadoopPath) throws IOException {
});
}

/**
* Gets the FileStatus of all files whose paths are lexicographically greater than or equal to the
* provided path. It filters out any directory objects present in the underlying storage.
*
* <p>This is an experimental API that can change without notice.
*
* @param startFrom the (inclusive) path to start listing from; must not be null
* @return the statuses of all matching file objects
* @throws IOException if the listing fails or the FileSystem is closed
*/
public FileStatus[] listStatusStartingFrom(Path startFrom) throws IOException {
// Wrapped so the call is tracked under the INVOCATION_LIST_STATUS duration statistic.
return trackDurationWithTracing(
instrumentation,
globalStorageStatistics,
GhfsStatistic.INVOCATION_LIST_STATUS,
startFrom,
traceFactory,
() -> {
checkArgument(startFrom != null, "start offset path must not be null");

checkOpen();

logger.atFiner().log("listStatusStartingFrom(hadoopPath: %s)", startFrom);

URI gcsPath = getGcsPath(startFrom);
List<FileStatus> status;

// Delegate the actual listing to the underlying GCS filesystem layer.
List<FileInfo> fileInfos = getGcsFs().listFileInfoStartingFrom(gcsPath, LIST_OPTIONS);
status = new ArrayList<>(fileInfos.size());
String userName = getUgiUserName();
for (FileInfo fileInfo : fileInfos) {
status.add(getGoogleHadoopFileStatus(fileInfo, userName));
}
incrementStatistic(GhfsStatistic.INVOCATION_LIST_STATUS_RESULT_SIZE, status.size());
return status.toArray(new FileStatus[0]);
});
}

@Override
public boolean mkdirs(Path hadoopPath, FsPermission permission) throws IOException {
return trackDurationWithTracing(
Expand Down Expand Up @@ -944,7 +978,7 @@ public FileStatus getFileStatus(Path hadoopPath) throws IOException {
* performance and reduce redundant API calls without compromising performance and API behaviour.
* Currently, only "file" type hint is supported.
*
* <p>This is an experimental API can can change without notice.
* <p>This is an experimental API that can change without notice.
*/
public FileStatus getFileStatusWithHint(Path hadoopPath, Configuration hint) throws IOException {
return trackDurationWithTracing(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ public class GhfsThreadLocalStatisticsTest {
private static final String BACKOFF_TIME = "backoffTime";
private static final String HADOOP_API_COUNT = "hadoopApiCount";
private static final String HADOOP_API_TIME = "hadoopApiTime";
private static final String STREAM_READ_VECTORED_COUNT = "readVectoredCount";

private static final String STREAM_READ_VECTORED_RANGE_COUNT = "readVectoredRangeCount";

private static Map<GoogleCloudStorageStatistics, String> typeToNameMapping =
Map.of(
Expand All @@ -62,7 +65,9 @@ private Map<String, Long> getInitMetrics() {
HADOOP_API_COUNT, 0L,
HADOOP_API_TIME, 0L,
GCS_API_COUNT, 0L,
GCS_API_TIME, 0L));
GCS_API_TIME, 0L,
STREAM_READ_VECTORED_COUNT, 0L,
STREAM_READ_VECTORED_RANGE_COUNT, 0L));

return result;
}
Expand Down Expand Up @@ -121,6 +126,10 @@ private static void runHadoopApiTests(
expectedMetrics.merge(HADOOP_API_COUNT, 1L, Long::sum);
} else if (ghfsStatistic == GhfsStatistic.GCS_CONNECTOR_TIME) {
expectedMetrics.merge(HADOOP_API_TIME, 1L, Long::sum);
} else if (ghfsStatistic == GhfsStatistic.STREAM_READ_VECTORED_OPERATIONS) {
expectedMetrics.merge(STREAM_READ_VECTORED_COUNT, 1L, Long::sum);
} else if (ghfsStatistic == GhfsStatistic.STREAM_READ_VECTORED_READ_COMBINED_RANGES) {
expectedMetrics.merge(STREAM_READ_VECTORED_RANGE_COUNT, 1L, Long::sum);
}

verify(expectedMetrics, actualMetrics);
Expand All @@ -133,6 +142,10 @@ private static void runHadoopApiTests(
expectedMetrics.merge(HADOOP_API_COUNT, theValue, Long::sum);
} else if (ghfsStatistic == GhfsStatistic.GCS_CONNECTOR_TIME) {
expectedMetrics.merge(HADOOP_API_TIME, theValue, Long::sum);
} else if (ghfsStatistic == GhfsStatistic.STREAM_READ_VECTORED_OPERATIONS) {
expectedMetrics.merge(STREAM_READ_VECTORED_COUNT, theValue, Long::sum);
} else if (ghfsStatistic == GhfsStatistic.STREAM_READ_VECTORED_READ_COMBINED_RANGES) {
expectedMetrics.merge(STREAM_READ_VECTORED_RANGE_COUNT, theValue, Long::sum);
}

verify(expectedMetrics, actualMetrics);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,27 @@ public static void afterClass() {
gcsFsIHelper.afterAllTests();
}

// Verifies that a vectored read over two adjacent ranges returns the expected bytes when the
// connector is configured with the bidi test configuration (fs.gs.bidi.enable=true).
@Test
public void testBidiVectoredRead() throws Exception {
URI path = gcsFsIHelper.getUniqueObjectUri(getClass(), "testBidiVectoredRead");

String testContent = "test content";
gcsFsIHelper.writeTextFile(path, testContent);

// Two adjacent ranges within the test content; presumably (offset, length) — TODO confirm
// against FileRange.createFileRange semantics.
List<FileRange> ranges = new ArrayList<>();
ranges.add(FileRange.createFileRange(0, 5));
ranges.add(FileRange.createFileRange(5, 6));

try (GoogleHadoopFileSystem ghfs =
GoogleHadoopFileSystemIntegrationHelper.createGhfs(
path, GoogleHadoopFileSystemIntegrationHelper.getBidiTestConfiguration());
GoogleHadoopFSInputStream in = createGhfsInputStream(ghfs, path)) {

in.readVectored(ranges, ByteBuffer::allocate);
// Asserts the populated range buffers match the file content on GCS.
validateVectoredReadResult(ranges, path);
}
}

@Test
public void seek_illegalArgument() throws Exception {
URI path = gcsFsIHelper.getUniqueObjectUri(getClass(), "seek_illegalArgument");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import static com.google.cloud.hadoop.gcsio.testing.TestConfiguration.GCS_TEST_PROJECT_ID;
import static com.google.common.base.Preconditions.checkNotNull;

import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemOptions.ClientType;
import com.google.cloud.hadoop.gcsio.testing.TestConfiguration;
import com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.AuthenticationType;
import java.net.URI;
Expand Down Expand Up @@ -66,5 +67,13 @@ public static Configuration getTestConfig() {
return config;
}

/**
* Returns a test configuration with bidirectional (bidi) support enabled: starts from {@link
* #getTestConfig()} and additionally sets {@code fs.gs.bidi.enable}, selects the {@code
* STORAGE_CLIENT} client type, and enables gRPC ({@code fs.gs.grpc.enable}).
*/
public static Configuration getBidiTestConfiguration() {
Configuration config = getTestConfig();
config.setBoolean("fs.gs.bidi.enable", true);
config.setEnum("fs.gs.client.type", ClientType.STORAGE_CLIENT);
config.setBoolean("fs.gs.grpc.enable", true);
return config;
}

private GoogleHadoopFileSystemIntegrationHelper() {}
}
Loading