From 69dd91f92ea8905ec1857015bd01d484904b9839 Mon Sep 17 00:00:00 2001 From: sunxiaojian Date: Thu, 6 Mar 2025 11:52:24 +0800 Subject: [PATCH] Cache Hadoop Filesystem instance on Gravitino server to improve the performance --- .../gravitino/oss/fs/OSSFileSystemProvider.java | 2 +- .../gravitino/s3/fs/S3FileSystemProvider.java | 2 +- .../abs/fs/AzureFileSystemProvider.java | 2 +- .../gravitino/gcs/fs/GCSFileSystemProvider.java | 2 +- .../catalog/hadoop/fs/FileSystemProvider.java | 17 +++++++++++++++++ .../hadoop/fs/HDFSFileSystemProvider.java | 2 +- .../hadoop/fs/LocalFileSystemProvider.java | 2 +- .../hadoop/GravitinoVirtualFileSystem.java | 2 +- 8 files changed, 24 insertions(+), 7 deletions(-) diff --git a/bundles/aliyun/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java index 358e3a08c76..bff52660e06 100644 --- a/bundles/aliyun/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java +++ b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java @@ -67,7 +67,7 @@ public FileSystem getFileSystem(Path path, Map config) throws IO hadoopConfMap.forEach(configuration::set); - return AliyunOSSFileSystem.newInstance(path.toUri(), configuration); + return AliyunOSSFileSystem.get(path.toUri(), configuration); } @Override diff --git a/bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java b/bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java index cbe133ed778..255a65cfb9d 100644 --- a/bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java +++ b/bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java @@ -74,7 +74,7 @@ public FileSystem getFileSystem(Path path, Map config) throws IO // Hadoop-aws 2 does not support IAMInstanceCredentialsProvider checkAndSetCredentialProvider(configuration); - return S3AFileSystem.newInstance(path.toUri(), configuration); + return S3AFileSystem.get(path.toUri(), configuration); } @Override diff --git a/bundles/azure/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java b/bundles/azure/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java index 3dcbb502f62..736aeba1703 100644 --- a/bundles/azure/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java +++ b/bundles/azure/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java @@ -74,7 +74,7 @@ public FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map hadoopConfMap.forEach(configuration::set); - return FileSystem.newInstance(path.toUri(), configuration); + return FileSystem.get(path.toUri(), configuration); } @Override diff --git a/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java index 7ab38b2d7a9..9f1d2647456 100644 --- a/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java +++ b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java @@ -49,7 +49,7 @@ public FileSystem getFileSystem(Path path, Map config) throws IO FileSystemUtils.toHadoopConfigMap(config, GRAVITINO_KEY_TO_GCS_HADOOP_KEY) .forEach(configuration::set); - return FileSystem.newInstance(path.toUri(), configuration); + return FileSystem.get(path.toUri(), configuration); } @Override diff --git a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemProvider.java b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemProvider.java index 9c1979345d9..31cb9633c4c 100644 --- a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemProvider.java +++ b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemProvider.java @@ -65,6 +65,23 @@ public interface FileSystemProvider { FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map config) throws IOException; + /** + * Get the FileSystem instance according to the configuration map and file path. + * + * @param config The configuration for the FileSystem instance. + * @param path The path to the file system. + * @param disableCache Whether to cache the FileSystem instance. + * @return The FileSystem instance. + * @throws IOException If the FileSystem instance cannot be created. + */ + default FileSystem getFileSystem( + @Nonnull Path path, @Nonnull Map config, boolean disableCache) + throws IOException { + // disable cache + config.put(String.format("fs.%s.impl.disable.cache", scheme()), String.valueOf(disableCache)); + return getFileSystem(path, config); + } + /** * Scheme of this FileSystem provider. The value is 'file' for LocalFileSystem, 'hdfs' for HDFS, * etc. diff --git a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/HDFSFileSystemProvider.java b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/HDFSFileSystemProvider.java index c6bc8e2e99a..27c816b6321 100644 --- a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/HDFSFileSystemProvider.java +++ b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/HDFSFileSystemProvider.java @@ -37,7 +37,7 @@ public FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map (k, v) -> { configuration.set(k.replace(GRAVITINO_BYPASS, ""), v); }); - return FileSystem.newInstance(path.toUri(), configuration); + return FileSystem.get(path.toUri(), configuration); } @Override diff --git a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/LocalFileSystemProvider.java b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/LocalFileSystemProvider.java index 5a2f10f473c..01e9cc2b783 100644 --- a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/LocalFileSystemProvider.java +++ b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/LocalFileSystemProvider.java @@ -37,7 +37,7 @@ public FileSystem getFileSystem(Path path, Map config) throws IO configuration.set(k.replace(BUILTIN_HDFS_FS_PROVIDER, ""), v); }); - return FileSystem.newInstance(path.toUri(), configuration); + return FileSystem.get(path.toUri(), configuration); } @Override diff --git a/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java b/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java index 67bfe961a22..84342c82fa8 100644 --- a/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java +++ b/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java @@ -319,7 +319,7 @@ private FilesetContextPair getFilesetContext(Path virtualPath, FilesetDataOperat totalProperty.putAll(getCredentialProperties(provider, catalog, identifier)); - return provider.getFileSystem(filePath, totalProperty); + return provider.getFileSystem(filePath, totalProperty, true); } catch (IOException ioe) { throw new GravitinoRuntimeException( ioe,