Skip to content

Commit 451d30c

Browse files
authored
Use Hadoop Filesystem Path URI instead of java.net.URI (#67)
`java.net.URI` cannot parse paths that contain glob-style patterns, for example, `'s3a://bucket/{year=2019/month=1,year=2019/month=2}/*'`. Instead, we should use the Hadoop FileSystem `Path` and then convert the path to a URI using the `.toUri()` method. For example, to obtain the scheme from a path: ```scala new Path(bucketPathString).toUri.getScheme ```
1 parent 96fe40a commit 451d30c

3 files changed

Lines changed: 10 additions & 6 deletions

File tree

src/main/scala/com/exasol/cloudetl/bucket/Bucket.scala

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
package com.exasol.cloudetl.bucket
22

3-
import java.net.URI
4-
53
import com.exasol.cloudetl.storage.StorageProperties
64
import com.exasol.cloudetl.util.FileSystemUtil
75

@@ -60,7 +58,7 @@ abstract class Bucket extends LazyLogging {
6058
* bucket path.
6159
*/
6260
final lazy val fileSystem: FileSystem =
63-
FileSystem.get(new URI(bucketPath), getConfiguration())
61+
FileSystem.get(new Path(bucketPath).toUri, getConfiguration())
6462

6563
/**
6664
* Get all the paths in this bucket path.

src/main/scala/com/exasol/cloudetl/storage/StorageProperties.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
package com.exasol.cloudetl.storage
22

3-
import java.net.URI
4-
53
import com.exasol.ExaConnectionInformation
64
import com.exasol.ExaMetadata
75
import com.exasol.cloudetl.common.AbstractProperties
86
import com.exasol.cloudetl.common.CommonProperties
97

8+
import org.apache.hadoop.fs.Path
9+
1010
/**
1111
* A specific implementation of
1212
* [[com.exasol.cloudetl.common.AbstractProperties]] that handles user
@@ -36,7 +36,7 @@ class StorageProperties(
3636
* load data, returns the scheme `s3a` value.
3737
*/
3838
final def getStoragePathScheme(): String =
39-
new URI(getStoragePath()).getScheme
39+
new Path(getStoragePath()).toUri.getScheme
4040

4141
/** Returns the [[FileFormat]] file format. */
4242
final def getFileFormat(): FileFormat =

src/test/scala/com/exasol/cloudetl/storage/StoragePropertiesTest.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ class StoragePropertiesTest extends AnyFunSuite with BeforeAndAfterEach with Moc
4444
}
4545
}
4646

47+
test("getStoragePathScheme returns path scheme with regex pattern") {
48+
val path = "s3a://bucket/{year=2019/month=1,year=2019/month=2}/*"
49+
properties = Map(StorageProperties.BUCKET_PATH -> path)
50+
assert(BaseProperties(properties).getStoragePathScheme() === "s3a")
51+
}
52+
4753
test("getFileFormat returns supported file format value") {
4854
properties = Map(
4955
StorageProperties.BUCKET_PATH -> "path",

0 commit comments

Comments
 (0)