From fb0fb0cb077778b13071149369d1a57d673a4caf Mon Sep 17 00:00:00 2001 From: Leo Ghignone Date: Fri, 16 Feb 2024 11:35:26 +1100 Subject: [PATCH] Add s3 adapter --- frictionless/schemes/aws/__init__.py | 2 ++ frictionless/schemes/aws/adapter.py | 54 ++++++++++++++++++++++++++++ frictionless/schemes/aws/plugin.py | 18 +++++++++- tests/package/test_s3.py | 51 ++++++++++++++++++++++++++ 4 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 frictionless/schemes/aws/adapter.py create mode 100644 tests/package/test_s3.py diff --git a/frictionless/schemes/aws/__init__.py b/frictionless/schemes/aws/__init__.py index 71c5fa5fa0..3c05096256 100644 --- a/frictionless/schemes/aws/__init__.py +++ b/frictionless/schemes/aws/__init__.py @@ -1,8 +1,10 @@ +from .adapter import AwsAdapter from .control import AwsControl from .loaders import S3Loader from .plugin import AwsPlugin __all__ = [ + "AwsAdapter", "AwsControl", "AwsPlugin", "S3Loader", diff --git a/frictionless/schemes/aws/adapter.py b/frictionless/schemes/aws/adapter.py new file mode 100644 index 0000000000..9891bd8e62 --- /dev/null +++ b/frictionless/schemes/aws/adapter.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import io +import json +from typing import Any, Optional +from urllib.parse import urlparse + +from ... import helpers +from ...exception import FrictionlessException +from ...package import Package +from ...platform import platform +from ...system import Adapter +from .control import AwsControl + + +class AwsAdapter(Adapter): + def __init__(self, source: Any, *, basepath: Optional[str] = None): + self.source = source + self.basepath = basepath + + # Read + + def read_package(self): + normsource = helpers.join_basepath(self.source, basepath=self.basepath) + parts = urlparse(normsource, allow_fragments=False) + if parts.scheme == "s3": + control = AwsControl() + client = platform.boto3.resource("s3", endpoint_url=control.s3_endpoint_url) + object = client.Object(bucket_name=parts.netloc, key=parts.path[1:]) + try: + content = object.get()["Body"].read().decode() + except client.meta.client.exceptions.NoSuchBucket: + note = f'No such bucket: "{parts.netloc}"' + raise FrictionlessException(note) + except client.meta.client.exceptions.NoSuchKey: + note = f'No such key: "{parts.path}"' + raise FrictionlessException(note) + except client.meta.client.exceptions.ClientError as exception: + note = f'AWS error: "{exception}"' + raise FrictionlessException(note) + except Exception as exception: + note = f'Cannot read package: "{exception}"' + raise FrictionlessException(note) from exception + else: + if normsource.endswith(".yaml"): + descriptor = platform.yaml.safe_load(io.StringIO(content)) + else: + descriptor = json.loads(content) + + basepath = normsource.rsplit("/", 1)[0] + return Package.from_descriptor(descriptor, basepath=basepath) + + note = "Cannot read package" + raise FrictionlessException(note) diff --git a/frictionless/schemes/aws/plugin.py b/frictionless/schemes/aws/plugin.py index b5e27f3da1..e0170a30e7 100644 --- a/frictionless/schemes/aws/plugin.py +++ b/frictionless/schemes/aws/plugin.py @@ -1,12 +1,15 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Any, Optional +from urllib.parse import urlparse from ...system import Plugin +from .adapter import AwsAdapter from .control import AwsControl from .loaders import S3Loader if TYPE_CHECKING: + from ...dialect import Control from ...resource import Resource from ...system import Loader @@ -16,6 +19,19 @@ class AwsPlugin(Plugin): # Hooks + def create_adapter( + self, + source: Any, + *, + control: Optional[Control] = None, + basepath: Optional[str] = None, + packagify: bool = False, + ): + if isinstance(source, str): + parsed = urlparse(source) + if parsed.scheme == "s3": + return AwsAdapter(source=source, basepath=basepath) + def create_loader(self, resource: Resource) -> Optional[Loader]: if resource.scheme == "s3": return S3Loader(resource) diff --git a/tests/package/test_s3.py b/tests/package/test_s3.py new file mode 100644 index 0000000000..25032235e5 --- /dev/null +++ b/tests/package/test_s3.py @@ -0,0 +1,51 @@ +import random +import string + +import boto3 +import pytest +from moto import mock_s3 + +from frictionless import Package + +# Read + + +@mock_s3 +def test_s3_package(bucket_name): + # Write + client = boto3.resource("s3", region_name="us-east-1") + bucket = client.create_bucket(Bucket=bucket_name, ACL="public-read") # type: ignore + bucket.put_object( + ACL="private", + Body=open("data/package.json", "rb"), + Bucket=bucket_name, + ContentType="text/json", + Key="package.json", + ) + + # Read + package = Package(f"s3://{bucket_name}/package.json") + + assert package.name == "name" + assert package.basepath == f"s3://{bucket_name}" + assert package.to_descriptor() == { + "name": "name", + "resources": [ + { + "name": "name", + "type": "table", + "path": "table.csv", + "scheme": "file", + "format": "csv", + "mediatype": "text/csv", + }, + ], + } + + +# Fixtures + + +@pytest.fixture +def bucket_name(): + return "bucket_%s" % "".join(random.choice(string.digits) for _ in range(16))