From a93de68c1243cca839ab96c19c1372e5fad7884b Mon Sep 17 00:00:00 2001 From: THKASTL Date: Thu, 20 Jul 2023 18:41:43 +0200 Subject: [PATCH 01/10] For exists(), more precise error handling --- s3fs/core.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/s3fs/core.py b/s3fs/core.py index 80b6fe95..ad6189bd 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -1019,12 +1019,17 @@ async def _exists(self, path): "list_objects_v2", MaxKeys=1, Bucket=bucket, **self.req_kw ) return True - except Exception: + except (FileNotFoundError, PermissionError): pass try: await self._call_s3("get_bucket_location", Bucket=bucket, **self.req_kw) return True - except Exception: + except FileNotFoundError: + return False + except PermissionError as e: + logger.warning( + "Bucket %s doesn't exist or you don't have access to it.", + bucket) return False exists = sync_wrapper(_exists) From ebb6535ca7e22045ad0c221b7ea7d8621c3b7cc5 Mon Sep 17 00:00:00 2001 From: THKASTL Date: Thu, 20 Jul 2023 18:57:39 +0200 Subject: [PATCH 02/10] Remove unnecessary exception object --- s3fs/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3fs/core.py b/s3fs/core.py index ad6189bd..54f4e8af 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -1026,7 +1026,7 @@ async def _exists(self, path): return True except FileNotFoundError: return False - except PermissionError as e: + except PermissionError: logger.warning( "Bucket %s doesn't exist or you don't have access to it.", bucket) From 8090d41de03590e58445610050499085027e5637 Mon Sep 17 00:00:00 2001 From: THKASTL Date: Thu, 20 Jul 2023 19:10:13 +0200 Subject: [PATCH 03/10] Fix pre-commit error --- s3fs/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/s3fs/core.py b/s3fs/core.py index 54f4e8af..2843b918 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -1028,8 +1028,8 @@ async def _exists(self, path): return False except PermissionError: logger.warning( - "Bucket %s doesn't exist or you don't have access to it.", - bucket) + "Bucket %s doesn't exist or you don't have access to it.", bucket + ) return False exists = sync_wrapper(_exists) From 36dd8380c04223538bdb198379a7d4002799ff5f Mon Sep 17 00:00:00 2001 From: THKASTL Date: Fri, 21 Jul 2023 18:08:35 +0200 Subject: [PATCH 04/10] Add tests --- s3fs/tests/test_s3fs.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index 1b4cd769..e269bf7b 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -21,6 +21,7 @@ from s3fs.core import S3FileSystem from s3fs.utils import ignoring, SSEParams from botocore.exceptions import NoCredentialsError +from botocore.exceptions import EndpointConnectionError from fsspec.asyn import sync from fsspec.callbacks import Callback from packaging import version @@ -2464,6 +2465,34 @@ def test_exists_isdir(s3): assert not s3.isdir(bad_path) +def test_exists_raises_on_connection_error(monkeypatch): + # Ensure that we raise a ConnectionError instead of returning False if we + # are not actually able to connect to storage, by setting a fake proxy and + # then re-creating the S3FileSystem instance. + monkeypatch.setenv('https_proxy', 'https://fakeproxy.127.0.0.1:8080') + S3FileSystem.clear_instance_cache() + s3 = S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_uri}) + s3.invalidate_cache() + with pytest.raises(EndpointConnectionError): + s3.exists('this-bucket-does-not-exist/') + + +def test_exists_bucket_nonexistent_or_no_access(caplog): + # Ensure that a warning is raised and False is returned if querying a + # bucket that might either not exist or be private. + fs = s3fs.S3FileSystem(key="asdfasfsdf", secret="sadfdsfds") + assert not fs.exists("s3://this-bucket-might-not-exist/") + assert caplog.records[0].levelname == "WARNING" + assert "doesn't exist or you don't have access" in caplog.text + + +def test_exists_bucket_nonexistent(s3, caplog): + # Ensure that NO warning is raised and False is returned if checking bucket + # existance when we have full access. + assert not s3.exists('non_existent_bucket/') + assert len(caplog.records) == 0 + + def test_list_del_multipart(s3): path = test_bucket_name + "/afile" f = s3.open(path, "wb") From bac41a1a62c2e4e26844ffbed23ccc3a1918c38c Mon Sep 17 00:00:00 2001 From: THKASTL Date: Mon, 24 Jul 2023 11:37:43 +0200 Subject: [PATCH 05/10] Fix precommit and warnings error --- s3fs/tests/test_s3fs.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index e269bf7b..a180f6d8 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -2469,18 +2469,19 @@ def test_exists_raises_on_connection_error(monkeypatch): # Ensure that we raise a ConnectionError instead of returning False if we # are not actually able to connect to storage, by setting a fake proxy and # then re-creating the S3FileSystem instance. - monkeypatch.setenv('https_proxy', 'https://fakeproxy.127.0.0.1:8080') + monkeypatch.setenv("https_proxy", "https://fakeproxy.127.0.0.1:8080") S3FileSystem.clear_instance_cache() s3 = S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_uri}) s3.invalidate_cache() with pytest.raises(EndpointConnectionError): - s3.exists('this-bucket-does-not-exist/') + s3.exists("this-bucket-does-not-exist/") def test_exists_bucket_nonexistent_or_no_access(caplog): # Ensure that a warning is raised and False is returned if querying a # bucket that might either not exist or be private. - fs = s3fs.S3FileSystem(key="asdfasfsdf", secret="sadfdsfds") + caplog.clear() + fs = s3fs.S3FileSystem(key="wrongkey", secret="wrongsecret") assert not fs.exists("s3://this-bucket-might-not-exist/") assert caplog.records[0].levelname == "WARNING" assert "doesn't exist or you don't have access" in caplog.text @@ -2489,7 +2490,7 @@ def test_exists_bucket_nonexistent_or_no_access(caplog): def test_exists_bucket_nonexistent(s3, caplog): # Ensure that NO warning is raised and False is returned if checking bucket # existance when we have full access. - assert not s3.exists('non_existent_bucket/') + assert not s3.exists("non_existent_bucket/") assert len(caplog.records) == 0 From 1f957ad73e54aa9aff9de5f4819c038e7a282039 Mon Sep 17 00:00:00 2001 From: THKASTL Date: Mon, 24 Jul 2023 16:16:36 +0200 Subject: [PATCH 06/10] Add comment and skip instance cache --- s3fs/tests/test_s3fs.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index a180f6d8..69be62aa 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -2471,7 +2471,11 @@ def test_exists_raises_on_connection_error(monkeypatch): # then re-creating the S3FileSystem instance. monkeypatch.setenv("https_proxy", "https://fakeproxy.127.0.0.1:8080") S3FileSystem.clear_instance_cache() - s3 = S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_uri}) + s3 = S3FileSystem( + anon=False, + client_kwargs={"endpoint_url": endpoint_uri}, + skip_instance_cache=True, + ) s3.invalidate_cache() with pytest.raises(EndpointConnectionError): s3.exists("this-bucket-does-not-exist/") @@ -2480,6 +2484,7 @@ def test_exists_raises_on_connection_error(monkeypatch): def test_exists_bucket_nonexistent_or_no_access(caplog): # Ensure that a warning is raised and False is returned if querying a # bucket that might either not exist or be private. + # This tests against real AWS, might fail due to network issues. caplog.clear() fs = s3fs.S3FileSystem(key="wrongkey", secret="wrongsecret") assert not fs.exists("s3://this-bucket-might-not-exist/") From 2ac545b0e8b51d74bbec5b4139c863b251bf3d4f Mon Sep 17 00:00:00 2001 From: Thomas Kastl Date: Tue, 25 Jul 2023 14:54:40 +0200 Subject: [PATCH 07/10] Update s3fs/tests/test_s3fs.py Co-authored-by: Martin Durant --- s3fs/tests/test_s3fs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index 69be62aa..9506c112 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -2469,12 +2469,12 @@ def test_exists_raises_on_connection_error(monkeypatch): # Ensure that we raise a ConnectionError instead of returning False if we # are not actually able to connect to storage, by setting a fake proxy and # then re-creating the S3FileSystem instance. - monkeypatch.setenv("https_proxy", "https://fakeproxy.127.0.0.1:8080") S3FileSystem.clear_instance_cache() s3 = S3FileSystem( anon=False, client_kwargs={"endpoint_url": endpoint_uri}, skip_instance_cache=True, + endpoint_url="https://fakeproxy.127.0.0.1:8080/" ) s3.invalidate_cache() with pytest.raises(EndpointConnectionError): From 54c3beef3644e9c7810832dd8745ea0d7244bf74 Mon Sep 17 00:00:00 2001 From: THKASTL Date: Tue, 25 Jul 2023 14:58:18 +0200 Subject: [PATCH 08/10] Fix tests: Clear existing warnings and remove duplicate url --- s3fs/tests/test_s3fs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index 9506c112..07871ee4 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -2472,7 +2472,6 @@ def test_exists_raises_on_connection_error(monkeypatch): S3FileSystem.clear_instance_cache() s3 = S3FileSystem( anon=False, - client_kwargs={"endpoint_url": endpoint_uri}, skip_instance_cache=True, endpoint_url="https://fakeproxy.127.0.0.1:8080/" ) @@ -2495,6 +2494,7 @@ def test_exists_bucket_nonexistent_or_no_access(caplog): def test_exists_bucket_nonexistent(s3, caplog): # Ensure that NO warning is raised and False is returned if checking bucket # existance when we have full access. + caplog.clear() assert not s3.exists("non_existent_bucket/") assert len(caplog.records) == 0 From 49a9ac55ae419f0c6a598f21dd2da7325a7466ec Mon Sep 17 00:00:00 2001 From: THKASTL Date: Tue, 25 Jul 2023 17:01:19 +0200 Subject: [PATCH 09/10] Fix pre-commit error --- s3fs/tests/test_s3fs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index 07871ee4..136072d1 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -2473,7 +2473,7 @@ def test_exists_raises_on_connection_error(monkeypatch): s3 = S3FileSystem( anon=False, skip_instance_cache=True, - endpoint_url="https://fakeproxy.127.0.0.1:8080/" + endpoint_url="https://fakeproxy.127.0.0.1:8080/", ) s3.invalidate_cache() with pytest.raises(EndpointConnectionError): From 584c74f9ada217dadbafc4496d46a4d910d37b6e Mon Sep 17 00:00:00 2001 From: THKASTL Date: Tue, 25 Jul 2023 17:13:51 +0200 Subject: [PATCH 10/10] Remove outdated monkeypatch --- s3fs/tests/test_s3fs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index 136072d1..88def234 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -2465,7 +2465,7 @@ def test_exists_isdir(s3): assert not s3.isdir(bad_path) -def test_exists_raises_on_connection_error(monkeypatch): +def test_exists_raises_on_connection_error(): # Ensure that we raise a ConnectionError instead of returning False if we # are not actually able to connect to storage, by setting a fake proxy and # then re-creating the S3FileSystem instance.