Skip to content

Commit d70ac0f

Browse files
authored
Merge pull request #1906 from braingram/native_fsspec
Use fsspec for all non-local urls
2 parents 4779d1f + f70f30c commit d70ac0f

15 files changed

Lines changed: 169 additions & 23 deletions

File tree

.github/workflows/ci.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,13 @@ jobs:
125125
default_python: '3.11'
126126
envs: |
127127
- linux: compatibility
128+
129+
mocks3:
130+
needs: [core]
131+
uses: OpenAstronomy/github-actions-workflows/.github/workflows/tox.yml@v1
132+
with:
133+
submodules: false
134+
# Any env name which does not start with `pyXY` will use this Python version.
135+
default_python: '3.11'
136+
envs: |
137+
- linux: mocks3

asdf/_tests/test_generic_io.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@
44
import stat
55
import sys
66
import urllib.request as urllib_request
7+
from contextlib import nullcontext
78

89
import numpy as np
910
import pytest
1011

1112
import asdf
1213
from asdf import exceptions, generic_io
1314
from asdf.config import config_context
15+
from asdf.exceptions import AsdfDeprecationWarning
1416

1517
from . import _helpers as helpers
1618
from . import create_large_tree, create_small_tree
@@ -21,6 +23,26 @@ def tree(request):
2123
return request.param()
2224

2325

26+
@pytest.fixture(params=[True, False])
27+
def has_fsspec(request, monkeypatch):
28+
if request.param:
29+
yield True
30+
else:
31+
pytest.importorskip("fsspec")
32+
monkeypatch.setitem(sys.modules, "fsspec", None)
33+
yield False
34+
35+
36+
@pytest.fixture()
37+
def warn_no_fsspec(has_fsspec):
38+
if has_fsspec:
39+
yield nullcontext()
40+
else:
41+
yield pytest.warns(
42+
AsdfDeprecationWarning, match=r"Opening http urls without fsspec is deprecated. Please install fsspec"
43+
)
44+
45+
2446
def _roundtrip(tree, get_write_fd, get_read_fd, write_options=None, read_options=None):
2547
write_options = {} if write_options is None else write_options
2648
read_options = {} if read_options is None else read_options
@@ -259,7 +281,7 @@ def get_read_fd():
259281

260282

261283
@pytest.mark.remote_data()
262-
def test_http_connection(tree, httpserver):
284+
def test_http_connection(tree, httpserver, warn_no_fsspec):
263285
path = os.path.join(httpserver.tmpdir, "test.asdf")
264286

265287
def get_write_fd():
@@ -274,9 +296,10 @@ def get_read_fd():
274296
fd.read(0)
275297
return fd
276298

277-
with _roundtrip(tree, get_write_fd, get_read_fd) as ff:
278-
assert len(ff._blocks.blocks) == 2
279-
assert (ff.tree["science_data"] == tree["science_data"]).all()
299+
with warn_no_fsspec:
300+
with _roundtrip(tree, get_write_fd, get_read_fd) as ff:
301+
assert len(ff._blocks.blocks) == 2
302+
assert (ff.tree["science_data"] == tree["science_data"]).all()
280303

281304

282305
def test_exploded_filesystem(tree, tmp_path):
@@ -313,7 +336,7 @@ def get_read_fd():
313336

314337

315338
@pytest.mark.remote_data()
316-
def test_exploded_http(tree, httpserver):
339+
def test_exploded_http(tree, httpserver, warn_no_fsspec):
317340
path = os.path.join(httpserver.tmpdir, "test.asdf")
318341

319342
def get_write_fd():
@@ -322,8 +345,9 @@ def get_write_fd():
322345
def get_read_fd():
323346
return generic_io.get_file(httpserver.url + "test.asdf")
324347

325-
with _roundtrip(tree, get_write_fd, get_read_fd, write_options={"all_array_storage": "external"}) as ff:
326-
assert len(list(ff._blocks.blocks)) == 0
348+
with warn_no_fsspec:
349+
with _roundtrip(tree, get_write_fd, get_read_fd, write_options={"all_array_storage": "external"}) as ff:
350+
assert len(list(ff._blocks.blocks)) == 0
327351

328352

329353
def test_exploded_stream_write(small_tree):
@@ -378,7 +402,7 @@ def test_open_stdout():
378402
pass
379403

380404

381-
def test_invalid_obj(tmp_path):
405+
def test_invalid_obj(tmp_path, has_fsspec):
382406
with pytest.raises(ValueError, match=r"Can't handle .* as a file for mode 'r'"):
383407
generic_io.get_file(42)
384408

@@ -392,8 +416,14 @@ def test_invalid_obj(tmp_path):
392416
):
393417
generic_io.get_file(fd, "r")
394418

395-
with pytest.raises(ValueError, match=r"HTTP connections can not be opened for writing"):
396-
generic_io.get_file("http://www.google.com", "w")
419+
url = "http://www.google.com"
420+
mode = "w"
421+
if has_fsspec:
422+
raises_ctx = pytest.raises(ValueError, match=f"Unable to open {url} with mode {mode}")
423+
else:
424+
raises_ctx = pytest.raises(ValueError, match=r"HTTP connections can not be opened for writing")
425+
with raises_ctx:
426+
generic_io.get_file(url, mode)
397427

398428
with pytest.raises(TypeError, match=r"io.StringIO objects are not supported. Use io.BytesIO instead."):
399429
generic_io.get_file(io.StringIO())

asdf/generic_io.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,15 @@
1414
import re
1515
import sys
1616
import tempfile
17+
import warnings
1718
from os import SEEK_CUR, SEEK_END, SEEK_SET
1819
from urllib.request import url2pathname, urlopen
1920

2021
import numpy as np
2122

2223
from . import util
2324
from ._extern import atomicfile
24-
from .exceptions import DelimiterNotFoundError
25+
from .exceptions import AsdfDeprecationWarning, DelimiterNotFoundError
2526
from .util import _patched_urllib_parse
2627

2728
__all__ = ["get_file", "get_uri", "resolve_uri", "relative_uri"]
@@ -402,7 +403,8 @@ def close(self):
402403
"""
403404
if self._close:
404405
self._fd.close()
405-
self._fix_permissions()
406+
if hasattr(self, "_fix_permissions"):
407+
self._fix_permissions()
406408

407409
def truncate(self, size=None):
408410
"""
@@ -1104,16 +1106,10 @@ def get_file(init, mode="r", uri=None, close=False):
11041106

11051107
if isinstance(init, (str, pathlib.Path)):
11061108
parsed = _patched_urllib_parse.urlparse(str(init))
1107-
if parsed.scheme in ["http", "https"]:
1108-
if "w" in mode:
1109-
msg = "HTTP connections can not be opened for writing"
1110-
raise ValueError(msg)
11111109

1112-
return _http_to_temp(init, mode, uri=uri)
1110+
realmode = "r+b" if mode == "rw" else mode + "b"
11131111

11141112
if parsed.scheme in _local_file_schemes:
1115-
realmode = "r+b" if mode == "rw" else mode + "b"
1116-
11171113
# if paths have an extra leading '/' urlparse will
11181114
# parse them even though they violate rfc8089. This will
11191115
# lead to errors or writing files to unexpected locations
@@ -1143,6 +1139,31 @@ def get_file(init, mode="r", uri=None, close=False):
11431139

11441140
return RealFile(fd, mode, close=True, uri=uri)
11451141

1142+
# this is not a local file, import fsspec (if available)
1143+
try:
1144+
import fsspec
1145+
except ImportError:
1146+
fsspec = None
1147+
1148+
if fsspec:
1149+
fd = fsspec.open(init, realmode)
1150+
try:
1151+
fd = fd.__enter__()
1152+
except NotImplementedError as err:
1153+
msg = f"Unable to open {init} with mode {mode}"
1154+
raise ValueError(msg) from err
1155+
return get_file(fd, uri=uri or init, close=True)
1156+
1157+
# finally, allow the legacy http code to handle this (with a warning)
1158+
if parsed.scheme in ["http", "https"]:
1159+
if "w" in mode:
1160+
msg = "HTTP connections can not be opened for writing"
1161+
raise ValueError(msg)
1162+
1163+
msg = "Opening http urls without fsspec is deprecated. Please install fsspec[http]"
1164+
warnings.warn(msg, AsdfDeprecationWarning)
1165+
return _http_to_temp(init, mode, uri=uri)
1166+
11461167
if isinstance(init, io.BytesIO):
11471168
return MemoryIO(init, mode, uri=uri)
11481169

changes/1906.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Optionally use fsspec for urls (like those for s3 resources) provided to asdf.open.

changes/1906.removal.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Deprecate opening http uris unless fsspec is installed.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)