Skip to content

Commit a14a1dd

Browse files
authored
fix reference CI (#1793)
1 parent f30bc75 commit a14a1dd

File tree

2 files changed

+74
-23
lines changed

2 files changed

+74
-23
lines changed

fsspec/implementations/reference.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -140,13 +140,13 @@ def __init__(
140140

141141
self.root = root
142142
self.chunk_sizes = {}
143-
self.out_root = out_root or self.root
144143
self.cat_thresh = categorical_threshold
145144
self.engine = engine
146145
self.cache_size = cache_size
147146
self.url = self.root + "/{field}/refs.{record}.parq"
148147
# TODO: derive fs from `root`
149148
self.fs = fsspec.filesystem("file") if fs is None else fs
149+
self.out_root = self.fs.unstrip_protocol(out_root or self.root)
150150

151151
from importlib.util import find_spec
152152

@@ -498,7 +498,6 @@ def write(self, field, record, base_url=None, storage_options=None):
498498
}
499499
else:
500500
raise NotImplementedError(f"{self.engine} not supported")
501-
502501
df.to_parquet(
503502
fn,
504503
engine=self.engine,

fsspec/implementations/tests/test_reference.py

+73-21
Original file line numberDiff line numberDiff line change
@@ -489,9 +489,9 @@ def test_missing_nonasync(m):
489489
}
490490
refs = {".zarray": json.dumps(zarray)}
491491

492-
m = fsspec.get_mapper("reference://", fo=refs, remote_protocol="memory")
493-
494-
a = zarr.open_array(m)
492+
a = zarr.open_array(
493+
"reference://", storage_options={"fo": refs, "remote_protocol": "memory"}
494+
)
495495
assert str(a[0]) == "nan"
496496

497497

@@ -800,9 +800,16 @@ def test_cached(m, tmpdir):
800800
@pytest.fixture()
801801
def lazy_refs(m):
802802
zarr = pytest.importorskip("zarr")
803-
l = LazyReferenceMapper.create("memory://refs", fs=m)
804-
g = zarr.open(l, mode="w")
803+
skip_zarr_2()
804+
l = LazyReferenceMapper.create("memory://refs.parquet", fs=m)
805+
g = zarr.open(
806+
"reference://",
807+
storage_options={"fo": "memory://refs.parquet", "remote_options": "memory"},
808+
zarr_format=2,
809+
mode="w",
810+
)
805811
g.create_dataset(name="data", shape=(100,), chunks=(10,), dtype="int64")
812+
g.store.fs.references.flush()
806813
return l
807814

808815

@@ -814,15 +821,15 @@ def test_append_parquet(lazy_refs, m):
814821
assert lazy_refs["data/0"] == b"data"
815822
lazy_refs.flush()
816823

817-
lazy2 = LazyReferenceMapper("memory://refs", fs=m)
824+
lazy2 = LazyReferenceMapper("memory://refs.parquet", fs=m)
818825
assert lazy2["data/0"] == b"data"
819826
with pytest.raises(KeyError):
820827
lazy_refs["data/1"]
821828
lazy2["data/1"] = b"Bdata"
822829
assert lazy2["data/1"] == b"Bdata"
823830
lazy2.flush()
824831

825-
lazy2 = LazyReferenceMapper("memory://refs", fs=m)
832+
lazy2 = LazyReferenceMapper("memory://refs.parquet", fs=m)
826833
assert lazy2["data/0"] == b"data"
827834
assert lazy2["data/1"] == b"Bdata"
828835
lazy2["data/1"] = b"Adata"
@@ -831,81 +838,126 @@ def test_append_parquet(lazy_refs, m):
831838
assert "data/0" not in lazy2
832839
lazy2.flush()
833840

834-
lazy2 = LazyReferenceMapper("memory://refs", fs=m)
841+
lazy2 = LazyReferenceMapper("memory://refs.parquet", fs=m)
835842
with pytest.raises(KeyError):
836843
lazy2["data/0"]
837844
assert lazy2["data/1"] == b"Adata"
838845

839846

847+
def skip_zarr_2():
848+
import zarr
849+
from packaging.version import parse
850+
851+
if parse(zarr.__version__) < parse("3.0"):
852+
pytest.skip("Zarr 3 required")
853+
854+
840855
@pytest.mark.parametrize("engine", ["fastparquet", "pyarrow"])
841856
def test_deep_parq(m, engine):
842857
pytest.importorskip("kerchunk")
843858
zarr = pytest.importorskip("zarr")
859+
skip_zarr_2()
844860

845861
lz = fsspec.implementations.reference.LazyReferenceMapper.create(
846862
"memory://out.parq",
847863
fs=m,
848864
engine=engine,
849865
)
850-
g = zarr.open_group(lz, mode="w")
866+
g = zarr.open_group(
867+
"reference://",
868+
mode="w",
869+
storage_options={"fo": "memory://out.parq", "remote_protocol": "memory"},
870+
zarr_version=2,
871+
)
851872

852873
g2 = g.create_group("instant")
853-
g2.create_dataset(name="one", data=[1, 2, 3])
874+
arr = g2.create_dataset(name="one", shape=(3,), dtype="int64")
875+
arr[:] = [1, 2, 3]
876+
g.store.fs.references.flush()
854877
lz.flush()
855878

856879
lz = fsspec.implementations.reference.LazyReferenceMapper(
857880
"memory://out.parq", fs=m, engine=engine
858881
)
859-
g = zarr.open_group(lz)
860-
assert g.instant.one[:].tolist() == [1, 2, 3]
861-
assert sorted(_["name"] for _ in lz.ls("")) == [".zgroup", ".zmetadata", "instant"]
882+
g = zarr.open_group(
883+
"reference://",
884+
storage_options={"fo": "memory://out.parq", "remote_protocol": "memory"},
885+
zarr_version=2,
886+
)
887+
assert g["instant"]["one"][:].tolist() == [1, 2, 3]
888+
assert sorted(_["name"] for _ in lz.ls("")) == [
889+
".zattrs",
890+
".zgroup",
891+
".zmetadata",
892+
"instant",
893+
]
862894
assert sorted(_["name"] for _ in lz.ls("instant")) == [
895+
"instant/.zattrs",
863896
"instant/.zgroup",
864897
"instant/one",
865898
]
866899

867900
assert sorted(_["name"] for _ in lz.ls("instant/one")) == [
868901
"instant/one/.zarray",
902+
"instant/one/.zattrs",
869903
"instant/one/0",
870904
]
871905

872906

873907
def test_parquet_no_data(m):
874908
zarr = pytest.importorskip("zarr")
875-
lz = fsspec.implementations.reference.LazyReferenceMapper.create(
909+
skip_zarr_2()
910+
fsspec.implementations.reference.LazyReferenceMapper.create(
876911
"memory://out.parq", fs=m
877912
)
878-
879-
g = zarr.open_group(lz, mode="w")
913+
g = zarr.open_group(
914+
"reference://",
915+
storage_options={
916+
"fo": "memory://out.parq",
917+
"fs": m,
918+
"remote_protocol": "memory",
919+
},
920+
zarr_format=2,
921+
mode="w",
922+
)
880923
arr = g.create_dataset(
881924
name="one",
882925
dtype="int32",
883926
shape=(10,),
884927
chunks=(5,),
885-
compression=None,
928+
compressor=None,
886929
fill_value=1,
887930
)
888-
lz.flush()
931+
g.store.fs.references.flush()
889932

890933
assert (arr[:] == 1).all()
891934

892935

893936
def test_parquet_no_references(m):
894937
zarr = pytest.importorskip("zarr")
938+
skip_zarr_2()
895939
lz = fsspec.implementations.reference.LazyReferenceMapper.create(
896940
"memory://out.parq", fs=m
897941
)
898942

899-
g = zarr.open_group(lz, mode="w")
943+
g = zarr.open_group(
944+
"reference://",
945+
storage_options={
946+
"fo": "memory://out.parq",
947+
"fs": m,
948+
"remote_protocol": "memory",
949+
},
950+
zarr_format=2,
951+
mode="w",
952+
)
900953
arr = g.create_dataset(
901954
name="one",
902955
dtype="int32",
903956
shape=(),
904957
chunks=(),
905-
compression=None,
958+
compressor=None,
906959
fill_value=1,
907960
)
908961
lz.flush()
909-
arr[...]
910962

911963
assert arr[...].tolist() == 1 # scalar, equal to fill value

0 commit comments

Comments
 (0)