@@ -489,9 +489,9 @@ def test_missing_nonasync(m):
489
489
}
490
490
refs = {".zarray" : json .dumps (zarray )}
491
491
492
- m = fsspec . get_mapper ( "reference://" , fo = refs , remote_protocol = "memory" )
493
-
494
- a = zarr . open_array ( m )
492
+ a = zarr . open_array (
493
+ "reference://" , storage_options = { "fo" : refs , "remote_protocol" : "memory" }
494
+ )
495
495
assert str (a [0 ]) == "nan"
496
496
497
497
@@ -800,9 +800,16 @@ def test_cached(m, tmpdir):
800
800
@pytest.fixture()
def lazy_refs(m):
    """Return a parquet-backed ``LazyReferenceMapper`` seeded with a zarr group.

    The mapper is created at ``memory://refs.parquet``; a zarr v2 group is then
    opened for writing through the ``reference://`` filesystem pointed at that
    same location, a 100-element ``int64`` dataset named ``data`` (chunks of
    10) is created in it, and the store's references are flushed.

    Skips the requesting test when zarr is missing or older than 3.0.
    """
    zarr = pytest.importorskip("zarr")
    skip_zarr_2()
    refs = LazyReferenceMapper.create("memory://refs.parquet", fs=m)
    g = zarr.open(
        "reference://",
        # "remote_protocol" names the filesystem holding the referenced data,
        # as in the other tests of this module; the previous "remote_options"
        # key expects a dict of filesystem kwargs, not a protocol string.
        storage_options={"fo": "memory://refs.parquet", "remote_protocol": "memory"},
        zarr_format=2,
        mode="w",
    )
    g.create_dataset(name="data", shape=(100,), chunks=(10,), dtype="int64")
    # Persist what zarr wrote through the store's own reference mapper.
    g.store.fs.references.flush()
    return refs
807
814
808
815
@@ -814,15 +821,15 @@ def test_append_parquet(lazy_refs, m):
814
821
assert lazy_refs ["data/0" ] == b"data"
815
822
lazy_refs .flush ()
816
823
817
- lazy2 = LazyReferenceMapper ("memory://refs" , fs = m )
824
+ lazy2 = LazyReferenceMapper ("memory://refs.parquet" , fs = m )
818
825
assert lazy2 ["data/0" ] == b"data"
819
826
with pytest .raises (KeyError ):
820
827
lazy_refs ["data/1" ]
821
828
lazy2 ["data/1" ] = b"Bdata"
822
829
assert lazy2 ["data/1" ] == b"Bdata"
823
830
lazy2 .flush ()
824
831
825
- lazy2 = LazyReferenceMapper ("memory://refs" , fs = m )
832
+ lazy2 = LazyReferenceMapper ("memory://refs.parquet" , fs = m )
826
833
assert lazy2 ["data/0" ] == b"data"
827
834
assert lazy2 ["data/1" ] == b"Bdata"
828
835
lazy2 ["data/1" ] = b"Adata"
@@ -831,81 +838,126 @@ def test_append_parquet(lazy_refs, m):
831
838
assert "data/0" not in lazy2
832
839
lazy2 .flush ()
833
840
834
- lazy2 = LazyReferenceMapper ("memory://refs" , fs = m )
841
+ lazy2 = LazyReferenceMapper ("memory://refs.parquet" , fs = m )
835
842
with pytest .raises (KeyError ):
836
843
lazy2 ["data/0" ]
837
844
assert lazy2 ["data/1" ] == b"Adata"
838
845
839
846
847
def skip_zarr_2():
    """Skip the calling test when the installed zarr is older than 3.0."""
    import zarr
    from packaging.version import parse

    installed = parse(zarr.__version__)
    minimum = parse("3.0")
    if installed < minimum:
        pytest.skip("Zarr 3 required")
853
+
854
+
840
855
@pytest.mark.parametrize("engine", ["fastparquet", "pyarrow"])
def test_deep_parq(m, engine):
    """Write a nested zarr v2 group via parquet references and read it back.

    A group ``instant`` holding the array ``one`` is written through the
    ``reference://`` filesystem pointed at ``memory://out.parq``. The array
    contents and the directory listings are then checked through a freshly
    opened ``LazyReferenceMapper`` and zarr group.
    """
    pytest.importorskip("kerchunk")
    zarr = pytest.importorskip("zarr")
    skip_zarr_2()

    lz = fsspec.implementations.reference.LazyReferenceMapper.create(
        "memory://out.parq",
        fs=m,
        engine=engine,
    )
    g = zarr.open_group(
        "reference://",
        mode="w",
        storage_options={"fo": "memory://out.parq", "remote_protocol": "memory"},
        # ``zarr_format`` is the keyword used by the other tests in this
        # module; ``zarr_version`` is its older spelling.
        zarr_format=2,
    )

    g2 = g.create_group("instant")
    arr = g2.create_dataset(name="one", shape=(3,), dtype="int64")
    arr[:] = [1, 2, 3]
    # Persist what zarr wrote through the store's own reference mapper.
    g.store.fs.references.flush()
    lz.flush()

    # Re-open everything from the stored parquet references.
    lz = fsspec.implementations.reference.LazyReferenceMapper(
        "memory://out.parq", fs=m, engine=engine
    )
    g = zarr.open_group(
        "reference://",
        storage_options={"fo": "memory://out.parq", "remote_protocol": "memory"},
        zarr_format=2,
    )
    assert g["instant"]["one"][:].tolist() == [1, 2, 3]
    assert sorted(_["name"] for _ in lz.ls("")) == [
        ".zattrs",
        ".zgroup",
        ".zmetadata",
        "instant",
    ]
    assert sorted(_["name"] for _ in lz.ls("instant")) == [
        "instant/.zattrs",
        "instant/.zgroup",
        "instant/one",
    ]

    assert sorted(_["name"] for _ in lz.ls("instant/one")) == [
        "instant/one/.zarray",
        "instant/one/.zattrs",
        "instant/one/0",
    ]
871
905
872
906
873
907
def test_parquet_no_data(m):
    """Reading an array with no written chunks yields its fill value.

    A 10-element ``int32`` array with ``fill_value=1`` is created through the
    ``reference://`` filesystem backed by ``memory://out.parq``; nothing is
    assigned to it, so every element read back must equal 1.
    """
    zarr = pytest.importorskip("zarr")
    skip_zarr_2()

    fsspec.implementations.reference.LazyReferenceMapper.create(
        "memory://out.parq", fs=m
    )

    options = {
        "fo": "memory://out.parq",
        "fs": m,
        "remote_protocol": "memory",
    }
    g = zarr.open_group(
        "reference://", storage_options=options, zarr_format=2, mode="w"
    )

    array_spec = {
        "name": "one",
        "dtype": "int32",
        "shape": (10,),
        "chunks": (5,),
        "compressor": None,
        "fill_value": 1,
    }
    arr = g.create_dataset(**array_spec)
    g.store.fs.references.flush()

    values = arr[:]
    assert (values == 1).all()
891
934
892
935
893
936
def test_parquet_no_references(m):
    """A scalar array with no written data reads back as its fill value.

    A zero-dimensional ``int32`` array with ``fill_value=1`` is created through
    the ``reference://`` filesystem backed by ``memory://out.parq``; indexing
    it with ``...`` must give 1.
    """
    zarr = pytest.importorskip("zarr")
    skip_zarr_2()
    lz = fsspec.implementations.reference.LazyReferenceMapper.create(
        "memory://out.parq", fs=m
    )

    options = {
        "fo": "memory://out.parq",
        "fs": m,
        "remote_protocol": "memory",
    }
    g = zarr.open_group(
        "reference://", storage_options=options, zarr_format=2, mode="w"
    )

    array_spec = {
        "name": "one",
        "dtype": "int32",
        "shape": (),
        "chunks": (),
        "compressor": None,
        "fill_value": 1,
    }
    arr = g.create_dataset(**array_spec)
    lz.flush()

    scalar = arr[...]
    assert scalar.tolist() == 1  # scalar, equal to fill value
0 commit comments