Skip to content

Commit 30fe9d5

Browse files
authored
A bunch of testing for IC1 repos (#1423)
TODO: - [x] rebase is failing
1 parent dcbd543 commit 30fe9d5

24 files changed

+367
-216
lines changed

icechunk-python/tests/conftest.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,27 @@
77
from icechunk import Repository, in_memory_storage, local_filesystem_storage
88

99

10-
def parse_repo(store: Literal["local", "memory"], path: str) -> Repository:
10+
def parse_repo(
11+
store: Literal["local", "memory"], path: str, spec_version: int | None
12+
) -> Repository:
1113
if store == "local":
1214
return Repository.create(
1315
storage=local_filesystem_storage(path),
16+
spec_version=spec_version,
1417
)
1518
if store == "memory":
1619
return Repository.create(
1720
storage=in_memory_storage(),
21+
spec_version=spec_version,
1822
)
1923

2024

2125
@pytest.fixture(scope="function")
22-
def repo(request: pytest.FixtureRequest, tmpdir: str) -> tuple[Repository, str]:
26+
def repo(
27+
request: pytest.FixtureRequest, tmpdir: str, any_spec_version: int | None
28+
) -> tuple[Repository, str]:
2329
param = request.param
24-
repo = parse_repo(param, tmpdir)
30+
repo = parse_repo(param, tmpdir, spec_version=any_spec_version)
2531
return repo, tmpdir
2632

2733

@@ -54,6 +60,8 @@ def write_chunks_to_minio(
5460
return etags
5561

5662

57-
@pytest.fixture(scope="function", params=[1, 2, None])
63+
@pytest.fixture(
64+
scope="function", params=[1, 2, None], ids=["spec-v1", "spec-v2", "no-spec-version"]
65+
)
5866
def any_spec_version(request: pytest.FixtureRequest) -> int | None:
5967
return request.param

icechunk-python/tests/test_commit_properties.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
import zarr
77

88

9-
def test_property_types() -> None:
9+
def test_property_types(any_spec_version: int | None) -> None:
1010
repo = ic.Repository.create(
1111
storage=ic.in_memory_storage(),
12+
spec_version=any_spec_version,
1213
)
1314
session = repo.writable_session("main")
1415
store = session.store
@@ -39,9 +40,10 @@ class NoJson:
3940
pass
4041

4142

42-
def test_invalid_property_types() -> None:
43+
def test_invalid_property_types(any_spec_version: int | None) -> None:
4344
repo = ic.Repository.create(
4445
storage=ic.in_memory_storage(),
46+
spec_version=any_spec_version,
4547
)
4648
session = repo.writable_session("main")
4749
store = session.store

icechunk-python/tests/test_concurrency.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,10 @@ async def list_store(store: icechunk.IcechunkStore, barrier: asyncio.Barrier) ->
4848
await asyncio.sleep(0.1)
4949

5050

51-
async def test_concurrency() -> None:
51+
async def test_concurrency(any_spec_version: int | None) -> None:
5252
repo = icechunk.Repository.open_or_create(
5353
storage=icechunk.in_memory_storage(),
54+
create_version=any_spec_version,
5455
)
5556

5657
session = repo.writable_session("main")
@@ -93,7 +94,7 @@ async def test_concurrency() -> None:
9394

9495

9596
@pytest.mark.filterwarnings("ignore:datetime.datetime.utcnow")
96-
async def test_thread_concurrency() -> None:
97+
async def test_thread_concurrency(any_spec_version: int | None) -> None:
9798
"""Run multiple threads doing different type of operations for SECONDS_TO_RUN seconds.
9899
99100
The threads execute 5 types of operations: reads, native writes, virtual writes, deletes and lists.
@@ -150,6 +151,7 @@ async def test_thread_concurrency() -> None:
150151
storage=storage,
151152
config=config,
152153
authorize_virtual_chunk_access=credentials,
154+
spec_version=any_spec_version,
153155
)
154156

155157
session = repo.writable_session("main")

icechunk-python/tests/test_config.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,16 @@
1010

1111

1212
@pytest.fixture(scope="function")
13-
def tmp_store(tmpdir: Path) -> Generator[tuple[icechunk.IcechunkStore, str]]:
13+
def tmp_store(
14+
tmpdir: Path, any_spec_version: int | None
15+
) -> Generator[tuple[icechunk.IcechunkStore, str]]:
1416
repo_path = f"{tmpdir}"
1517
config = icechunk.RepositoryConfig.default()
1618
config.inline_chunk_threshold_bytes = 5
1719
repo = icechunk.Repository.open_or_create(
1820
storage=icechunk.local_filesystem_storage(repo_path),
1921
config=config,
22+
create_version=any_spec_version,
2023
)
2124

2225
session = repo.writable_session("main")
@@ -25,24 +28,26 @@ def tmp_store(tmpdir: Path) -> Generator[tuple[icechunk.IcechunkStore, str]]:
2528
yield store, repo_path
2629

2730

28-
def test_config_fetch() -> None:
31+
def test_config_fetch(any_spec_version: int | None) -> None:
2932
config = icechunk.RepositoryConfig.default()
3033
config.inline_chunk_threshold_bytes = 5
3134
storage = icechunk.in_memory_storage()
3235
repo = icechunk.Repository.create(
3336
storage=storage,
3437
config=config,
38+
spec_version=any_spec_version,
3539
)
3640

3741
assert repo.config == config
3842
assert icechunk.Repository.fetch_config(storage) == config
3943

4044

41-
def test_config_save() -> None:
45+
def test_config_save(any_spec_version: int | None) -> None:
4246
config = icechunk.RepositoryConfig.default()
4347
storage = icechunk.in_memory_storage()
4448
repo = icechunk.Repository.create(
4549
storage=storage,
50+
spec_version=any_spec_version,
4651
)
4752

4853
config.inline_chunk_threshold_bytes = 5
@@ -142,10 +147,11 @@ def test_virtual_chunk_containers() -> None:
142147
assert config.virtual_chunk_containers["s3://testbucket/"] == container
143148

144149

145-
def test_can_change_deep_config_values() -> None:
150+
def test_can_change_deep_config_values(any_spec_version: int | None) -> None:
146151
storage = icechunk.in_memory_storage()
147152
repo = icechunk.Repository.create(
148153
storage=storage,
154+
spec_version=any_spec_version,
149155
)
150156
config = icechunk.RepositoryConfig(
151157
inline_chunk_threshold_bytes=11,
@@ -240,7 +246,7 @@ def test_spec_version() -> None:
240246
assert icechunk.spec_version() >= 1
241247

242248

243-
def test_config_from_store() -> None:
249+
def test_config_from_store(any_spec_version: int | None) -> None:
244250
config = icechunk.RepositoryConfig.default()
245251
config.inline_chunk_threshold_bytes = 5
246252

@@ -252,6 +258,7 @@ def test_config_from_store() -> None:
252258
repo = icechunk.Repository.create(
253259
storage=storage,
254260
config=config,
261+
spec_version=any_spec_version,
255262
)
256263
session = repo.writable_session("main")
257264
store = session.store

icechunk-python/tests/test_conflicts.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99

1010

1111
@pytest.fixture
12-
def repo(tmpdir: Path) -> icechunk.Repository:
12+
def repo(tmpdir: Path, any_spec_version: int | None) -> icechunk.Repository:
1313
repo = icechunk.Repository.create(
14-
storage=icechunk.local_filesystem_storage(str(tmpdir))
14+
storage=icechunk.local_filesystem_storage(str(tmpdir)),
15+
spec_version=any_spec_version,
1516
)
1617

1718
session = repo.writable_session("main")
@@ -192,10 +193,11 @@ def test_rebase_chunks_with_ours(
192193
assert array_c[0, 1] == 1
193194

194195

195-
async def test_rebase_async() -> None:
196+
async def test_rebase_async(any_spec_version: int | None) -> None:
196197
"""Test async rebase functionality with conflict detection and resolution."""
197198
repo = await icechunk.Repository.create_async(
198199
storage=icechunk.in_memory_storage(),
200+
spec_version=any_spec_version,
199201
)
200202

201203
# Set up initial state

icechunk-python/tests/test_credentials.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ def test_s3_refreshable_credentials_refresh(
191191
def test_s3_refreshable_credentials_pickle_with_optimization(
192192
tmp_path: Path,
193193
scatter_initial_credentials: bool,
194+
any_spec_version: int | None,
194195
) -> None:
195196
"""Verifies pickled repos don't need to call get_credentials again if scatter_initial_credentials=True"""
196197
path = tmp_path / "calls.txt"
@@ -208,7 +209,10 @@ def test_s3_refreshable_credentials_pickle_with_optimization(
208209
scatter_initial_credentials=scatter_initial_credentials,
209210
)
210211
# let's create and use a repo
211-
repo = Repository.create(storage=st)
212+
repo = Repository.create(
213+
storage=st,
214+
spec_version=any_spec_version,
215+
)
212216
assert Repository.exists(st)
213217
assert Repository.exists(st)
214218

icechunk-python/tests/test_dask.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,17 @@
1616
from xarray.testing import assert_identical
1717

1818

19-
def test_store_dask() -> None:
19+
def test_store_dask(any_spec_version: int | None) -> None:
2020
shape = (100, 100)
2121
dask_chunks = (20, 20)
2222
dask_array = dask.array.random.random(shape, chunks=dask_chunks)
2323

2424
zarr_chunks = (10, 10)
2525
with tempfile.TemporaryDirectory() as tmpdir:
26-
repo = Repository.create(local_filesystem_storage(tmpdir))
26+
repo = Repository.create(
27+
local_filesystem_storage(tmpdir),
28+
spec_version=any_spec_version,
29+
)
2730
session = repo.writable_session("main")
2831
group = zarr.group(store=session.store, overwrite=True)
2932

@@ -52,27 +55,35 @@ def test_store_dask() -> None:
5255
store_dask(sources=[dask_array], targets=[zarray])
5356

5457

55-
def test_distributed() -> None:
58+
def test_distributed(any_spec_version: int | None) -> None:
5659
with distributed.Client(): # type: ignore [no-untyped-call]
5760
ds = create_test_data().chunk(dim1=3, dim2=4)
58-
with roundtrip(ds, commit=True) as actual:
61+
with roundtrip(ds, commit=True, spec_version=any_spec_version) as actual:
5962
assert_identical(actual, ds)
6063

6164

6265
@pytest.mark.parametrize("scheduler", ["threads", "processes"])
63-
def test_dask_schedulers(scheduler) -> None:
66+
def test_dask_schedulers(scheduler, any_spec_version: int | None) -> None:
6467
with dask.config.set(scheduler=scheduler):
6568
ds = create_test_data().chunk(dim1=3, dim2=4)
66-
with roundtrip(ds, commit=scheduler == "processes") as actual:
69+
with roundtrip(
70+
ds,
71+
commit=scheduler == "processes",
72+
spec_version=any_spec_version,
73+
) as actual:
6774
assert_identical(actual, ds)
6875

6976

7077
@pytest.mark.parametrize("scheduler", ["threads", "processes"])
71-
def test_xarray_to_icechunk_nested_pickling(scheduler) -> None:
78+
def test_xarray_to_icechunk_nested_pickling(
79+
scheduler, any_spec_version: int | None
80+
) -> None:
7281
with dask.config.set(scheduler=scheduler):
7382
ds = create_test_data(dim_sizes=(2, 3, 4)).chunk(-1)
7483
with tempfile.TemporaryDirectory() as tmpdir:
75-
repo = Repository.create(local_filesystem_storage(tmpdir))
84+
repo = Repository.create(
85+
local_filesystem_storage(tmpdir), spec_version=any_spec_version
86+
)
7687
session = repo.writable_session("main")
7788

7889
to_icechunk(ds, session=session, mode="w")
@@ -93,11 +104,14 @@ def test_xarray_to_icechunk_nested_pickling(scheduler) -> None:
93104

94105

95106
@pytest.mark.parametrize("scheduler", ["threads", "processes"])
96-
def test_fork_session_deep_copies(scheduler) -> None:
107+
def test_fork_session_deep_copies(scheduler, any_spec_version: int | None) -> None:
97108
with dask.config.set(scheduler=scheduler):
98109
ds = create_test_data(dim_sizes=(2, 3, 4)).drop_encoding().chunk(dim3=1)
99110
with tempfile.TemporaryDirectory() as tmpdir:
100-
repo = Repository.create(local_filesystem_storage(tmpdir))
111+
repo = Repository.create(
112+
local_filesystem_storage(tmpdir),
113+
spec_version=any_spec_version,
114+
)
101115

102116
session = repo.writable_session("main")
103117
ds.to_zarr(session.store, mode="w", compute=False)

icechunk-python/tests/test_distributed_append.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
)
2222

2323

24-
def do_test(scheduler) -> None:
24+
def do_test(scheduler, spec_version: int | None) -> None:
2525
# Writing the initial dataset
2626
if scheduler in ["processes", "sync"]:
2727
CHUNKX = 3
@@ -69,7 +69,7 @@ def plot() -> None:
6969
}
7070
).chunk(x=CHUNKX, y=CHUNKY)
7171

72-
repo = ic.Repository.open_or_create(IC_STORAGE)
72+
repo = ic.Repository.open_or_create(IC_STORAGE, create_version=spec_version)
7373
session = repo.writable_session("main")
7474
with dask.config.set(scheduler=scheduler):
7575
to_icechunk(initial, session=session, mode="w", split_every=SPLIT_EVERY)
@@ -124,8 +124,8 @@ def plot() -> None:
124124

125125

126126
@pytest.mark.parametrize("scheduler", ["threads", "processes"])
127-
def test_dask_distributed_appends(scheduler) -> None:
128-
do_test(scheduler)
127+
def test_dask_distributed_appends(scheduler, any_spec_version: int | None) -> None:
128+
do_test(scheduler, spec_version=any_spec_version)
129129

130130

131131
if __name__ == "__main__":

icechunk-python/tests/test_distributed_writers.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
CHUNKS_PER_TASK = 2
2323

2424

25-
def mk_repo(use_object_store: bool = False) -> icechunk.Repository:
25+
def mk_repo(
26+
spec_version: int | None, use_object_store: bool = False
27+
) -> icechunk.Repository:
2628
if use_object_store:
2729
storage = s3_object_store_storage(
2830
endpoint_url="http://localhost:9000",
@@ -50,13 +52,16 @@ def mk_repo(use_object_store: bool = False) -> icechunk.Repository:
5052
repo = icechunk.Repository.open_or_create(
5153
storage=storage,
5254
config=repo_config,
55+
create_version=spec_version,
5356
)
5457

5558
return repo
5659

5760

5861
@pytest.mark.parametrize("use_object_store", [False, True])
59-
async def test_distributed_writers(use_object_store: bool) -> None:
62+
async def test_distributed_writers(
63+
use_object_store: bool, any_spec_version: int | None
64+
) -> None:
6065
"""Write to an array using uncoordinated writers, distributed via Dask.
6166
6267
We create a big array, and then we split into workers, each worker gets
@@ -65,7 +70,7 @@ async def test_distributed_writers(use_object_store: bool) -> None:
6570
does a distributed commit. When done, we open the store again and verify
6671
we can write everything we have written.
6772
"""
68-
repo = mk_repo(use_object_store)
73+
repo = mk_repo(any_spec_version, use_object_store)
6974
session = repo.writable_session(branch="main")
7075
store = session.store
7176

icechunk-python/tests/test_error.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,15 @@
1010
import zarr
1111

1212

13-
def test_error_message_when_snapshot_deleted(tmpdir: Path) -> None:
13+
def test_error_message_when_snapshot_deleted(
14+
tmpdir: Path, any_spec_version: int | None
15+
) -> None:
1416
tmpdir = Path(tmpdir)
1517
storage = ic.local_filesystem_storage(str(tmpdir))
16-
repo = ic.Repository.create(storage=storage)
18+
repo = ic.Repository.create(
19+
storage=storage,
20+
spec_version=any_spec_version,
21+
)
1722

1823
rmtree(tmpdir / "snapshots")
1924

@@ -27,10 +32,15 @@ def test_error_message_when_snapshot_deleted(tmpdir: Path) -> None:
2732
zarr.group(store=session.store, overwrite=True)
2833

2934

30-
def test_error_message_when_manifest_file_altered(tmpdir: Path) -> None:
35+
def test_error_message_when_manifest_file_altered(
36+
tmpdir: Path, any_spec_version: int | None
37+
) -> None:
3138
tmpdir = Path(tmpdir)
3239
storage = ic.local_filesystem_storage(str(tmpdir))
33-
repo = ic.Repository.create(storage=storage)
40+
repo = ic.Repository.create(
41+
storage=storage,
42+
spec_version=any_spec_version,
43+
)
3444

3545
session = repo.writable_session("main")
3646
store = session.store

0 commit comments

Comments
 (0)