@@ -98,11 +98,9 @@ def assert_eq_concat_on_disk(
9898 adatas ,
9999 tmp_path : Path ,
100100 file_format : Literal ["zarr" , "h5ad" ],
101- * ,
102- max_loaded_elems : int | None = None ,
103- virtual_concat : bool = False ,
104101 * args ,
105- merge_strategy : merge .StrategiesLiteral | None = None ,
102+ virtual_concat : bool = False ,
103+ max_loaded_elems : int | None = None ,
106104 ** kwargs ,
107105):
108106 # create one from the concat function
@@ -112,7 +110,14 @@ def assert_eq_concat_on_disk(
112110 out_name = tmp_path / f"out.{ file_format } "
113111 if max_loaded_elems is not None :
114112 kwargs ["max_loaded_elems" ] = max_loaded_elems
115- concat_on_disk (paths , out_name , * args , virtual_concat = virtual_concat , merge = merge_strategy , ** kwargs )
113+ concat_on_disk (
114+ paths ,
115+ out_name ,
116+ * args ,
117+ virtual_concat = virtual_concat ,
118+ merge = merge_strategy ,
119+ ** kwargs ,
120+ )
116121 with as_group (out_name , mode = "r" ) as rg :
117122 res2 = read_elem (rg )
118123 assert_equal (res1 , res2 , exact = False )
@@ -176,14 +181,15 @@ def test_anndatas(
176181 adatas ,
177182 tmp_path ,
178183 file_format ,
179- max_loaded_elems ,
184+ max_loaded_elems = max_loaded_elems ,
185+ virtual_concat = False ,
180186 axis = axis ,
181187 join = join_type ,
182188 merge_strategy = merge_strategy ,
183189 )
184190
185191
186- def test_anndatas_virtual_concat (
192+ def test_anndatas_virtual_concat_missing_file (
187193 * ,
188194 tmp_path : Path ,
189195):
@@ -218,8 +224,63 @@ def test_anndatas_virtual_concat(
218224 adatas ,
219225 tmp_path ,
220226 file_format ,
227+ virtual_concat = True ,
221228 max_loaded_elems = max_loaded_elems ,
229+ axis = axis ,
230+ join = join_type ,
231+ )
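232+ # Copy out.h5ad to out2.h5ad first, then overwrite the source file 0.h5ad
233+ # with invalid content (the file is clobbered in place, not deleted).
234+ # Reading the copied output afterwards is expected to raise an OSError.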
235+ import shutil
236+
237+ shutil .copy (tmp_path / "out.h5ad" , tmp_path / "out2.h5ad" )
238+ with tmp_path .joinpath ("0.h5ad" ).open ("w" ) as f :
239+ f .write ("0" )
240+ with pytest .raises (
241+ OSError ,
242+ match = "Error raised while reading key 'sparse' of <class 'h5py._hl.group.Group'> from /obsm" ,
243+ ):
244+ ad .read_h5ad (tmp_path / "out2.h5ad" )
245+
246+
def test_anndatas_virtual_concat(
    *,
    tmp_path: Path,
):
    """Check on-disk concatenation of three sparse AnnData objects with ``virtual_concat=True``.

    The objects are concatenated along axis 0 with an inner join and written
    as ``h5ad``; the comparison against the in-memory result is delegated to
    ``assert_eq_concat_on_disk``.
    """
    # Fixed configuration for this scenario.
    axis = 0
    max_loaded_elems = 1_000_000
    file_format = "h5ad"
    array_type = "sparse"
    join_type = "inner"
    randomized_axes = {0, 1}
    sparse_fmt = "csr" if axis == 0 else "csc"
    gen_kwargs = GEN_ADATA_OOC_CONCAT_ARGS

    # Name of the axis that is *not* being concatenated along.
    _, off_axis_name = _resolve_axis(1 - axis)
    names_attr = f"{off_axis_name}_names"

    adatas = []
    for i in range(3):
        # Randomised axes get a size in [5, 10); any other axis would be 50.
        shape = tuple(
            np.random.randint(5, 10) if ax in randomized_axes else 50
            for ax in (0, 1)
        )
        adata = gen_adata(
            shape,
            X_type=get_array_type(array_type, axis),
            sparse_fmt=sparse_fmt,
            obs_dtypes=[pd.CategoricalDtype(ordered=False)],
            var_dtypes=[pd.CategoricalDtype(ordered=False)],
            **gen_kwargs,
        )
        # Prefix every second off-axis name with the object index so that some
        # names overlap across objects and others do not; this is what makes
        # the inner/outer join meaningful.
        off_names = getattr(adata, names_attr).array
        off_names[1::2] = f"{i}-" + off_names[1::2]
        setattr(adata, names_attr, off_names)
        adatas.append(adata)

    assert_eq_concat_on_disk(
        adatas,
        tmp_path,
        file_format,
        virtual_concat=True,
        max_loaded_elems=max_loaded_elems,
        axis=axis,
        join=join_type,
    )
@@ -246,7 +307,7 @@ def test_concat_ordered_categoricals_retained(tmp_path, file_format):
246307 )
247308
248309 adatas = [a , b ]
249- assert_eq_concat_on_disk (adatas , tmp_path , file_format )
310+ assert_eq_concat_on_disk (adatas , tmp_path , file_format , virtual_concat = False )
250311
251312
252313@pytest .fixture
@@ -303,7 +364,13 @@ def test_concatenate_xxxm(xxxm_adatas, tmp_path, file_format, join_type):
303364 for i in range (len (xxxm_adatas )):
304365 xxxm_adatas [i ] = xxxm_adatas [i ].T
305366 xxxm_adatas [i ].X = sparse .csr_matrix (xxxm_adatas [i ].X )
306- assert_eq_concat_on_disk (xxxm_adatas , tmp_path , file_format , join = join_type )
367+ assert_eq_concat_on_disk (
368+ xxxm_adatas ,
369+ tmp_path ,
370+ file_format ,
371+ join = join_type ,
372+ virtual_concat = False ,
373+ )
307374
308375
309376@pytest .mark .skipif (is_zarr_v2 (), reason = "auto sharding is allowed only for zarr v3." )
0 commit comments