@@ -265,6 +265,8 @@ def batch_index_dataset(
265265 max_workers : int | None = 0 ,
266266 executor_cls : type [Executor ] = ProcessPoolExecutor ,
267267 show_progress : bool = False ,
268+ * ,
269+ schema : SchemaSpec = None ,
268270) -> Generator [pa .Table , None , None ]:
269271 """Index a batch of BIDS datasets.
270272
@@ -276,14 +278,16 @@ def batch_index_dataset(
276278 `concurrent.futures.ProcessPoolExecutor` for details.
277279 executor_cls: Executor class to use for parallel indexing.
278280 show_progress: Show progress bar.
281+ schema: Optional `SchemaSpec`. `None` uses the default BIDS schema.
279282
280283 Yields:
281284 An Arrow table index for each BIDS dataset.
282285 """
286+ func = partial (_batch_index_func , schema = schema )
283287 file_count = 0
284288 for dataset , table in (
285289 pbar := tqdm (
286- _pmap (_batch_index_func , roots , max_workers , executor_cls = executor_cls ),
290+ _pmap (func , roots , max_workers , executor_cls = executor_cls ),
287291 total = len (roots ) if isinstance (roots , Sequence ) else None ,
288292 disable = show_progress not in {True , "dataset" },
289293 )
@@ -293,9 +297,11 @@ def batch_index_dataset(
293297 yield table
294298
295299
def _batch_index_func(
    root: str | PathT,
    *,
    schema: SchemaSpec = None,
) -> tuple[str | None, pa.Table]:
    """Index one dataset root and pair the result with its dataset identifier.

    This is the per-root callable that `batch_index_dataset` binds with
    `partial` and hands to `_pmap`.

    Args:
        root: Dataset root, passed to both `_get_bids_dataset` and
            `index_dataset`.
        schema: Forwarded unchanged to `index_dataset`.

    Returns:
        A `(dataset, table)` tuple, where `dataset` is the first element
        returned by `_get_bids_dataset(root)` and `table` is the result of
        `index_dataset` for the same root.
    """
    # Only the first element of the lookup result is needed here.
    dataset_id, _unused = _get_bids_dataset(root)
    # NOTE(review): max_workers=0 presumably keeps the per-dataset indexing
    # serial inside each batch worker -- confirm against `index_dataset`.
    index_table = index_dataset(
        root,
        max_workers=0,
        show_progress=False,
        schema=schema,
    )
    return dataset_id, index_table
300306
301307
0 commit comments