Skip to content

Commit 9babdea

Browse files
authored
Support multiple input directories and restructure CLI (#281)
1 parent 1284f2f commit 9babdea

26 files changed

Lines changed: 169 additions & 157 deletions

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,16 @@ Requires Python 3.12+. Neuroimaging tools (AFNI, FSL, ANTs) are needed at runtim
2525
## Quick start
2626

2727
```bash
28-
# Usage example
29-
# rbc <input_dir> <output_dir> <workflow> [options]
28+
# Usage: rbc {workflow} INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]
3029

3130
# Run the full pipeline
32-
rbc /data /data/derivatives all --runner docker
31+
rbc all /data -o /data/derivatives --runner docker
3332

3433
# Or run a single stage for specific subjects
35-
rbc /data /data/derivatives functional --task rest --participant-label 01 02 --runner docker
34+
rbc functional /data -o /data/derivatives --task rest --participant-label 01 02 --runner docker
35+
36+
# Multiple input directories (e.g., raw BIDS + prior derivatives)
37+
rbc functional /data /data/derivatives -o /data/derivatives --runner docker
3638
```
3739

3840
Run any command with `--help` for full options.

docs/hpc_parallel.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ ls -d /data/bids/sub-* | xargs -n1 basename | sed 's/sub-//' > participants.txt
3232

3333
PARTICIPANT=$(sed -n "${SLURM_ARRAY_TASK_ID}p" participants.txt)
3434

35-
rbc /data/bids /data/output all \
35+
rbc all /data/bids -o /data/output \
3636
--runner singularity \
3737
--participant-label "$PARTICIPANT"
3838
```
@@ -60,7 +60,7 @@ If you have a multi-core machine without a scheduler:
6060

6161
```bash
6262
cat participants.txt | parallel -j 4 \
63-
rbc /data/bids /data/output all \
63+
rbc all /data/bids -o /data/output \
6464
--runner singularity \
6565
--participant-label {}
6666
```
@@ -79,7 +79,7 @@ roughly 8-16 GB per subject).
7979

8080
PARTICIPANT=$(sed -n "${PBS_ARRAY_INDEX}p" participants.txt)
8181

82-
rbc /data/bids /data/output all \
82+
rbc all /data/bids -o /data/output \
8383
--runner singularity \
8484
--participant-label "$PARTICIPANT"
8585
```
@@ -92,7 +92,7 @@ clusters provide fast local scratch storage on each compute node. Use
9292
`--tmp-dir` to point intermediates there:
9393

9494
```bash
95-
rbc /data/bids /data/output all \
95+
rbc all /data/bids -o /data/output \
9696
--runner singularity \
9797
--tmp-dir /lscratch/$SLURM_JOB_ID \
9898
--participant-label "$PARTICIPANT"
@@ -125,8 +125,8 @@ Scratch is cleaned up when the job ends, so only final outputs (written to
125125
- **Singularity on HPC:** Most clusters don't allow Docker. The examples above
126126
use `--runner singularity` for this reason. Use `--runner docker` if your
127127
cluster supports it.
128-
- **Shared filesystem:** Make sure `input_dir` and `output_dir` are on a
129-
filesystem accessible to all compute nodes (e.g., Lustre, GPFS, NFS).
128+
- **Shared filesystem:** Make sure input directories and the output directory
129+
are on a filesystem accessible to all compute nodes (e.g., Lustre, GPFS, NFS).
130130
- **Local scratch:** Use `--tmp-dir` to place intermediate files on fast
131131
node-local storage. This can significantly reduce I/O wait times.
132132
- **Sessions:** RBC processes all sessions for a subject sequentially within a

src/rbc/bids/query.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
import polars as pl
1414

1515
if TYPE_CHECKING:
16+
from collections.abc import Sequence
17+
1618
from rbc.bids import BidsEntities # noqa: F401
1719

1820
__all__ = [
@@ -24,15 +26,15 @@
2426

2527

2628
def load_table(
27-
dataset_dir: str | Path,
29+
dataset_dirs: str | Path | Sequence[str | Path],
2830
index_fpath: str | Path | None = None,
2931
max_workers: int | None = 0,
3032
verbose: bool = False, # noqa: FBT001, FBT002 (Ignore bool arg for b2t)
3133
) -> pl.DataFrame:
32-
"""Get and return BIDSTable for a given dataset.
34+
"""Get and return BIDSTable for one or more dataset directories.
3335
3436
Args:
35-
dataset_dir: Path to dataset directory.
37+
dataset_dirs: One or more paths to BIDS dataset directories.
3638
index_fpath: Path to bids2table parquet table. If provided, it is read
3739
directly and the dataset directories are not indexed.
3840
max_workers: Number of parallel indexing processes. 0=main process only,
@@ -44,24 +46,25 @@ def load_table(
4446
4547
Raises:
4648
ValueError: if no datasets found.
47-
TypeError: if found dataset does not return a DataFrame.
4849
"""
4950
if index_fpath is not None:
5051
return pl.read_parquet(index_fpath)
5152

53+
dirs: list[str | Path] = (
54+
[dataset_dirs] if isinstance(dataset_dirs, (str, Path)) else list(dataset_dirs)
55+
)
56+
all_roots: list[Path] = []
57+
for d in dirs:
58+
all_roots.extend(b2t.find_bids_datasets(d))
59+
5260
tables = b2t.batch_index_dataset(
53-
b2t.find_bids_datasets(dataset_dir),
61+
all_roots,
5462
max_workers=max_workers,
5563
show_progress=verbose,
5664
)
57-
dfs: list[pl.DataFrame] = []
58-
for table in tables:
59-
result = pl.from_arrow(table)
60-
if not isinstance(result, pl.DataFrame):
61-
raise TypeError(f"Expected DataFrame, got {type(result)}")
62-
dfs.append(result)
65+
dfs = [pl.DataFrame(pl.from_arrow(table)) for table in tables]
6366
if len(dfs) == 0:
64-
raise ValueError(f"No datasets found in {dataset_dir}")
67+
raise ValueError(f"No datasets found in {dirs}")
6568

6669
return pl.concat(dfs)
6770

src/rbc/cli/all.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def validate_namespace(cls, ns: argparse.Namespace) -> AllArgs:
8282
def main(args: AllArgs) -> int:
8383
"""Main entrypoint of combined pipeline."""
8484
run(
85-
input_dir=args.input_dir,
85+
input_dirs=args.input_dirs,
8686
output_dir=args.output_dir,
8787
filters=Filters(
8888
participant_label=args.participant_label,
@@ -117,7 +117,7 @@ def register_command(
117117
parents=parents,
118118
description="RBC full pipeline (anatomical + functional + metrics + QC)",
119119
help="Full pipeline (all workflows)",
120-
usage="rbc input_dir output_dir all [-h] [options]",
120+
usage="rbc all INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",
121121
)
122122
parser.add_argument(
123123
"--regressor",

src/rbc/cli/anatomical.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def validate_namespace(cls, ns: argparse.Namespace) -> AnatomicalArgs:
4545
def main(args: AnatomicalArgs) -> int:
4646
"""Main entrypoint of anatomical workflow."""
4747
run(
48-
input_dir=args.input_dir,
48+
input_dirs=args.input_dirs,
4949
output_dir=args.output_dir,
5050
filters=Filters(
5151
participant_label=args.participant_label,
@@ -71,7 +71,7 @@ def register_command(
7171
parents=parents,
7272
description="RBC anatomical workflow",
7373
help="Anatomical workflow",
74-
usage="rbc input_dir output_dir anatomical [-h] [options]",
74+
usage="rbc anatomical INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",
7575
)
7676

7777
templates = parser.add_argument_group("template overrides")

src/rbc/cli/base.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
class BaseArgs:
2323
"""Base (global) arguments shared across all workflow CLIs."""
2424

25-
input_dir: Path
25+
input_dirs: tuple[Path, ...]
2626
output_dir: Path
2727
runner: Literal["auto", "local", "docker", "podman", "singularity"]
2828
participant_label: list[str]
@@ -33,8 +33,9 @@ class BaseArgs:
3333
@classmethod
3434
def validate_namespace(cls, ns: argparse.Namespace) -> BaseArgs:
3535
"""Validation of base arguments."""
36-
if not ns.input_dir.exists():
37-
raise ValueError(f"Input path does not exist: {ns.input_dir}")
36+
for d in ns.input_dirs:
37+
if not d.exists():
38+
raise ValueError(f"Input path does not exist: {d}")
3839
if ns.runner not in _VALID_RUNNERS:
3940
raise ValueError(
4041
f"Expected one of {_VALID_RUNNERS} for runner, got: {ns.runner!r}"
@@ -56,7 +57,7 @@ def validate_namespace(cls, ns: argparse.Namespace) -> BaseArgs:
5657
)
5758

5859
return cls(
59-
input_dir=ns.input_dir,
60+
input_dirs=tuple(ns.input_dirs),
6061
output_dir=ns.output_dir,
6162
runner=ns.runner,
6263
participant_label=ns.participant_label,

src/rbc/cli/functional.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def validate_namespace(cls, ns: argparse.Namespace) -> FunctionalArgs:
6565
def main(args: FunctionalArgs) -> int:
6666
"""Main entrypoint of functional workflow."""
6767
run(
68-
input_dir=args.input_dir,
68+
input_dirs=args.input_dirs,
6969
output_dir=args.output_dir,
7070
filters=Filters(
7171
participant_label=args.participant_label,
@@ -95,7 +95,7 @@ def register_command(
9595
parents=parents,
9696
description="RBC functional workflow",
9797
help="Functional workflow",
98-
usage="rbc input_dir output_dir functional [-h] [options]",
98+
usage="rbc functional INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",
9999
)
100100
parser.add_argument(
101101
"--regressor",

src/rbc/cli/longitudinal.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def validate_namespace(cls, ns: argparse.Namespace) -> LongitudinalArgs:
4444
def main(args: LongitudinalArgs) -> int:
4545
"""Main entrypoint of longitudinal workflow."""
4646
run(
47-
input_dir=args.input_dir,
47+
input_dirs=args.input_dirs,
4848
output_dir=args.output_dir,
4949
filters=Filters(
5050
participant_label=args.participant_label,
@@ -71,7 +71,7 @@ def register_command(
7171
parents=parents,
7272
description="RBC-based longitudinal workflow",
7373
help="Longitudinal workflow",
74-
usage="rbc input_dir output_dir longitudinal [-h] [options]",
74+
usage="rbc longitudinal INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",
7575
)
7676
parser.add_argument(
7777
"--anatomical",

src/rbc/cli/main.py

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@
99
options.
1010
1111
Usage:
12-
rbc input_dir output_dir {workflow} [options]
12+
rbc {workflow} INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]
1313
1414
Example:
15-
rbc /data/bids /data/output anatomical --participant-label 01
15+
rbc anatomical /data/bids -o /data/output --participant-label 01
16+
rbc functional /data/bids /data/derivatives -o /data/output
1617
"""
1718

1819
import argparse
@@ -30,6 +31,20 @@
3031
def _global_opts() -> argparse.ArgumentParser:
3132
"""Shared global options across workflows."""
3233
global_opts = argparse.ArgumentParser(add_help=False)
34+
global_opts.add_argument(
35+
"input_dirs",
36+
nargs="+",
37+
type=Path,
38+
metavar="INPUT_DIR",
39+
help="One or more BIDS-organized input dataset directories",
40+
)
41+
global_opts.add_argument(
42+
"-o",
43+
"--output-dir",
44+
type=Path,
45+
required=True,
46+
help="Directory where output data should be stored",
47+
)
3348
global_opts.add_argument(
3449
"-v",
3550
"--verbose",
@@ -73,18 +88,7 @@ def create_parser() -> argparse.ArgumentParser:
7388
prog="rbc",
7489
description="RBC processing pipelines (developed using NiWrap)",
7590
formatter_class=argparse.RawDescriptionHelpFormatter,
76-
usage="%(prog)s input_dir output_dir {workflow} [options]",
77-
)
78-
# Global arguments
79-
parser.add_argument(
80-
"input_dir",
81-
type=Path,
82-
help="BIDS-organized input dataset directory",
83-
)
84-
parser.add_argument(
85-
"output_dir",
86-
type=Path,
87-
help="Directory where output data should be stored",
91+
usage="%(prog)s {workflow} INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",
8892
)
8993
global_opts = _global_opts()
9094

@@ -104,9 +108,6 @@ def create_parser() -> argparse.ArgumentParser:
104108
# Experimental subcommand
105109
longitudinal.register_command(subparsers, parents=[global_opts])
106110

107-
for action in global_opts._actions:
108-
parser._add_action(action)
109-
110111
return parser
111112

112113

src/rbc/cli/metrics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def register_command(
103103
parents=parents,
104104
description="RBC metrics workflow",
105105
help="Metrics workflow (ALFF, ReHo, timeseries)",
106-
usage="rbc input_dir output_dir metrics [-h] [options]",
106+
usage="rbc metrics INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",
107107
)
108108
parser.add_argument(
109109
"--atlas",

0 commit comments

Comments
 (0)