childmindresearch
diff --git a/README.md b/README.md
Lines changed: 6 additions & 4 deletions
diff --git a/docs/hpc_parallel.md b/docs/hpc_parallel.md
Lines changed: 6 additions & 6 deletions
diff --git a/src/rbc/bids/query.py b/src/rbc/bids/query.py
Lines changed: 15 additions & 12 deletions
diff --git a/src/rbc/cli/all.py b/src/rbc/cli/all.py
Lines changed: 2 additions & 2 deletions
diff --git a/src/rbc/cli/anatomical.py b/src/rbc/cli/anatomical.py
Lines changed: 2 additions & 2 deletions
diff --git a/src/rbc/cli/base.py b/src/rbc/cli/base.py
Lines changed: 5 additions & 4 deletions
diff --git a/src/rbc/cli/functional.py b/src/rbc/cli/functional.py
Lines changed: 2 additions & 2 deletions
diff --git a/src/rbc/cli/longitudinal.py b/src/rbc/cli/longitudinal.py
Lines changed: 2 additions & 2 deletions
diff --git a/src/rbc/cli/main.py b/src/rbc/cli/main.py
Lines changed: 18 additions & 17 deletions
diff --git a/src/rbc/cli/metrics.py b/src/rbc/cli/metrics.py
Lines changed: 1 addition & 1 deletion
@@ -25,14 +25,16 @@ Requires Python 3.12+. Neuroimaging tools (AFNI, FSL, ANTs) are needed at runtim
 ## Quick start
 
 ```bash
-# Usage example
-# rbc <input_dir> <output_dir> <workflow> [options]
+# Usage: rbc {workflow} INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]
 
 # Run the full pipeline
-rbc /data /data/derivatives all --runner docker
+rbc all /data -o /data/derivatives --runner docker
 
 # Or run a single stage for specific subjects
-rbc /data /data/derivatives functional --task rest --participant-label 01 02 --runner docker
+rbc functional /data -o /data/derivatives --task rest --participant-label 01 02 --runner docker
+
+# Multiple input directories (e.g., raw BIDS + prior derivatives)
+rbc functional /data /data/derivatives -o /data/derivatives --runner docker
 ```
 
 Run any command with `--help` for full options.
 
@@ -32,7 +32,7 @@ ls -d /data/bids/sub-* | xargs -n1 basename | sed 's/sub-//' > participants.txt
 
 PARTICIPANT=$(sed -n "${SLURM_ARRAY_TASK_ID}p" participants.txt)
 
-rbc /data/bids /data/output all \
+rbc all /data/bids -o /data/output \
     --runner singularity \
     --participant-label "$PARTICIPANT"
 ```
@@ -60,7 +60,7 @@ If you have a multi-core machine without a scheduler:
 
 ```bash
 cat participants.txt | parallel -j 4 \
-    rbc /data/bids /data/output all \
+    rbc all /data/bids -o /data/output \
         --runner singularity \
         --participant-label {}
 ```
@@ -79,7 +79,7 @@ roughly 8-16 GB per subject).
 
 PARTICIPANT=$(sed -n "${PBS_ARRAY_INDEX}p" participants.txt)
 
-rbc /data/bids /data/output all \
+rbc all /data/bids -o /data/output \
     --runner singularity \
     --participant-label "$PARTICIPANT"
 ```
@@ -92,7 +92,7 @@ clusters provide fast local scratch storage on each compute node. Use
 `--tmp-dir` to point intermediates there:
 
 ```bash
-rbc /data/bids /data/output all \
+rbc all /data/bids -o /data/output \
     --runner singularity \
     --tmp-dir /lscratch/$SLURM_JOB_ID \
     --participant-label "$PARTICIPANT"
@@ -125,8 +125,8 @@ Scratch is cleaned up when the job ends, so only final outputs (written to
 - **Singularity on HPC:** Most clusters don't allow Docker. The examples above
   use `--runner singularity` for this reason. Use `--runner docker` if your
   cluster supports it.
-- **Shared filesystem:** Make sure `input_dir` and `output_dir` are on a
-  filesystem accessible to all compute nodes (e.g., Lustre, GPFS, NFS).
+- **Shared filesystem:** Make sure input directories and the output directory
+  are on a filesystem accessible to all compute nodes (e.g., Lustre, GPFS, NFS).
 - **Local scratch:** Use `--tmp-dir` to place intermediate files on fast
   node-local storage. This can significantly reduce I/O wait times.
 - **Sessions:** RBC processes all sessions for a subject sequentially within a
 
@@ -13,6 +13,8 @@
 import polars as pl
 
 if TYPE_CHECKING:
+    from collections.abc import Sequence
+
     from rbc.bids import BidsEntities  # noqa: F401
 
 __all__ = [
@@ -24,15 +26,15 @@
 
 
 def load_table(
-    dataset_dir: str | Path,
+    dataset_dirs: str | Path | Sequence[str | Path],
     index_fpath: str | Path | None = None,
     max_workers: int | None = 0,
     verbose: bool = False,  # noqa: FBT001, FBT002 (Ignore bool arg for b2t)
 ) -> pl.DataFrame:
-    """Get and return BIDSTable for a given dataset.
+    """Get and return BIDSTable for one or more dataset directories.
 
     Args:
-        dataset_dir: Path to dataset directory.
+        dataset_dirs: One or more paths to BIDS dataset directories.
         index_fpath: Path to bids2table parquet table. If provided and exists,
             will be loaded. Otherwise dataset will be indexed.
         max_workers: Number of parallel indexing processes. 0=main process only,
@@ -44,24 +46,25 @@ def load_table(
 
     Raises:
         ValueError: if no datasets found.
-        TypeError: if found dataset does not return a DataFrame.
     """
     if index_fpath is not None:
         return pl.read_parquet(index_fpath)
 
+    dirs: list[str | Path] = (
+        [dataset_dirs] if isinstance(dataset_dirs, (str, Path)) else list(dataset_dirs)
+    )
+    all_roots: list[Path] = []
+    for d in dirs:
+        all_roots.extend(b2t.find_bids_datasets(d))
+
     tables = b2t.batch_index_dataset(
-        b2t.find_bids_datasets(dataset_dir),
+        all_roots,
         max_workers=max_workers,
         show_progress=verbose,
     )
-    dfs: list[pl.DataFrame] = []
-    for table in tables:
-        result = pl.from_arrow(table)
-        if not isinstance(result, pl.DataFrame):
-            raise TypeError(f"Expected DataFrame, got {type(result)}")
-        dfs.append(result)
+    dfs = [pl.DataFrame(pl.from_arrow(table)) for table in tables]
     if len(dfs) == 0:
-        raise ValueError(f"No datasets found in {dataset_dir}")
+        raise ValueError(f"No datasets found in {dirs}")
 
     return pl.concat(dfs)
 
 
@@ -82,7 +82,7 @@ def validate_namespace(cls, ns: argparse.Namespace) -> AllArgs:
 def main(args: AllArgs) -> int:
     """Main entrypoint of combined pipeline."""
     run(
-        input_dir=args.input_dir,
+        input_dirs=args.input_dirs,
         output_dir=args.output_dir,
         filters=Filters(
             participant_label=args.participant_label,
@@ -117,7 +117,7 @@ def register_command(
         parents=parents,
         description="RBC full pipeline (anatomical + functional + metrics + QC)",
         help="Full pipeline (all workflows)",
-        usage="rbc input_dir output_dir all [-h] [options]",
+        usage="rbc all INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",
     )
     parser.add_argument(
         "--regressor",
 
@@ -45,7 +45,7 @@ def validate_namespace(cls, ns: argparse.Namespace) -> AnatomicalArgs:
 def main(args: AnatomicalArgs) -> int:
     """Main entrypoint of anatomical workflow."""
     run(
-        input_dir=args.input_dir,
+        input_dirs=args.input_dirs,
         output_dir=args.output_dir,
         filters=Filters(
             participant_label=args.participant_label,
@@ -71,7 +71,7 @@ def register_command(
         parents=parents,
         description="RBC anatomical workflow",
         help="Anatomical workflow",
-        usage="rbc input_dir output_dir anatomical [-h] [options]",
+        usage="rbc anatomical INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",
     )
 
     templates = parser.add_argument_group("template overrides")
 
@@ -22,7 +22,7 @@
 class BaseArgs:
     """Base (global) arguments shared across all workflow CLIs."""
 
-    input_dir: Path
+    input_dirs: tuple[Path, ...]
     output_dir: Path
     runner: Literal["auto", "local", "docker", "podman", "singularity"]
     participant_label: list[str]
@@ -33,8 +33,9 @@ class BaseArgs:
     @classmethod
     def validate_namespace(cls, ns: argparse.Namespace) -> BaseArgs:
         """Validation of base arguments."""
-        if not ns.input_dir.exists():
-            raise ValueError(f"Input path does not exist: {ns.input_dir}")
+        for d in ns.input_dirs:
+            if not d.exists():
+                raise ValueError(f"Input path does not exist: {d}")
         if ns.runner not in _VALID_RUNNERS:
             raise ValueError(
                 f"Expected one of {_VALID_RUNNERS} for runner, got: {ns.runner!r}"
@@ -56,7 +57,7 @@ def validate_namespace(cls, ns: argparse.Namespace) -> BaseArgs:
             )
 
         return cls(
-            input_dir=ns.input_dir,
+            input_dirs=tuple(ns.input_dirs),
             output_dir=ns.output_dir,
             runner=ns.runner,
             participant_label=ns.participant_label,
 
@@ -65,7 +65,7 @@ def validate_namespace(cls, ns: argparse.Namespace) -> FunctionalArgs:
 def main(args: FunctionalArgs) -> int:
     """Main entrypoint of functional workflow."""
     run(
-        input_dir=args.input_dir,
+        input_dirs=args.input_dirs,
         output_dir=args.output_dir,
         filters=Filters(
             participant_label=args.participant_label,
@@ -95,7 +95,7 @@ def register_command(
         parents=parents,
         description="RBC functional workflow",
         help="Functional workflow",
-        usage="rbc input_dir output_dir functional [-h] [options]",
+        usage="rbc functional INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",
     )
     parser.add_argument(
         "--regressor",
 
@@ -44,7 +44,7 @@ def validate_namespace(cls, ns: argparse.Namespace) -> LongitudinalArgs:
 def main(args: LongitudinalArgs) -> int:
     """Main entrypoint of longitudinal workflow."""
     run(
-        input_dir=args.input_dir,
+        input_dirs=args.input_dirs,
         output_dir=args.output_dir,
         filters=Filters(
             participant_label=args.participant_label,
@@ -71,7 +71,7 @@ def register_command(
         parents=parents,
         description="RBC-based longitudinal workflow",
         help="Longitudinal workflow",
-        usage="rbc input_dir output_dir longitudinal [-h] [options]",
+        usage="rbc longitudinal INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",
     )
     parser.add_argument(
         "--anatomical",
 
@@ -9,10 +9,11 @@
 options.
 
 Usage:
-    rbc input_dir output_dir {workflow} [options]
+    rbc {workflow} INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]
 
 Example:
-    rbc /data/bids /data/output anatomical --participant-label 01
+    rbc anatomical /data/bids -o /data/output --participant-label 01
+    rbc functional /data/bids /data/derivatives -o /data/output
 """
 
 import argparse
@@ -30,6 +31,20 @@
 def _global_opts() -> argparse.ArgumentParser:
     """Shared global options across workflows."""
     global_opts = argparse.ArgumentParser(add_help=False)
+    global_opts.add_argument(
+        "input_dirs",
+        nargs="+",
+        type=Path,
+        metavar="INPUT_DIR",
+        help="One or more BIDS-organized input dataset directories",
+    )
+    global_opts.add_argument(
+        "-o",
+        "--output-dir",
+        type=Path,
+        required=True,
+        help="Directory where output data should be stored",
+    )
     global_opts.add_argument(
         "-v",
         "--verbose",
@@ -73,18 +88,7 @@ def create_parser() -> argparse.ArgumentParser:
         prog="rbc",
         description="RBC processing pipelines (developed using NiWrap)",
         formatter_class=argparse.RawDescriptionHelpFormatter,
-        usage="%(prog)s input_dir output_dir {workflow} [options]",
-    )
-    # Global arguments
-    parser.add_argument(
-        "input_dir",
-        type=Path,
-        help="BIDS-organized input dataset directory",
-    )
-    parser.add_argument(
-        "output_dir",
-        type=Path,
-        help="Directory where output data should be stored",
+        usage="%(prog)s {workflow} INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",
     )
     global_opts = _global_opts()
 
@@ -104,9 +108,6 @@ def create_parser() -> argparse.ArgumentParser:
     # Experimental subcommand
     longitudinal.register_command(subparsers, parents=[global_opts])
 
-    for action in global_opts._actions:
-        parser._add_action(action)
-
     return parser
 
 
 
@@ -103,7 +103,7 @@ def register_command(
         parents=parents,
         description="RBC metrics workflow",
         help="Metrics workflow (ALFF, ReHo, timeseries)",
-        usage="rbc input_dir output_dir metrics [-h] [options]",
+        usage="rbc metrics INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",
     )
     parser.add_argument(
         "--atlas",
Original file line number	Diff line number	Diff line change
`@@ -103,7 +103,7 @@ def register_command(`
`103`	`103`	`parents=parents,`
`104`	`104`	`description="RBC metrics workflow",`
`105`	`105`	`help="Metrics workflow (ALFF, ReHo, timeseries)",`
`106`		`- usage="rbc input_dir output_dir metrics [-h] [options]",`
	`106`	`+ usage="rbc metrics INPUT_DIR [INPUT_DIR ...] -o OUTPUT_DIR [options]",`
`107`	`107`	`)`
`108`	`108`	`parser.add_argument(`
`109`	`109`	`"--atlas",`