Skip to content

Commit e5c3410

Browse files
committed
Restructure tests to match architecture layers (#268)
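Extract Filters.apply() to consolidate duplicated filtering logic across all 6 orchestration modules. Note that the anatomical and longitudinal modules previously filtered only on participant and session labels; by going through Filters.apply() they now also filter on task whenever a task is set. Move orchestration tests out of cli/ into their own test directory, rewrite them to call orchestration directly, and delete redundant tests (runner setup, mock-heavy filtering counts, dead export assertions).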
1 parent a5cc10e commit e5c3410

17 files changed

Lines changed: 888 additions & 2102 deletions

src/rbc/orchestration/__init__.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@
1111
from dataclasses import dataclass, field
1212
from typing import TYPE_CHECKING
1313

14+
import polars as pl
15+
1416
from rbc.core import CPAC_ANTS_SEED
1517
from rbc.core.niwrap import setup_runner
1618

@@ -39,6 +41,28 @@ class Filters:
3941
session_label: Sequence[str] = field(default_factory=tuple)
4042
task: str | None = None
4143

44+
def apply(self, df: pl.DataFrame, *base_exprs: pl.Expr) -> pl.DataFrame:
45+
"""Apply user-level and workflow-specific filters to a BIDS table.
46+
47+
Args:
48+
df: BIDS table to filter.
49+
*base_exprs: Workflow-specific filter expressions
50+
(e.g. space, datatype constraints).
51+
52+
Returns:
53+
Filtered DataFrame.
54+
"""
55+
exprs = list(base_exprs)
56+
if len(self.participant_label) > 0:
57+
exprs.append(pl.col("sub").is_in(self.participant_label))
58+
if len(self.session_label) > 0:
59+
exprs.append(pl.col("ses").is_in(self.session_label))
60+
if self.task is not None:
61+
exprs.append(pl.col("task") == self.task)
62+
if not exprs:
63+
return df
64+
return df.filter(pl.all_horizontal(exprs))
65+
4266

4367
@dataclass(frozen=True)
4468
class RunnerConfig:

src/rbc/orchestration/all.py

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -69,18 +69,12 @@ def run(
6969
dataset_dir=input_dir, index_fpath=None, max_workers=0, verbose=verbose
7070
)
7171

72-
filter_exprs = [
72+
df = filters.apply(
73+
df,
7374
pl.col("ses") != "longitudinal",
7475
pl.col("space").is_null(),
7576
pl.col("desc").is_null(),
76-
]
77-
if len(filters.participant_label) > 0:
78-
filter_exprs.append(pl.col("sub").is_in(filters.participant_label))
79-
if len(filters.session_label) > 0:
80-
filter_exprs.append(pl.col("ses").is_in(filters.session_label))
81-
if filters.task is not None:
82-
filter_exprs.append(pl.col("task") == filters.task)
83-
df = df.filter(pl.all_horizontal(filter_exprs))
77+
)
8478

8579
for _, sub_ses_group in tqdm(
8680
df.group_by(SUB_SES_QUERY, maintain_order=True), disable=not verbose

src/rbc/orchestration/anatomical.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -73,16 +73,12 @@ def run(
7373
dataset_dir=input_dir, index_fpath=None, max_workers=0, verbose=verbose
7474
)
7575

76-
filter_exprs = [
76+
df = filters.apply(
77+
df,
7778
pl.col("ses") != "longitudinal",
7879
pl.col("space").is_null(),
7980
pl.col("desc").is_null(),
80-
]
81-
if len(filters.participant_label) > 0:
82-
filter_exprs.append(pl.col("sub").is_in(filters.participant_label))
83-
if len(filters.session_label) > 0:
84-
filter_exprs.append(pl.col("ses").is_in(filters.session_label))
85-
df = df.filter(pl.all_horizontal(filter_exprs))
81+
)
8682

8783
for _, sub_ses_group in tqdm(
8884
df.group_by(SUB_SES_QUERY, maintain_order=True), disable=not verbose

src/rbc/orchestration/functional.py

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -106,17 +106,11 @@ def run(
106106
dataset_dir=input_dir, index_fpath=None, max_workers=0, verbose=verbose
107107
)
108108

109-
filter_exprs = [
109+
df = filters.apply(
110+
df,
110111
pl.col("ses") != "longitudinal",
111112
pl.col("space").is_null(),
112-
]
113-
if len(filters.participant_label) > 0:
114-
filter_exprs.append(pl.col("sub").is_in(filters.participant_label))
115-
if len(filters.session_label) > 0:
116-
filter_exprs.append(pl.col("ses").is_in(filters.session_label))
117-
if filters.task is not None:
118-
filter_exprs.append(pl.col("task") == filters.task)
119-
df = df.filter(pl.all_horizontal(filter_exprs))
113+
)
120114

121115
for _, sub_ses_group in tqdm(
122116
df.group_by(SUB_SES_QUERY, maintain_order=True), disable=not verbose

src/rbc/orchestration/longitudinal.py

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -126,12 +126,7 @@ def run(
126126
dataset_dir=input_dir, index_fpath=None, max_workers=0, verbose=verbose
127127
)
128128

129-
filter_exprs = [pl.col("ses") != "longitudinal"]
130-
if len(filters.participant_label) > 0:
131-
filter_exprs.append(pl.col("sub").is_in(filters.participant_label))
132-
if len(filters.session_label) > 0:
133-
filter_exprs.append(pl.col("ses").is_in(filters.session_label))
134-
group_df = df.filter(pl.all_horizontal(filter_exprs))
129+
group_df = filters.apply(df, pl.col("ses") != "longitudinal")
135130

136131
for _, sub_ses_group in tqdm(
137132
group_df.group_by(SUB_SES_QUERY, maintain_order=True),

src/rbc/orchestration/metrics.py

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -108,19 +108,13 @@ def run(
108108
dataset_dir=output_dir, index_fpath=None, max_workers=0, verbose=verbose
109109
)
110110

111-
filter_exprs = [
111+
df = filters.apply(
112+
df,
112113
pl.col("datatype") == "func",
113114
pl.col("suffix") == "bold",
114115
pl.col("desc") == "preproc",
115116
pl.col("space") == TemplateSpace.MNI152NLIN6ASYM,
116-
]
117-
if len(filters.participant_label) > 0:
118-
filter_exprs.append(pl.col("sub").is_in(filters.participant_label))
119-
if len(filters.session_label) > 0:
120-
filter_exprs.append(pl.col("ses").is_in(filters.session_label))
121-
if filters.task is not None:
122-
filter_exprs.append(pl.col("task") == filters.task)
123-
df = df.filter(pl.all_horizontal(filter_exprs))
117+
)
124118

125119
for _, group in tqdm(df.group_by(SUB_SES_QUERY), disable=not verbose):
126120
sub: str = group["sub"][0]

src/rbc/orchestration/qc.py

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -48,19 +48,13 @@ def run(
4848
dataset_dir=output_dir, index_fpath=None, max_workers=0, verbose=verbose
4949
)
5050

51-
filter_exprs = [
51+
df = filters.apply(
52+
df,
5253
pl.col("datatype") == "func",
5354
pl.col("suffix") == "bold",
5455
pl.col("desc") == "preproc",
5556
pl.col("space") == TemplateSpace.MNI152NLIN6ASYM,
56-
]
57-
if len(filters.participant_label) > 0:
58-
filter_exprs.append(pl.col("sub").is_in(filters.participant_label))
59-
if len(filters.session_label) > 0:
60-
filter_exprs.append(pl.col("ses").is_in(filters.session_label))
61-
if filters.task is not None:
62-
filter_exprs.append(pl.col("task") == filters.task)
63-
df = df.filter(pl.all_horizontal(filter_exprs))
57+
)
6458

6559
for _, group in tqdm(df.group_by(SUB_SES_QUERY), disable=not verbose):
6660
sub: str = group["sub"][0]

0 commit comments

Comments
 (0)