Skip to content

Commit 2ba8aa6

Browse files
committed
Move rbc scratch dirs off cache shards via independent scratch root
Two problems with the prior generate_exec_folder pattern: 1. It hijacked the niwrap runner's data_dir + uid + execution_counter, forcing _CacheProxyingRunner to forward those attributes through to the base runner on every access. 2. Several rbc sites then wrote sibling files via `input_file.parent / ...`. When input_file was served from a styxcache shard (which is the norm under caching), those writes landed inside the shard, racing other workers' reads and silently truncating cached files. The test_single_session_qc EOFError was a cached .nii.gz truncated this way. generate_exec_folder now owns its own root (RBC_SCRATCH_DIR override, else a process-unique tempfile.mkdtemp). Every touched call site was migrated off `input_file.parent / ...` to `generate_exec_folder(...) / ...`. Sites audited: - core/functional/resampling.py:107, 201 - core/functional/nuisance.py:112 - core/longitudinal/transform.py:145 - core/longitudinal/freesurfer.py:142 - core/metrics/reho.py:191 - core/metrics/alff.py:259 - core/metrics/standardization.py:74 - core/metrics/timeseries.py:159 distortion.py's topup.parent is a read-only lookup into the tool's own output set (no write), left as-is. Unit tests for generate_exec_folder rewritten for the new semantics: no more uid/counter-based naming, just unique scratch dirs.
1 parent 14ccd84 commit 2ba8aa6

11 files changed

Lines changed: 64 additions & 40 deletions

File tree

src/rbc/core/functional/nuisance.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def bandpass_regressor_file(
109109
f_data[~freq_mask] = 0.0
110110
filtered[:, col] = np.real(ifft(f_data))[:n_tp]
111111

112-
out_path = regressor_file.parent / "regressors_filtered.1D"
112+
out_path = generate_exec_folder("regressors_filtered") / "regressors_filtered.1D"
113113
with out_path.open("w") as f:
114114
for line in header_lines:
115115
f.write(line)

src/rbc/core/functional/resampling.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def apply_motion_transforms(
104104
)
105105
transformed_vols.append(result.output.output_image_outfile)
106106

107-
out_path = transformed_vols[0].parent / "preproc_bold.nii.gz"
107+
out_path = generate_exec_folder("preproc_bold_merge") / "preproc_bold.nii.gz"
108108
merged = merge_3d_to_4d(transformed_vols, out_path)
109109

110110
# antsApplyTransforms writes the reference image's pixdim into the output
@@ -198,7 +198,10 @@ def resample_bold_to_template(
198198
)
199199
transformed_vols.append(result.output.output_image_outfile)
200200

201-
out_path = transformed_vols[0].parent / "bold_to_template_resampled.nii.gz"
201+
out_path = (
202+
generate_exec_folder("bold_to_template_merge")
203+
/ "bold_to_template_resampled.nii.gz"
204+
)
202205
merged = merge_3d_to_4d(transformed_vols, out_path)
203206

204207
# antsApplyTransforms writes the reference (template) image's pixdim into

src/rbc/core/longitudinal/freesurfer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from niwrap import freesurfer
1717

1818
from rbc.core.fsl2itk import mat_to_itk
19+
from rbc.core.niwrap import generate_exec_folder
1920

2021
if TYPE_CHECKING:
2122
from collections.abc import Sequence
@@ -139,7 +140,7 @@ def fs_to_itk_xfm(
139140
for ses, source, in_xfm in zip(sessions, sources, in_xfms, strict=True):
140141
fsl_fname = in_xfm.with_suffix(".mat").name
141142
lta = freesurfer.lta_convert(in_lta=in_xfm, out_fsl=fsl_fname)
142-
itk_path = in_xfm.parent / itk_filename(sub, ses)
143+
itk_path = generate_exec_folder("itk_xfm") / itk_filename(sub, ses)
143144
mat_to_itk(
144145
mat=lta.root / fsl_fname,
145146
reference=reference,

src/rbc/core/longitudinal/transform.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from rbc.core.common import merge_3d_to_4d, split_4d
1010
from rbc.core.functional.resampling import _restore_tr
11+
from rbc.core.niwrap import generate_exec_folder
1112

1213
if TYPE_CHECKING:
1314
from pathlib import Path
@@ -142,7 +143,10 @@ def _transform_4d_chunked(in_file: Path, template: Path, xfm: Path) -> Path:
142143
)
143144
transformed_vols.append(result.output.output_image_outfile)
144145

145-
out_path = transformed_vols[0].parent / "bold_to_longitudinal.nii.gz"
146+
out_path = (
147+
generate_exec_folder("bold_to_longitudinal_merge")
148+
/ "bold_to_longitudinal.nii.gz"
149+
)
146150
merged = merge_3d_to_4d(transformed_vols, out_path)
147151
_restore_tr(merged, in_file)
148152
return merged

src/rbc/core/metrics/alff.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import numpy as np
1818

1919
from rbc.core.nifti import Volume
20+
from rbc.core.niwrap import generate_exec_folder
2021

2122
if TYPE_CHECKING:
2223
from typing import Literal
@@ -256,7 +257,8 @@ def compute_alff(
256257

257258
stem = in_file.name.split(".nii")[0]
258259
if out_file is None:
259-
alff_path = in_file.parent / f"{stem}_alff.nii.gz"
260+
out_dir = generate_exec_folder("alff")
261+
alff_path = out_dir / f"{stem}_alff.nii.gz"
260262
else:
261263
alff_path = Path(out_file)
262264

src/rbc/core/metrics/reho.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from scipy.stats import rankdata
1616

1717
from rbc.core.nifti import Volume
18+
from rbc.core.niwrap import generate_exec_folder
1819

1920
if TYPE_CHECKING:
2021
from typing import Literal
@@ -188,7 +189,7 @@ def compute_reho(
188189

189190
if out_file is None:
190191
stem = in_file.name.split(".nii")[0]
191-
out_file = in_file.parent / f"{stem}_reho.nii.gz"
192+
out_file = generate_exec_folder("reho") / f"{stem}_reho.nii.gz"
192193
out_file = Path(out_file)
193194

194195
bold.derive(reho_map).save(out_file)

src/rbc/core/metrics/standardization.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
import numpy as np
1313

14+
from rbc.core.niwrap import generate_exec_folder
15+
1416

1517
def zscore(data: np.ndarray, mask: np.ndarray) -> np.ndarray:
1618
"""Z-score a 3D map within a brain mask.
@@ -71,7 +73,7 @@ def compute_zscore(
7173

7274
if out_file is None:
7375
stem = in_file.name.split(".nii")[0]
74-
out_file = in_file.parent / f"{stem}_zscored.nii.gz"
76+
out_file = generate_exec_folder("zscore") / f"{stem}_zscored.nii.gz"
7577
out_file = Path(out_file)
7678

7779
nib.nifti1.Nifti1Image(zscored, img.affine, img.header).to_filename(str(out_file))

src/rbc/core/metrics/timeseries.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
import numpy as np
1414

15+
from rbc.core.niwrap import generate_exec_folder
16+
1517
logger = logging.getLogger(__name__)
1618

1719

@@ -156,7 +158,7 @@ def compute_timeseries(
156158
corr = correlation_matrix(ts)
157159

158160
if out_dir is None:
159-
out_dir = in_file.parent
161+
out_dir = generate_exec_folder("timeseries")
160162
out_dir = Path(out_dir)
161163
out_dir.mkdir(parents=True, exist_ok=True)
162164

src/rbc/core/niwrap.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -213,19 +213,34 @@ def mount_fs_license(runner: niwrap.Runner, fs_license: Path) -> None:
213213
runner.environ["FS_LICENSE"] = _CONTAINER_FS_LICENSE_PATH
214214

215215

216-
def generate_exec_folder(suffix: str = "python") -> Path:
217-
"""Generate an execution folder following Styx hash pattern.
216+
_SCRATCH_ROOT: Path | None = None
218217

219-
Args:
220-
suffix: Task to append to suffix of folder (default: 'python')
221218

222-
Returns:
223-
Path to created execution folder
219+
def _scratch_root() -> Path:
220+
"""Return the shared rbc scratch root, creating it on first use.
221+
222+
Honors ``RBC_SCRATCH_DIR`` if set; otherwise creates a process-unique
223+
temp dir. Isolated from niwrap's runner data_dir so rbc scratch paths
224+
never collide with or scribble into cache shards.
224225
"""
225-
runner = niwrap.get_global_runner()
226-
dir_path = (
227-
Path(runner.data_dir) / f"{runner.uid}_{runner.execution_counter}_{suffix}"
228-
)
229-
dir_path.mkdir(parents=True)
230-
runner.execution_counter += 1
231-
return dir_path
226+
global _SCRATCH_ROOT
227+
if _SCRATCH_ROOT is not None and _SCRATCH_ROOT.exists():
228+
return _SCRATCH_ROOT
229+
override = os.environ.get("RBC_SCRATCH_DIR")
230+
if override:
231+
root = Path(override)
232+
root.mkdir(parents=True, exist_ok=True)
233+
else:
234+
root = Path(tempfile.mkdtemp(prefix="rbc_scratch_"))
235+
_SCRATCH_ROOT = root
236+
return root
237+
238+
239+
def generate_exec_folder(suffix: str = "python") -> Path:
240+
"""Create a fresh scratch directory for intermediate rbc-owned outputs.
241+
242+
Returns a unique directory under the rbc scratch root. The directory
243+
does not live under any niwrap runner's data dir, so writes here are
244+
guaranteed not to touch a cached tool output.
245+
"""
246+
return Path(tempfile.mkdtemp(prefix=f"{suffix}_", dir=_scratch_root()))

tests/unit/core/test_longitudinal_freesurfer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,9 @@ def test_per_session_itk_naming(
143143
in_xfms=in_xfms,
144144
)
145145

146-
assert result == [
147-
tmp_path / itk_filename("01", "baseline"),
148-
tmp_path / itk_filename("01", "vis2"),
146+
assert [p.name for p in result] == [
147+
itk_filename("01", "baseline"),
148+
itk_filename("01", "vis2"),
149149
]
150150
assert mock_fs.lta_convert.call_count == 2
151151
assert mock_mat_to_itk.call_count == 2

0 commit comments

Comments
 (0)