Skip to content

Commit ea3d13f

Browse files
committed
fix PARTICIPANT_ID_MISMATCH
1 parent 75bd720 commit ea3d13f

File tree

1 file changed

+26
-135
lines changed

1 file changed

+26
-135
lines changed

cubids/workflows.py

Lines changed: 26 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -127,27 +127,32 @@ def _link_or_copy(src_path, dst_path):
127127
if not os.path.exists(subject_folder_path):
128128
os.makedirs(subject_folder_path, exist_ok=True)
129129

130-
# Ensure participants.tsv is available in temp root
130+
# Ensure participants.tsv is available in temp root and is a copy (not a link)
131131
# Always COPY (never link) to avoid modifying the original file when filtering
132132
participants_tsv_path = os.path.join(temporary_bids_dir, "participants.tsv")
133-
if not os.path.exists(participants_tsv_path):
134-
# Try to find a source participants.tsv in the provided file list
133+
# Always remove existing file first in case it was linked in the earlier loop
134+
if os.path.exists(participants_tsv_path):
135135
try:
136-
source_participants_tsv_path = None
137-
for candidate_path in files_list:
138-
if os.path.basename(candidate_path) == "participants.tsv":
139-
source_participants_tsv_path = candidate_path
140-
break
141-
# If not in file list, try to get it from the original bids_dir
142-
if not source_participants_tsv_path and bids_dir:
143-
potential_path = os.path.join(bids_dir, "participants.tsv")
144-
if os.path.exists(potential_path):
145-
source_participants_tsv_path = potential_path
146-
if source_participants_tsv_path:
147-
# Always copy (not link) to protect the original file from modification
148-
shutil.copy2(source_participants_tsv_path, participants_tsv_path)
136+
os.remove(participants_tsv_path)
149137
except Exception: # noqa: BLE001
150138
pass
139+
# Try to find a source participants.tsv in the provided file list
140+
try:
141+
source_participants_tsv_path = None
142+
for candidate_path in files_list:
143+
if os.path.basename(candidate_path) == "participants.tsv":
144+
source_participants_tsv_path = candidate_path
145+
break
146+
# If not in file list, try to get it from the original bids_dir
147+
if not source_participants_tsv_path and bids_dir:
148+
potential_path = os.path.join(bids_dir, "participants.tsv")
149+
if os.path.exists(potential_path):
150+
source_participants_tsv_path = potential_path
151+
if source_participants_tsv_path:
152+
# Always copy (not link) to protect the original file from modification
153+
shutil.copy2(source_participants_tsv_path, participants_tsv_path)
154+
except Exception: # noqa: BLE001
155+
pass
151156

152157
# If participants.tsv exists in the temp BIDS root, filter to current subject
153158
if os.path.exists(participants_tsv_path):
@@ -334,125 +339,11 @@ def validate(
334339
finally:
335340
pbar.update(1)
336341
else:
337-
# Sequential processing
338-
def _link_or_copy(src_path, dst_path):
339-
"""Materialize src_path at dst_path favoring hardlinks, then symlinks, then copy.
340-
341-
This minimizes disk I/O and maximizes throughput when many subjects are processed.
342-
"""
343-
# If destination already exists (rare with temp dirs), skip
344-
if os.path.exists(dst_path):
345-
return
346-
try:
347-
# Prefer hardlink when on the same filesystem
348-
os.link(src_path, dst_path)
349-
return
350-
except OSError as e:
351-
# EXDEV: cross-device link; fallback to symlink
352-
if e.errno != errno.EXDEV:
353-
# Other hardlink errors may still allow symlink
354-
pass
355-
try:
356-
os.symlink(src_path, dst_path)
357-
return
358-
except OSError:
359-
# Fallback to a regular copy as last resort
360-
shutil.copy2(src_path, dst_path)
361-
362-
for subject, files_list in tqdm.tqdm(subjects_dict.items()):
363-
# Create a temporary directory and populate with links
364-
with tempfile.TemporaryDirectory() as temporary_bids_dir:
365-
366-
for file_path in files_list:
367-
bids_start = file_path.find(subject)
368-
369-
if bids_start < 1:
370-
tmp_file_dir = temporary_bids_dir
371-
else:
372-
bids_folder = Path(file_path[bids_start:]).parent
373-
tmp_file_dir = os.path.join(temporary_bids_dir, str(bids_folder))
374-
375-
if not os.path.exists(tmp_file_dir):
376-
os.makedirs(tmp_file_dir)
377-
output = os.path.join(tmp_file_dir, str(Path(file_path).name))
378-
_link_or_copy(file_path, output)
379-
380-
# Ensure dataset_description.json is available in temp root
381-
dataset_description_path = os.path.join(
382-
temporary_bids_dir, "dataset_description.json"
383-
)
384-
if not os.path.exists(dataset_description_path):
385-
# Try to find dataset_description.json in the provided file list first
386-
source_dataset_description_path = None
387-
for candidate_path in files_list:
388-
if os.path.basename(candidate_path) == "dataset_description.json":
389-
source_dataset_description_path = candidate_path
390-
break
391-
# If not in file list, try to get it from the original bids_dir
392-
if not source_dataset_description_path:
393-
potential_path = os.path.join(bids_dir, "dataset_description.json")
394-
if os.path.exists(potential_path):
395-
source_dataset_description_path = potential_path
396-
if source_dataset_description_path:
397-
_link_or_copy(
398-
source_dataset_description_path, dataset_description_path
399-
)
400-
401-
# Ensure the subject folder exists as a directory in temp root
402-
subject_folder_path = os.path.join(temporary_bids_dir, subject)
403-
if not os.path.exists(subject_folder_path):
404-
os.makedirs(subject_folder_path, exist_ok=True)
405-
406-
# Ensure participants.tsv exists
407-
participants_tsv_path = os.path.join(temporary_bids_dir, "participants.tsv")
408-
if not os.path.exists(participants_tsv_path):
409-
try:
410-
source_participants_tsv_path = None
411-
for candidate_path in files_list:
412-
if os.path.basename(candidate_path) == "participants.tsv":
413-
source_participants_tsv_path = candidate_path
414-
break
415-
# If not in file list, try to get it from the original bids_dir
416-
if not source_participants_tsv_path:
417-
potential_path = os.path.join(bids_dir, "participants.tsv")
418-
if os.path.exists(potential_path):
419-
source_participants_tsv_path = potential_path
420-
if source_participants_tsv_path:
421-
# Always copy (not link) to protect original file
422-
shutil.copy2(source_participants_tsv_path, participants_tsv_path)
423-
except Exception: # noqa: BLE001
424-
pass
425-
426-
if os.path.exists(participants_tsv_path):
427-
try:
428-
participants_table = pd.read_csv(participants_tsv_path, sep="\t")
429-
if "participant_id" in participants_table.columns:
430-
participant_ids = participants_table["participant_id"]
431-
is_current_subject = participant_ids.eq(subject)
432-
participants_table = participants_table[is_current_subject]
433-
participants_table.to_csv(
434-
participants_tsv_path,
435-
sep="\t",
436-
index=False,
437-
)
438-
except Exception as e: # noqa: F841
439-
# Non-fatal: continue validation even if filtering fails
440-
pass
441-
442-
# Run the validator
443-
nifti_head = ignore_nifti_headers
444-
call = build_validator_call(
445-
temporary_bids_dir, local_validator, nifti_head, schema=schema
446-
)
447-
ret = run_validator(call)
448-
if ret.returncode != 0:
449-
logger.error("Errors returned from validator run, parsing now")
450-
451-
decoded = ret.stdout.decode("UTF-8")
452-
tmp_parse = parse_validator_output(decoded)
453-
if tmp_parse.shape[1] > 1:
454-
tmp_parse["subject"] = subject
455-
parsed.append(tmp_parse)
342+
# Sequential processing using the same helper as the parallel path
343+
for args in tqdm.tqdm(validation_args, desc="Validating subjects"):
344+
subject, result = _validate_single_subject(args)
345+
if result is not None and result.shape[1] > 1:
346+
parsed.append(result)
456347

457348
# concatenate the parsed data and exit
458349
if len(parsed) < 1:

0 commit comments

Comments
 (0)