@@ -127,27 +127,32 @@ def _link_or_copy(src_path, dst_path):
127127 if not os .path .exists (subject_folder_path ):
128128 os .makedirs (subject_folder_path , exist_ok = True )
129129
130- # Ensure participants.tsv is available in temp root
130+ # Ensure participants.tsv is available in temp root and is a copy (not a link)
131131 # Always COPY (never link) to avoid modifying the original file when filtering
132132 participants_tsv_path = os .path .join (temporary_bids_dir , "participants.tsv" )
133- if not os . path . exists ( participants_tsv_path ):
134- # Try to find a source participants.tsv in the provided file list
133+ # Always remove existing file first in case it was linked in the earlier loop
134+ if os . path . exists ( participants_tsv_path ):
135135 try :
136- source_participants_tsv_path = None
137- for candidate_path in files_list :
138- if os .path .basename (candidate_path ) == "participants.tsv" :
139- source_participants_tsv_path = candidate_path
140- break
141- # If not in file list, try to get it from the original bids_dir
142- if not source_participants_tsv_path and bids_dir :
143- potential_path = os .path .join (bids_dir , "participants.tsv" )
144- if os .path .exists (potential_path ):
145- source_participants_tsv_path = potential_path
146- if source_participants_tsv_path :
147- # Always copy (not link) to protect the original file from modification
148- shutil .copy2 (source_participants_tsv_path , participants_tsv_path )
136+ os .remove (participants_tsv_path )
149137 except Exception : # noqa: BLE001
150138 pass
139+ # Try to find a source participants.tsv in the provided file list
140+ try :
141+ source_participants_tsv_path = None
142+ for candidate_path in files_list :
143+ if os .path .basename (candidate_path ) == "participants.tsv" :
144+ source_participants_tsv_path = candidate_path
145+ break
146+ # If not in file list, try to get it from the original bids_dir
147+ if not source_participants_tsv_path and bids_dir :
148+ potential_path = os .path .join (bids_dir , "participants.tsv" )
149+ if os .path .exists (potential_path ):
150+ source_participants_tsv_path = potential_path
151+ if source_participants_tsv_path :
152+ # Always copy (not link) to protect the original file from modification
153+ shutil .copy2 (source_participants_tsv_path , participants_tsv_path )
154+ except Exception : # noqa: BLE001
155+ pass
151156
152157 # If participants.tsv exists in the temp BIDS root, filter to current subject
153158 if os .path .exists (participants_tsv_path ):
@@ -334,125 +339,11 @@ def validate(
334339 finally :
335340 pbar .update (1 )
336341 else :
337- # Sequential processing
def _link_or_copy(src_path, dst_path):
    """Materialize ``src_path`` at ``dst_path`` as cheaply as possible.

    Strategies are tried in order of increasing cost: hardlink, then symlink,
    then a regular copy. This minimizes disk I/O and maximizes throughput
    when many subjects are processed.

    Args:
        src_path: Path of the existing file to expose at ``dst_path``.
        dst_path: Path to create. Left untouched if it already exists.

    Raises:
        OSError: If hardlinking and symlinking both fail and the final
            ``shutil.copy2`` fallback fails as well.
    """
    # If destination already exists (rare with temp dirs), skip
    if os.path.exists(dst_path):
        return

    try:
        # Prefer a hardlink; this only works on the same filesystem.
        os.link(src_path, dst_path)
    except OSError:
        # Any hardlink failure (cross-device EXDEV, permissions, unsupported
        # filesystem, ...) may still allow a symlink, so just fall through.
        pass
    else:
        return

    try:
        # A symlink stores its target verbatim, and a relative target is
        # resolved against the directory containing the link, not the current
        # working directory. Link to the absolute path so a relative
        # ``src_path`` does not produce a dangling link.
        os.symlink(os.path.abspath(src_path), dst_path)
    except OSError:
        # Fallback to a regular copy as last resort
        shutil.copy2(src_path, dst_path)
361-
362- for subject , files_list in tqdm .tqdm (subjects_dict .items ()):
363- # Create a temporary directory and populate with links
364- with tempfile .TemporaryDirectory () as temporary_bids_dir :
365-
366- for file_path in files_list :
367- bids_start = file_path .find (subject )
368-
369- if bids_start < 1 :
370- tmp_file_dir = temporary_bids_dir
371- else :
372- bids_folder = Path (file_path [bids_start :]).parent
373- tmp_file_dir = os .path .join (temporary_bids_dir , str (bids_folder ))
374-
375- if not os .path .exists (tmp_file_dir ):
376- os .makedirs (tmp_file_dir )
377- output = os .path .join (tmp_file_dir , str (Path (file_path ).name ))
378- _link_or_copy (file_path , output )
379-
380- # Ensure dataset_description.json is available in temp root
381- dataset_description_path = os .path .join (
382- temporary_bids_dir , "dataset_description.json"
383- )
384- if not os .path .exists (dataset_description_path ):
385- # Try to find dataset_description.json in the provided file list first
386- source_dataset_description_path = None
387- for candidate_path in files_list :
388- if os .path .basename (candidate_path ) == "dataset_description.json" :
389- source_dataset_description_path = candidate_path
390- break
391- # If not in file list, try to get it from the original bids_dir
392- if not source_dataset_description_path :
393- potential_path = os .path .join (bids_dir , "dataset_description.json" )
394- if os .path .exists (potential_path ):
395- source_dataset_description_path = potential_path
396- if source_dataset_description_path :
397- _link_or_copy (
398- source_dataset_description_path , dataset_description_path
399- )
400-
401- # Ensure the subject folder exists as a directory in temp root
402- subject_folder_path = os .path .join (temporary_bids_dir , subject )
403- if not os .path .exists (subject_folder_path ):
404- os .makedirs (subject_folder_path , exist_ok = True )
405-
406- # Ensure participants.tsv exists
407- participants_tsv_path = os .path .join (temporary_bids_dir , "participants.tsv" )
408- if not os .path .exists (participants_tsv_path ):
409- try :
410- source_participants_tsv_path = None
411- for candidate_path in files_list :
412- if os .path .basename (candidate_path ) == "participants.tsv" :
413- source_participants_tsv_path = candidate_path
414- break
415- # If not in file list, try to get it from the original bids_dir
416- if not source_participants_tsv_path :
417- potential_path = os .path .join (bids_dir , "participants.tsv" )
418- if os .path .exists (potential_path ):
419- source_participants_tsv_path = potential_path
420- if source_participants_tsv_path :
421- # Always copy (not link) to protect original file
422- shutil .copy2 (source_participants_tsv_path , participants_tsv_path )
423- except Exception : # noqa: BLE001
424- pass
425-
426- if os .path .exists (participants_tsv_path ):
427- try :
428- participants_table = pd .read_csv (participants_tsv_path , sep = "\t " )
429- if "participant_id" in participants_table .columns :
430- participant_ids = participants_table ["participant_id" ]
431- is_current_subject = participant_ids .eq (subject )
432- participants_table = participants_table [is_current_subject ]
433- participants_table .to_csv (
434- participants_tsv_path ,
435- sep = "\t " ,
436- index = False ,
437- )
438- except Exception as e : # noqa: F841
439- # Non-fatal: continue validation even if filtering fails
440- pass
441-
442- # Run the validator
443- nifti_head = ignore_nifti_headers
444- call = build_validator_call (
445- temporary_bids_dir , local_validator , nifti_head , schema = schema
446- )
447- ret = run_validator (call )
448- if ret .returncode != 0 :
449- logger .error ("Errors returned from validator run, parsing now" )
450-
451- decoded = ret .stdout .decode ("UTF-8" )
452- tmp_parse = parse_validator_output (decoded )
453- if tmp_parse .shape [1 ] > 1 :
454- tmp_parse ["subject" ] = subject
455- parsed .append (tmp_parse )
342+ # Sequential processing using the same helper as the parallel path
343+ for args in tqdm .tqdm (validation_args , desc = "Validating subjects" ):
344+ subject , result = _validate_single_subject (args )
345+ if result is not None and result .shape [1 ] > 1 :
346+ parsed .append (result )
456347
457348 # concatenate the parsed data and exit
458349 if len (parsed ) < 1 :
0 commit comments