@@ -86,27 +86,63 @@ def _link_or_copy(src_path, dst_path):
8686 shutil .copy2 (src_path , dst_path )
8787
8888 # Create temporary directory and populate with links
89- with tempfile .TemporaryDirectory () as tmpdir :
89+ with tempfile .TemporaryDirectory () as temporary_bids_dir :
9090 for file_path in files_list :
9191 # Cut the path down to the subject label
9292 bids_start = file_path .find (subject )
9393
9494 # Maybe it's a single file (root-level file)
9595 if bids_start < 1 :
96- tmp_file_dir = tmpdir
96+ tmp_file_dir = temporary_bids_dir
9797 else :
9898 bids_folder = Path (file_path [bids_start :]).parent
99- tmp_file_dir = os .path .join (tmpdir , str (bids_folder ))
99+ tmp_file_dir = os .path .join (temporary_bids_dir , str (bids_folder ))
100100
101101 if not os .path .exists (tmp_file_dir ):
102102 os .makedirs (tmp_file_dir )
103103
104104 output_path = os .path .join (tmp_file_dir , str (Path (file_path ).name ))
105105 _link_or_copy (file_path , output_path )
106106
107+ # Ensure participants.tsv is available in the temporary BIDS root;
108+ # if it is missing, copy it from the original file list.
109+ participants_tsv_path = os .path .join (temporary_bids_dir , "participants.tsv" )
110+ if not os .path .exists (participants_tsv_path ):
111+ # Try to find a source participants.tsv in the provided file list
112+ try :
113+ source_participants_tsv_path = None
114+ for candidate_path in files_list :
115+ if os .path .basename (candidate_path ) == "participants.tsv" :
116+ source_participants_tsv_path = candidate_path
117+ break
118+ if source_participants_tsv_path :
119+ _link_or_copy (source_participants_tsv_path , participants_tsv_path )
120+ except Exception : # noqa: BLE001
121+ pass
122+
123+ # If participants.tsv exists in the temporary BIDS root, keep only the current subject's rows
124+ if os .path .exists (participants_tsv_path ):
125+ try :
126+ participants_table = pd .read_csv (participants_tsv_path , sep = "\t " )
127+ if "participant_id" in participants_table .columns :
128+ participant_ids = participants_table ["participant_id" ]
129+ is_current_subject = participant_ids .eq (subject )
130+ participants_table = participants_table [is_current_subject ]
131+ participants_table .to_csv (
132+ participants_tsv_path ,
133+ sep = "\t " ,
134+ index = False ,
135+ )
136+ except Exception as e : # noqa: F841
137+ # Non-fatal: continue validation even if filtering fails
138+ pass
139+
107140 # Run the validator
108141 call = build_validator_call (
109- tmpdir , local_validator , ignore_nifti_headers , schema = schema_path
142+ temporary_bids_dir ,
143+ local_validator ,
144+ ignore_nifti_headers ,
145+ schema = schema_path ,
110146 )
111147 result = run_validator (call )
112148
@@ -124,13 +160,12 @@ def _link_or_copy(src_path, dst_path):
124160def validate (
125161 bids_dir ,
126162 output_prefix ,
127- sequential ,
128- sequential_subjects ,
163+ validation_scope ,
164+ participant_label ,
129165 local_validator ,
130166 ignore_nifti_headers ,
131167 schema ,
132168 n_cpus = 1 ,
133- max_workers = None ,
134169):
135170 """Run the bids validator.
136171
@@ -140,35 +175,28 @@ def validate(
140175 Path to the BIDS directory.
141176 output_prefix : :obj:`pathlib.Path`
142177 Output filename prefix.
143- sequential : :obj:`bool`
144- Run the validator sequentially.
145- sequential_subjects : :obj:`list` of :obj:`str`
146- Filter the sequential run to only include the listed subjects.
178+ validation_scope : :obj:`str`
179+ Scope of validation: 'dataset' validates the entire dataset,
180+ 'subject' validates each subject separately.
181+ participant_label : :obj:`list` of :obj:`str`
182+ Filter the validation to only include the listed subjects.
147183 local_validator : :obj:`bool`
148184 Use the local bids validator.
149185 ignore_nifti_headers : :obj:`bool`
150186 Ignore NIfTI headers when validating.
151187 schema : :obj:`pathlib.Path` or None
152188 Path to the BIDS schema file.
153189 n_cpus : :obj:`int`
154- Number of CPUs to use for parallel validation (only when sequential=True ).
190+ Number of CPUs to use for parallel validation (only when validation_scope='subject' ).
155191 Default is 1 (sequential processing).
156- max_workers : :obj:`int` or None
157- Maximum number of parallel workers. If None, automatically optimized
158- using formula: sqrt(n_cpus * 16) to balance I/O throughput. Set explicitly
159- to override (e.g., for I/O-constrained systems).
160192 """
161193 # Ensure n_cpus is at least 1
162194 n_cpus = max (1 , n_cpus )
163- # Derive effective worker count: honor explicit max_workers; otherwise use heuristic
164- if max_workers is not None :
165- effective_workers = max (1 , int (max_workers ))
166- else :
167- # Heuristic tuned for I/O-bound workloads materializing files + validator runs.
168- # sqrt(n_cpus * 16) caps concurrency to avoid disk thrashing while keeping CPU busy.
169- effective_workers = max (1 , int ((n_cpus * 16 ) ** 0.5 ))
170- # Do not exceed n_cpus unless user explicitly asks via --max-workers
171- effective_workers = min (effective_workers , n_cpus )
195+ # Derive the effective worker count with a heuristic tuned for I/O-bound
196+ # workloads (materializing files + validator runs): sqrt(n_cpus * 16).
197+ effective_workers = max (1 , int ((n_cpus * 16 ) ** 0.5 ))
198+ # Do not exceed n_cpus
199+ effective_workers = min (effective_workers , n_cpus )
172200
173201 # check status of output_prefix, absolute or relative?
174202 abs_path_output = True
@@ -182,7 +210,7 @@ def validate(
182210 subprocess .run (["mkdir" , str (bids_dir / "code" / "CuBIDS" )])
183211
184212 # Run directly from python using subprocess
185- if not sequential :
213+ if validation_scope == "dataset" :
186214 # run on full dataset
187215 call = build_validator_call (
188216 str (bids_dir ),
@@ -236,8 +264,8 @@ def validate(
236264
237265 parsed = []
238266
239- if sequential_subjects :
240- subjects_dict = {k : v for k , v in subjects_dict .items () if k in sequential_subjects }
267+ if participant_label :
268+ subjects_dict = {k : v for k , v in subjects_dict .items () if k in participant_label }
241269 assert len (list (subjects_dict .keys ())) > 1 , "No subjects found in filter"
242270
243271 # Convert schema Path to string if it exists (for multiprocessing pickling)
@@ -302,26 +330,56 @@ def _link_or_copy(src_path, dst_path):
302330
303331 for subject , files_list in tqdm .tqdm (subjects_dict .items ()):
304332 # Create a temporary directory and populate with links
305- with tempfile .TemporaryDirectory () as tmpdirname :
333+ with tempfile .TemporaryDirectory () as temporary_bids_dir :
306334
307335 for file_path in files_list :
308336 bids_start = file_path .find (subject )
309337
310338 if bids_start < 1 :
311- tmp_file_dir = tmpdirname
339+ tmp_file_dir = temporary_bids_dir
312340 else :
313341 bids_folder = Path (file_path [bids_start :]).parent
314- tmp_file_dir = os .path .join (tmpdirname , str (bids_folder ))
342+ tmp_file_dir = os .path .join (temporary_bids_dir , str (bids_folder ))
315343
316344 if not os .path .exists (tmp_file_dir ):
317345 os .makedirs (tmp_file_dir )
318346 output = os .path .join (tmp_file_dir , str (Path (file_path ).name ))
319347 _link_or_copy (file_path , output )
320348
349+ # Ensure participants.tsv exists: copy it from the file list if missing, then filter it to the current subject
350+ participants_tsv_path = os .path .join (temporary_bids_dir , "participants.tsv" )
351+ if not os .path .exists (participants_tsv_path ):
352+ try :
353+ source_participants_tsv_path = None
354+ for candidate_path in files_list :
355+ if os .path .basename (candidate_path ) == "participants.tsv" :
356+ source_participants_tsv_path = candidate_path
357+ break
358+ if source_participants_tsv_path :
359+ _link_or_copy (source_participants_tsv_path , participants_tsv_path )
360+ except Exception : # noqa: BLE001
361+ pass
362+
363+ if os .path .exists (participants_tsv_path ):
364+ try :
365+ participants_table = pd .read_csv (participants_tsv_path , sep = "\t " )
366+ if "participant_id" in participants_table .columns :
367+ participant_ids = participants_table ["participant_id" ]
368+ is_current_subject = participant_ids .eq (subject )
369+ participants_table = participants_table [is_current_subject ]
370+ participants_table .to_csv (
371+ participants_tsv_path ,
372+ sep = "\t " ,
373+ index = False ,
374+ )
375+ except Exception as e : # noqa: F841
376+ # Non-fatal: continue validation even if filtering fails
377+ pass
378+
321379 # Run the validator
322380 nifti_head = ignore_nifti_headers
323381 call = build_validator_call (
324- tmpdirname , local_validator , nifti_head , schema = schema
382+ temporary_bids_dir , local_validator , nifti_head , schema = schema
325383 )
326384 ret = run_validator (call )
327385 if ret .returncode != 0 :
@@ -379,7 +437,7 @@ def bids_version(bids_dir, write=False, schema=None):
379437 Path to the BIDS schema file.
380438 """
381439 # Need to run validator to get output with schema version
382- # Copy code from `validate --sequential `
440+ # Copy code from `validate --validation-scope subject `
383441
384442 try : # return first subject
385443 # Get all folders that start with "sub-"
0 commit comments