88
99from bs4 import BeautifulSoup
1010
11- from bids_prov .utils import get_default_graph , CONTEXT_URL , get_id , label_mapping , compute_sha_256_entity , \
11+ from bids_prov .utils import (
12+ get_default_graph , CONTEXT_URL , label_mapping , compute_sha_256_entity ,
13+ get_activity_urn , get_agent_urn , get_entity_urn , make_alnum , get_uuid ,
1214 writing_jsonld
15+ )
1316
1417# regex to catch inputs
1518# in `cp /fsl/5.0/doc/fsl.css .files no_ext 5.0` --> only `.files` should match
@@ -250,7 +253,7 @@ def _get_entities_from_kwarg(entities, opts, parse_kwarg):
250253 value = []
251254 for (arg , val ) in opts ._get_kwargs ():
252255 # print("\n--arg, val", type(arg), type(val), arg, val)
253- if param .split ( "-" )[ 1 ] == arg :
256+ if param .strip ( '-' ) == arg :
254257 # print("\n----arg select", type(arg), arg)
255258 if val != None :
256259 # print("\n------val != None", type(val), val)
@@ -388,14 +391,14 @@ def get_entities(cmd_s, parameters):
388391 if "GeneratedBy" in parameters :
389392 outputs .extend (_get_arg (parameters ["GeneratedBy" ], arg_rest ))
390393
391- # print("\n\n inputs", inputs)
394+ # print("\n\n inputs", inputs)
392395 # print("\n\n outputs", outputs)
393396 # print("\n\n params", params)
394397
395398 return inputs , outputs , params
396399
397400
398- def build_records (groups : Mapping [str , List [str ]], agent_id : str ):
401+ def build_records (groups : Mapping [str , List [str ]], agent_id : str , verbose : bool = False ):
399402 """
400403 Build the `records` field for the final .jsonld file,
401404 from commands lines grouped by stage (e.g. `Registration`, `Post-stats`)
@@ -420,8 +423,11 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
420423 for cmd in v :
421424 # process to remove + and - in pngappend command
422425 cmd = cmd .replace (" + " , " " ).replace (" - " , " " )
426+ # remove multiple spaces
427+ cmd = ' ' .join (cmd .split ())
428+ # split on " " or "=" (the "|" in the pattern is regex alternation, not a delimiter)
423429 cmd_s = re .split (" |=" , cmd )
424- a_name = cmd_s [0 ]
430+ activity_name = cmd_s [0 ]
425431
426432 inputs = []
427433 outputs = []
@@ -430,7 +436,7 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
430436
431437 function_in_description_functions = False
432438
433- command_name_end = os .path .split (a_name )[1 ]
439+ command_name_end = os .path .split (activity_name )[1 ]
434440 for df in description_functions :
435441 if df ["Name" ] == command_name_end :
436442 description_of_command = df
@@ -439,7 +445,15 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
439445 cmd_s [1 :], description_of_command )
440446 break
441447
448+ if verbose :
449+ print ("CMD" , cmd )
450+ print ('-> inputs: ' , inputs )
451+ print ('<- outputs: ' , outputs )
452+ print (" others args :" , * parameters )
453+
442454 if function_in_description_functions is False :
455+ print (f"-> { command_name_end } : Not present in description_functions" )
456+
443457 # if the function is not in our description file, the process is based on regex
444458 attributes = defaultdict (list )
445459
@@ -457,9 +471,9 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
457471 outputs = list (chain (* (attributes .pop (k )
458472 for k in attributes .keys () & OUTPUT_TAGS )))
459473 entity_names = [_ for _ in re .findall (
460- INPUT_RE , cmd_without_attributes [len (a_name ):])]
474+ INPUT_RE , cmd_without_attributes [len (activity_name ):])]
461475
462- # # cmd_conf = get_closest_config(a_name ) # with the module boutiques
476+ # # cmd_conf = get_closest_config(activity_name ) # with the module boutiques
463477 # cmd_conf = None # None because boutiques is not used at this time
464478 # # if cmd_conf:
465479 # # pos_args = filter(lambda e: not e.startswith("-"), cmd_s) # TODO use "-key value" mappings
@@ -471,13 +485,14 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
471485 if len (entity_names ) > 1 :
472486 inputs .append (entity_names [0 ])
473487
474- # the file name and possible extension
475- label = f"{ os .path .split (a_name )[1 ]} "
476-
477- a = {
478- "@id" : f"urn:{ get_id ()} " ,
479- "Label" : label_mapping (label , "fsl/fsl_labels.json" ),
480- "AssociatedWith" : "urn:" + agent_id ,
488+ # Create activity label & record
489+ activity_label = label_mapping (
490+ f'{ os .path .split (activity_name )[1 ]} ' ,
491+ 'fsl/fsl_labels.json' )
492+ activity = {
493+ "@id" : get_activity_urn (activity_label ),
494+ "Label" : activity_label ,
495+ "AssociatedWith" : agent_id ,
481496 "Command" : cmd ,
482497 # "attributes": [
483498 # {k: v if len(v) > 1 else v[0]} for k, v in attributes.items()
@@ -487,48 +502,58 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
487502
488503 for input_path in inputs :
489504 # input_name = input_path.replace("/", "_") # TODO
490- input_id = f"urn:{ get_id ()} " # def format_id
505+ if not make_alnum (input_path ):
506+ input_id = 'urn:uuid:' + get_uuid ()
507+ else :
508+ input_id = get_entity_urn (input_path )
491509
492510 existing_input = next (
493- (entity for entity in records ["Entities" ] if entity ["AtLocation" ] == input_path ), None )
511+ (e for e in records ["Entities" ] if e ["AtLocation" ] == input_path ), None )
494512 if existing_input is None :
495- e = {
513+ entity = {
496514 "@id" : input_id ,
497515 "Label" : os .path .split (input_path )[1 ],
498516 "AtLocation" : input_path ,
499517 }
500- records ["Entities" ].append (e )
501- a ["Used" ].append (input_id )
518+ records ["Entities" ].append (entity )
519+ activity ["Used" ].append (input_id )
502520 else :
503- a ["Used" ].append (existing_input ["@id" ])
521+ activity ["Used" ].append (existing_input ["@id" ])
504522
505523 # Order does not matter, so it makes sense to keep only unique values
506- a ["Used" ] = sorted (set (a ["Used" ]))
524+ activity ["Used" ] = sorted (set (activity ["Used" ]))
507525
508526 for output_path in outputs :
509527 # output_name = output_path.replace("/", "_") # TODO
528+ if not make_alnum (output_path ):
529+ output_id = 'urn:uuid:' + get_uuid ()
530+ else :
531+ output_id = get_entity_urn (output_path )
532+
510533 records ["Entities" ].append (
511534 {
512- "@id" : f"urn: { get_id () } " ,
535+ "@id" : output_id ,
513536 "Label" : os .path .split (output_path )[1 ],
514537 "AtLocation" : output_path ,
515- "GeneratedBy" : a ["@id" ],
538+ "GeneratedBy" : activity ["@id" ],
516539 # "derivedFrom": input_id,
517540 }
518541 )
519542
520- records ["Activities" ].append (a )
543+ records ["Activities" ].append (activity )
544+ if verbose :
545+ print ('-------------------------' )
521546 return dict (records )
522547
523548
524549def fsl_to_bids_prov (filename : str , context_url = CONTEXT_URL , output_file = None ,
525550 soft_ver = "xxx" , indent = 2 , verbose = False ) -> bool : # TODO : add fsl version
526551
527552 graph , agent_id = get_default_graph (
528- label = "FSL" , context_url = context_url , soft_ver = soft_ver )
553+ soft_label = "FSL" , context_url = context_url , soft_version = soft_ver )
529554
530555 lines = readlines (filename )
531- records = build_records (lines , agent_id )
556+ records = build_records (lines , agent_id , verbose )
532557 graph ["Records" ].update (records )
533558
534559 compute_sha_256_entity (graph ["Records" ]["Entities" ])
0 commit comments