Skip to content

Commit 691710b

Browse files
authored
Merge branch 'master' into merge
2 parents 62dd89e + fb8b6c5 commit 691710b

File tree

180 files changed

+19036
-17259
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

180 files changed

+19036
-17259
lines changed

bids_prov/afni/afni_parser.py

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@
77
from itertools import chain
88

99
from bids_prov.fsl.fsl_parser import get_entities
10-
from bids_prov.utils import get_default_graph, CONTEXT_URL, get_id, label_mapping, compute_sha_256_entity, \
10+
from bids_prov.utils import (
11+
get_default_graph, CONTEXT_URL, label_mapping, compute_sha_256_entity,
12+
get_activity_urn, get_agent_urn, get_entity_urn, make_alnum, get_uuid,
1113
writing_jsonld
14+
)
1215

1316
# regex to catch inputs
1417
# in `cp /fsl/5.0/doc/fsl.css .files no_ext 5.0` --> only `.files` should match
@@ -117,12 +120,12 @@ def build_records(commands_block: list, agent_id: str, verbose: bool = False):
117120

118121
for (block, cmd) in commands_block:
119122
cmd_s = re.split(" |=", cmd)
120-
a_name = cmd_s[0]
123+
activity_name = cmd_s[0]
121124
cmd_args_remain = cmd_s[1:]
122125
inputs = []
123126
outputs = []
124127
function_in_description_functions = False
125-
command_name_end = os.path.split(a_name)[1]
128+
command_name_end = os.path.split(activity_name)[1]
126129

127130
for df in description_functions:
128131
if df["Name"] == command_name_end:
@@ -182,27 +185,32 @@ def build_records(commands_block: list, agent_id: str, verbose: bool = False):
182185
outputs = list(chain(*(attributes.pop(k)
183186
for k in attributes.keys() & OUTPUT_TAGS)))
184187
entity_names = [_ for _ in re.findall(
185-
INPUT_RE, cmd_without_attributes[len(a_name):])]
188+
INPUT_RE, cmd_without_attributes[len(activity_name):])]
186189

187190
if entity_names and entity_names[0] in cmd_without_attributes:
188191
outputs.append(entity_names[-1])
189192
if len(entity_names) > 1:
190193
inputs.append(entity_names[0])
191194

192195
# the file name and possible extension
193-
label = f"{os.path.split(a_name)[1]}"
194-
196+
activity_label = label_mapping(
197+
f'{os.path.split(activity_name)[1]}',
198+
'afni/afni_labels.json')
195199
activity = {
196-
"@id": f"urn:{get_id()}",
197-
"Label": label_mapping(label, "afni/afni_labels.json"),
198-
"AssociatedWith": "urn:" + agent_id,
200+
"@id": get_activity_urn(activity_label),
201+
"Label": activity_label,
202+
"AssociatedWith": agent_id,
199203
"Command": cmd,
200204
"Parameters": param_dic,
201205
"Used": list(),
202206
}
203207

204208
for input_path in inputs:
205-
input_id = f"urn:{get_id()}" # def format_id
209+
# Deal with not human readable paths
210+
if not make_alnum(input_path):
211+
input_id = 'urn:uuid:' + get_uuid()
212+
else:
213+
input_id = get_entity_urn(input_path)
206214
existing_input = next(
207215
(entity for entity in records["Entities"] if entity["AtLocation"] == input_path), None)
208216

@@ -223,9 +231,13 @@ def build_records(commands_block: list, agent_id: str, verbose: bool = False):
223231
activity["Used"] = sorted(set(activity["Used"]))
224232

225233
for output_path in outputs:
234+
if not make_alnum(output_path):
235+
output_id = 'urn:uuid:' + get_uuid()
236+
else:
237+
output_id = get_entity_urn(output_path)
226238
records["Entities"].append(
227239
{
228-
"@id": f"urn:{get_id()}",
240+
"@id": output_id,
229241
"Label": os.path.split(output_path)[1],
230242
"AtLocation": output_path,
231243
"GeneratedBy": activity["@id"],
@@ -272,7 +284,7 @@ def gather_multiline(input_file: str) -> list:
272284

273285
def readlines(input_file: str) -> list:
274286
"""
275-
gather multiline command split by \ separator
287+
Read lines from an input file and return the list of commands it contains
276288
277289
Parameters
278290
----------
@@ -363,7 +375,7 @@ def fusion_activities(activities, label):
363375
command += activity["Command"] + "; "
364376

365377
return {
366-
"@id": f"urn:{get_id()}",
378+
"@id": get_activity_urn(label),
367379
"Label": label,
368380
"AssociatedWith": activities[0]["AssociatedWith"],
369381
"Command": command,
@@ -454,7 +466,8 @@ def afni_to_bids_prov(filename: str, context_url=CONTEXT_URL, output_file=None,
454466
"""
455467
commands_block = readlines(filename)
456468

457-
graph, agent_id = get_default_graph(label="AFNI", context_url=context_url, soft_ver=soft_ver)
469+
graph, agent_id = get_default_graph(
470+
soft_label="AFNI", context_url=context_url, soft_version=soft_ver)
458471
records, bloc_act = build_records(commands_block, agent_id, verbose=verbose)
459472

460473
graph["Records"].update(records)

bids_prov/afni/description_functions.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
7676
"GeneratedBy": ["-prefix"]
7777
},
7878
{"Name": "3dAllineate",
79-
"Used": ["-base", "-input", "-1Dmatrix_apply"],
79+
"Used": ["-base", "-input", "-1Dmatrix_apply", "-master"],
8080
"GeneratedBy": ["-prefix"]
8181
},
8282
{"Name": "3dTstat",
@@ -115,7 +115,7 @@
115115
"GeneratedBy" : ["-x1D"]
116116
},
117117
{"Name": "3dmaskave",
118-
"Used" : [2,3],
118+
"Used" : ["-mask", 2,3],
119119
"GeneratedBy" : [">"]
120120
},
121121
{"Name": "3dTnorm",

bids_prov/fsl/description_functions.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"Name": "fslmaths",
99
"Used": [0, "-add","-sub","-mul","-div","-rem","-mas","-max","-min","-seed","-restart","-save"],
1010
"GeneratedBy": [1],
11-
"ParametersValue" : ["-dt", "-odt","-thr","-thrp","-thrP","-uthr","-uthrp","-uthrP",
11+
"ParametersValue" : ["-dt", "-odt","-thr","-thrp","-thrP","-uthr","-uthrp","-uthrP", "-bptf",
1212
{
1313
"Name": "-grid",
1414
"Index": ["0:2"]
@@ -78,7 +78,7 @@
7878
{
7979
"Name": "cluster",
8080
"Used": ["-i", "-c"],
81-
"GeneratedBy": ["-o", "--othresh", "--olmax", "--olmaxim", "--osize", "--omax", "--omean", "--opvals", "-c", "--cope", "-x","--xfm", "--stdvol", "--warpvol"]
81+
"GeneratedBy": ["-o", "--othresh", "--olmax", "--olmaxim", "--osize", "--omax", "--omean", "--opvals", "-c", "--cope", "-x","--xfm", "--stdvol", "--warpvol", ">"]
8282
},
8383
{
8484
"Name": "echo",

bids_prov/fsl/fsl_parser.py

Lines changed: 52 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@
88

99
from bs4 import BeautifulSoup
1010

11-
from bids_prov.utils import get_default_graph, CONTEXT_URL, get_id, label_mapping, compute_sha_256_entity, \
11+
from bids_prov.utils import (
12+
get_default_graph, CONTEXT_URL, label_mapping, compute_sha_256_entity,
13+
get_activity_urn, get_agent_urn, get_entity_urn, make_alnum, get_uuid,
1214
writing_jsonld
15+
)
1316

1417
# regex to catch inputs
1518
# in `cp /fsl/5.0/doc/fsl.css .files no_ext 5.0` --> only `.files` should match
@@ -250,7 +253,7 @@ def _get_entities_from_kwarg(entities, opts, parse_kwarg):
250253
value = []
251254
for (arg, val) in opts._get_kwargs():
252255
# print("\n--arg, val", type(arg), type(val), arg, val)
253-
if param.split("-")[1] == arg:
256+
if param.strip('-') == arg:
254257
# print("\n----arg select", type(arg), arg)
255258
if val != None:
256259
# print("\n------val != None", type(val), val)
@@ -388,14 +391,14 @@ def get_entities(cmd_s, parameters):
388391
if "GeneratedBy" in parameters:
389392
outputs.extend(_get_arg(parameters["GeneratedBy"], arg_rest))
390393

391-
# print("\n\n inputs", inputs)
394+
# print("\n\n inputs", inputs)
392395
# print("\n\n outputs", outputs)
393396
# print("\n\n params", params)
394397

395398
return inputs, outputs, params
396399

397400

398-
def build_records(groups: Mapping[str, List[str]], agent_id: str):
401+
def build_records(groups: Mapping[str, List[str]], agent_id: str, verbose: bool = False):
399402
"""
400403
Build the `records` field for the final .jsonld file,
401404
from commands lines grouped by stage (e.g. `Registration`, `Post-stats`)
@@ -420,8 +423,11 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
420423
for cmd in v:
421424
# process to remove + and - in pngappend command
422425
cmd = cmd.replace(" + ", " ").replace(" - ", " ")
426+
# remove multiple spaces
427+
cmd = ' '.join(cmd.split())
428+
# split on either " " or "=" (the "|" in the pattern is regex alternation, not a literal character)
423429
cmd_s = re.split(" |=", cmd)
424-
a_name = cmd_s[0]
430+
activity_name = cmd_s[0]
425431

426432
inputs = []
427433
outputs = []
@@ -430,7 +436,7 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
430436

431437
function_in_description_functions = False
432438

433-
command_name_end = os.path.split(a_name)[1]
439+
command_name_end = os.path.split(activity_name)[1]
434440
for df in description_functions:
435441
if df["Name"] == command_name_end:
436442
description_of_command = df
@@ -439,7 +445,15 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
439445
cmd_s[1:], description_of_command)
440446
break
441447

448+
if verbose:
449+
print("CMD", cmd)
450+
print('-> inputs: ', inputs)
451+
print('<- outputs: ', outputs)
452+
print(" others args :", *parameters)
453+
442454
if function_in_description_functions is False:
455+
print(f"-> {command_name_end} : Not present in description_functions")
456+
443457
# if the function is not in our description file, the process is based on regex
444458
attributes = defaultdict(list)
445459

@@ -457,9 +471,9 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
457471
outputs = list(chain(*(attributes.pop(k)
458472
for k in attributes.keys() & OUTPUT_TAGS)))
459473
entity_names = [_ for _ in re.findall(
460-
INPUT_RE, cmd_without_attributes[len(a_name):])]
474+
INPUT_RE, cmd_without_attributes[len(activity_name):])]
461475

462-
# # cmd_conf = get_closest_config(a_name) # with the module boutiques
476+
# # cmd_conf = get_closest_config(activity_name) # with the module boutiques
463477
# cmd_conf = None # None because boutiques is not used at this time
464478
# # if cmd_conf:
465479
# # pos_args = filter(lambda e: not e.startswith("-"), cmd_s) # TODO use "-key value" mappings
@@ -471,13 +485,14 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
471485
if len(entity_names) > 1:
472486
inputs.append(entity_names[0])
473487

474-
# the file name and possible extension
475-
label = f"{os.path.split(a_name)[1]}"
476-
477-
a = {
478-
"@id": f"urn:{get_id()}",
479-
"Label": label_mapping(label, "fsl/fsl_labels.json"),
480-
"AssociatedWith": "urn:" + agent_id,
488+
# Create activity label & record
489+
activity_label = label_mapping(
490+
f'{os.path.split(activity_name)[1]}',
491+
'fsl/fsl_labels.json')
492+
activity = {
493+
"@id": get_activity_urn(activity_label),
494+
"Label": activity_label,
495+
"AssociatedWith": agent_id,
481496
"Command": cmd,
482497
# "attributes": [
483498
# {k: v if len(v) > 1 else v[0]} for k, v in attributes.items()
@@ -487,48 +502,58 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str):
487502

488503
for input_path in inputs:
489504
# input_name = input_path.replace("/", "_") # TODO
490-
input_id = f"urn:{get_id()}" # def format_id
505+
if not make_alnum(input_path):
506+
input_id = 'urn:uuid:' + get_uuid()
507+
else:
508+
input_id = get_entity_urn(input_path)
491509

492510
existing_input = next(
493-
(entity for entity in records["Entities"] if entity["AtLocation"] == input_path), None)
511+
(e for e in records["Entities"] if e["AtLocation"] == input_path), None)
494512
if existing_input is None:
495-
e = {
513+
entity = {
496514
"@id": input_id,
497515
"Label": os.path.split(input_path)[1],
498516
"AtLocation": input_path,
499517
}
500-
records["Entities"].append(e)
501-
a["Used"].append(input_id)
518+
records["Entities"].append(entity)
519+
activity["Used"].append(input_id)
502520
else:
503-
a["Used"].append(existing_input["@id"])
521+
activity["Used"].append(existing_input["@id"])
504522

505523
# Order does not matter and then makes sense to include only unique values
506-
a["Used"] = sorted(set(a["Used"]))
524+
activity["Used"] = sorted(set(activity["Used"]))
507525

508526
for output_path in outputs:
509527
# output_name = output_path.replace("/", "_") # TODO
528+
if not make_alnum(output_path):
529+
output_id = 'urn:uuid:' + get_uuid()
530+
else:
531+
output_id = get_entity_urn(output_path)
532+
510533
records["Entities"].append(
511534
{
512-
"@id": f"urn:{get_id()}",
535+
"@id": output_id,
513536
"Label": os.path.split(output_path)[1],
514537
"AtLocation": output_path,
515-
"GeneratedBy": a["@id"],
538+
"GeneratedBy": activity["@id"],
516539
# "derivedFrom": input_id,
517540
}
518541
)
519542

520-
records["Activities"].append(a)
543+
records["Activities"].append(activity)
544+
if verbose:
545+
print('-------------------------')
521546
return dict(records)
522547

523548

524549
def fsl_to_bids_prov(filename: str, context_url=CONTEXT_URL, output_file=None,
525550
soft_ver="xxx", indent=2, verbose=False) -> bool: # TODO : add fsl version
526551

527552
graph, agent_id = get_default_graph(
528-
label="FSL", context_url=context_url, soft_ver=soft_ver)
553+
soft_label="FSL", context_url=context_url, soft_version=soft_ver)
529554

530555
lines = readlines(filename)
531-
records = build_records(lines, agent_id)
556+
records = build_records(lines, agent_id, verbose)
532557
graph["Records"].update(records)
533558

534559
compute_sha_256_entity(graph["Records"]["Entities"])

0 commit comments

Comments
 (0)