-
-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathgenerate_vo_mesh_mappings.py
More file actions
118 lines (106 loc) · 4.41 KB
/
generate_vo_mesh_mappings.py
File metadata and controls
118 lines (106 loc) · 4.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"""Generate mappings from VO."""
import bioontologies
import pyobo
import ssslm
from bioontologies.obograph import Node
from bioregistry import NormalizedNamableReference
from curies.vocabulary import (
alternative_term,
exact_match,
lexical_matching_process,
see_also,
structural_matching,
)
from sssom_pydantic import MappingTool, SemanticMapping
from tqdm import tqdm
from biomappings.resources import append_predictions
from biomappings.utils import get_script_url
def _mesh_ids_from_see_also(raw: str) -> list[str]:
    """Return the MeSH identifiers listed in a semicolon-delimited value."""
    mesh_ids = []
    for chunk in raw.strip().split(";"):
        candidate = chunk.strip().replace(" ", "")
        # TODO this is place to extract other mapping types
        if candidate.lower().startswith("mesh:"):
            mesh_ids.append(candidate.split(":", 1)[1].strip())
    return mesh_ids


def main() -> None:
    """Collect VO to MeSH mappings and append them as predictions.

    For every named VO node, MeSH identifiers found in its see-also
    properties are turned into structural exact matches. Nodes that
    yield no such mapping are passed to lexical grounding instead.
    """
    mesh_grounder: ssslm.Grounder[NormalizedNamableReference] = pyobo.get_grounder("mesh")
    provenance = get_script_url(__file__)
    obograph = bioontologies.get_obograph_by_prefix("vo", check=False)
    graph = obograph.guess("vo").standardize()
    rows = []
    extracted_mesh = 0
    for node in tqdm(graph.nodes, unit="node", unit_scale=True):
        if not node.name or node.prefix != "vo":
            continue

        # Remember how many rows existed so we can tell whether this node
        # contributed any see-also based mapping.
        rows_before = len(rows)
        properties = (node.meta.properties or []) if node.meta else []
        for prop in properties:
            if not prop.predicate or prop.predicate.curie != see_also.curie:
                continue
            for mesh_id in _mesh_ids_from_see_also(prop.value_raw):
                mesh_name = pyobo.get_name("mesh", mesh_id)
                if not mesh_name:
                    tqdm.write(f"No mesh name for vo:{node.name} mapped to mesh:{mesh_id}")
                    continue
                vo_reference = NormalizedNamableReference(
                    prefix=node.prefix,
                    identifier=node.identifier,
                    name=node.name,
                )
                mesh_reference = NormalizedNamableReference(
                    prefix="mesh",
                    identifier=mesh_id,
                    name=mesh_name,
                )
                rows.append(
                    SemanticMapping(
                        subject=vo_reference,
                        predicate=exact_match,
                        object=mesh_reference,
                        justification=structural_matching,
                        confidence=0.99,
                        mapping_tool=MappingTool(name=provenance),
                    )
                )
                extracted_mesh += 1

        # Lexical grounding is only the fallback for nodes without an
        # extracted MeSH mapping.
        if len(rows) == rows_before:
            _ground(mesh_grounder, node, rows, provenance)

    append_predictions(rows)
    print(f"extracted {extracted_mesh} mesh mappings. should be about 65")
def _ground(
    grounder: ssslm.Grounder[NormalizedNamableReference],
    node: Node,
    rows: list[SemanticMapping],
    provenance: str,
) -> None:
    """Ground a node's labels with the grounder and append lexical matches.

    The texts that get grounded are the node's name, the VO-specific
    synonyms stored in annotation properties, and the node's regular
    synonyms. Each distinct text is grounded once.

    :param grounder: The grounder whose ``get_matches`` is called per text.
    :param node: The node whose labels are grounded. Nodes without a
        reference are skipped.
    :param rows: The list that new mappings are appended to (mutated in place).
    :param provenance: The name recorded as the mapping tool on each mapping.
    """
    if not node.reference:
        return

    texts = [node.name]
    # VO doesn't store its synonyms using standard predicates,
    # so look in IAO_0000118 (alternate label) or IAO_0000116 (editor note)
    # with "synonym: " as the string prefix
    if node.meta:
        # Predicate CURIEs are compared in lowercase form here, matching
        # the literal "iao:0000116" used below.
        alternative_term_curie = alternative_term.curie.lower()
        for p in node.meta.properties or []:
            if not p.predicate:
                continue
            if p.predicate.curie == alternative_term_curie:
                texts.append(p.value_raw)
            elif p.predicate.curie == "iao:0000116" and p.value_raw.startswith("synonym:"):
                texts.append(p.value_raw.removeprefix("synonym:").strip())
    texts.extend(s.value for s in node.synonyms)

    # Previously the collected ``texts`` were never used, so the VO-specific
    # synonyms were silently dropped. Ground every collected text, skipping
    # missing/empty strings and repeats so no text is grounded twice.
    seen: set[str] = set()
    for text in texts:
        if not text or text in seen:
            continue
        seen.add(text)
        for scored_match in grounder.get_matches(text):
            rows.append(
                SemanticMapping(
                    subject=node.reference,
                    predicate=exact_match,
                    object=scored_match.reference,
                    justification=lexical_matching_process,
                    confidence=round(scored_match.score, 2),
                    mapping_tool=MappingTool(name=provenance),
                )
            )
# Only run the mapping generation when executed as a script, not on import.
if __name__ == "__main__":
    main()