-
-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathgenerate_wikipathways_orthologs.py
More file actions
61 lines (51 loc) · 2.13 KB
/
generate_wikipathways_orthologs.py
File metadata and controls
61 lines (51 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"""Generate orthologous relations between WikiPathways."""
import itertools as itt
from collections.abc import Iterable
from typing import cast
import pyobo
from bioregistry import NormalizedNamableReference
from curies.vocabulary import lexical_matching_process
from gilda.process import normalize
from sssom_pydantic import MappingTool, SemanticMapping
from tqdm import tqdm
from biomappings.resources import append_predictions
from biomappings.utils import get_script_url
def _lexical_exact_match(name1: str, name2: str) -> bool:
    """Return whether two names are equal after passing each through ``normalize``."""
    left = cast(str, normalize(name1))  # type:ignore[no-untyped-call]
    right = cast(str, normalize(name2))  # type:ignore[no-untyped-call]
    return left == right
def iterate_orthologous_lexical_matches(prefix: str = "wikipathways") -> Iterable[SemanticMapping]:
    """Generate orthologous relations between lexical matches from different species.

    Every unordered pair of entries in ``prefix`` is compared. A pair is
    emitted when the two entries have different species annotations and their
    names are considered equal by :func:`_lexical_exact_match`.

    :param prefix: The prefix of the resource whose entries are compared
        against each other.
    :yields: One mapping per matching pair, with the ``RO:HOM0000017``
        predicate, the lexical matching justification, a confidence of 0.95,
        and this script's URL as the mapping tool name.
    """
    names = pyobo.get_id_name_mapping(prefix)
    species = pyobo.get_id_species_mapping(prefix)
    provenance = get_script_url(__file__)
    count = 0
    # ``combinations`` over a sorted input of unique items already emits the
    # pairs in sorted order, so they are streamed lazily instead of being
    # collected and re-sorted. This keeps memory flat and lets the progress
    # bar advance with the actual comparisons.
    pairs = tqdm(
        itt.combinations(sorted(names.items()), 2),
        unit_scale=True,
        unit="pair",
        # n choose 2; n * (n - 1) is always even, so floor division is exact
        # and keeps the total an integer.
        total=len(names) * (len(names) - 1) // 2,
    )
    for (source_id, source_name), (target_id, target_name) in pairs:
        # NOTE(review): an identifier without a species entry would raise
        # KeyError here - confirm the species mapping covers every name.
        source_species = species[source_id]
        target_species = species[target_id]
        if source_species == target_species:
            # Only pairs annotated with different species are candidates.
            continue
        if not _lexical_exact_match(source_name, target_name):
            continue
        count += 1
        yield SemanticMapping(
            subject=NormalizedNamableReference(
                prefix=prefix,
                identifier=source_id,
                name=source_name,
            ),
            predicate="RO:HOM0000017",
            object=NormalizedNamableReference(
                prefix=prefix,
                identifier=target_id,
                name=target_name,
            ),
            justification=lexical_matching_process,
            confidence=0.95,
            mapping_tool=MappingTool(name=provenance),
        )
    tqdm.write(f"Identified {count:,} orthologs")
if __name__ == "__main__":
    # Script entry point: pass the generated mappings on to ``append_predictions``.
    predictions = iterate_orthologous_lexical_matches()
    append_predictions(predictions)