-
-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathimport_compath.py
More file actions
44 lines (35 loc) · 1.42 KB
/
import_compath.py
File metadata and controls
44 lines (35 loc) · 1.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
"""Import mappings from ComPath."""
import pandas as pd
import pyobo
from curies.vocabulary import exact_match, manual_mapping_curation
from tqdm import tqdm
from biomappings.resources import append_true_mappings
# Location of the tab-separated ComPath mappings file that ``main`` downloads.
URL = "https://github.com/ComPath/compath-resources/raw/master/docs/data/compath.tsv"
# Prefixes to exclude: ``main`` drops any row whose source prefix or target
# prefix is in this set.
BLACKLIST = {"decopath", "pathbank"}
def _resolve_names(df: pd.DataFrame, side: str) -> list:
    """Return the labels to store in the ``"<side> name"`` column of ``df``.

    :param df: Mappings frame with ``"<side> prefix"``, ``"<side> identifier"``
        and ``"<side> name"`` columns.
    :param side: Either ``"source"`` or ``"target"``.
    :returns: One label per row, in row order. Rows with the ``kegg.pathway``
        prefix keep the name already present in the frame; every other row
        gets whatever :func:`pyobo.get_name` returns for its prefix/identifier.
    """
    columns = [f"{side} prefix", f"{side} identifier", f"{side} name"]
    return [
        name if prefix == "kegg.pathway" else pyobo.get_name(prefix, identifier)
        for prefix, identifier, name in tqdm(df[columns].values)
    ]


def main() -> None:
    """Import mappings from ComPath.

    Downloads the ComPath TSV from ``URL``, keeps only the rows whose relation
    is an exact match and whose source and target prefixes are not in
    ``BLACKLIST``, annotates them as manually curated mappings, refreshes the
    source/target names, drops duplicate rows, and hands the result to
    :func:`biomappings.resources.append_true_mappings`.
    """
    df = pd.read_csv(URL, sep="\t")

    # Apply all three row filters with a single mask; this selects the same
    # rows as filtering three times in sequence.
    keep = (
        (df["relation"] == exact_match.curie)
        & ~df["source prefix"].isin(BLACKLIST)
        & ~df["target prefix"].isin(BLACKLIST)
    )
    # Take an explicit copy so the column assignments below operate on an
    # independent frame instead of a slice of the downloaded one (avoids
    # pandas' SettingWithCopyWarning).
    df = df[keep].copy()

    df["type"] = manual_mapping_curation.curie
    df["source"] = "orcid:0000-0002-2046-6145"  # ComPath is courtesy of Uncle Daniel

    # TODO check that species are the same

    # Make sure nomenclature is correct
    df["source name"] = _resolve_names(df, "source")
    df["target name"] = _resolve_names(df, "target")

    df = df.drop_duplicates()
    # Each mapping is passed along as the row (a pandas Series) from iterrows().
    mappings = (mapping for _, mapping in df.iterrows())
    append_true_mappings(mappings)
# Run the import only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()