Skip to content

Commit c44ea2e

Browse files
authored
Merge pull request #4 from brain-bican/feat/homba-dhba-xrefs
Add DHBA xrefs and Allen atlas linkouts to HOMBA terms
2 parents 76c5ac2 + 3c8b508 commit c44ea2e

9 files changed

Lines changed: 20738 additions & 5677 deletions

File tree

src/config/db_graph_atlas.yaml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
11
homba:
22
species: mammalian
33
prefix: https://purl.brain-bican.org/ontology/HOMBA_
4-
atlases: []
4+
dhba_prefix: https://purl.brain-bican.org/ontology/dhbao/DHBA_
5+
atlases:
6+
- id: 138322605
7+
name: Human, 34 years, Cortex - Gyral
8+
- id: 265297126
9+
name: Human, 34 years, Cortex - Mod. Brodmann
10+
- id: 3
11+
name: Human, 21 pcw
12+
- id: 138322603
13+
name: 287730656

src/ontology/components/all_templates.owl

Lines changed: 7421 additions & 0 deletions
Large diffs are not rendered by default.

src/ontology/homba.Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ $(TMPDIR)/tmp.json: $(TMPDIR)/tmp.owl
8282

8383
$(TEMPLATEDIR)/linkouts.tsv: $(TMPDIR)/tmp.json
8484
python3 $(SCRIPTSDIR)/gen_linkout_template.py $<
85+
python3 $(SCRIPTSDIR)/validate_linkout_template.py $(TEMPLATEDIR)/linkouts.tsv
8586

8687
$(COMPONENTSDIR)/linkouts.owl: $(TMPDIR)/tmp.owl $(TEMPLATEDIR)/linkouts.tsv
8788
$(ROBOT) template --template $(word 2, $^) --input $< --add-prefixes template_prefixes.json -o $@

src/ontology/template_prefixes.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"@context": {
33
"UBERON": "http://purl.obolibrary.org/obo/UBERON_",
44
"OboInOwl": "http://www.geneontology.org/formats/oboInOwl#",
5-
"HOMBA": "https://purl.brain-bican.org/ontology/HOMBA_"
5+
"HOMBA": "https://purl.brain-bican.org/ontology/HOMBA_",
6+
"rdfs": "http://www.w3.org/2000/01/rdf-schema#"
67
}
78
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
UBERON: http://purl.obolibrary.org/obo/UBERON_
22
OboInOwl: http://www.geneontology.org/formats/oboInOwl#
33
HOMBA: https://purl.brain-bican.org/ontology/HOMBA_
4+
rdfs: http://www.w3.org/2000/01/rdf-schema#

src/patterns/definitions.owl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ Prefix(rdfs:=<http://www.w3.org/2000/01/rdf-schema#>)
77

88

99
Ontology(<https://purl.brain-bican.org/ontology/homba/patterns/definitions.owl>
10-
<https://purl.brain-bican.org/ontology/homba/releases/2026-03-30/patterns/definitions.owl>
11-
Annotation(owl:versionInfo "2026-03-30")
10+
<https://purl.brain-bican.org/ontology/homba/releases/2026-04-02/patterns/definitions.owl>
11+
Annotation(owl:versionInfo "2026-04-02")
1212

1313

1414

Lines changed: 111 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,76 @@
11
import argparse
2+
import csv
23
import json
4+
from pathlib import Path
35
from string import Template
46

5-
import pandas as pd
6-
from ruamel.yaml import YAML
7+
8+
SCRIPT_DIR = Path(__file__).resolve().parent
9+
CONFIG_PATH = SCRIPT_DIR.parent / "config" / "db_graph_atlas.yaml"
10+
OUTPUT_PATH = SCRIPT_DIR.parent / "templates" / "linkouts.tsv"
11+
ATLAS_LINK = Template("http://atlas.brain-map.org/atlas?atlas=$atlas_id#structure=$structure_id")
12+
13+
14+
def is_local_homba_term(node_id, prefix):
    """Return True when ``node_id`` is a term minted under ``prefix``.

    Args:
        node_id: Node identifier from the ontology JSON graph; coerced with ``str``.
        prefix: IRI prefix configured for the ontology. May be ``None`` or empty,
            in which case nothing matches.

    Returns:
        bool: ``True`` only if ``prefix`` is non-empty, ``node_id`` starts with
        it, and ``node_id`` does not end with ``_ENTITY``.
    """
    # Guard first so the function always returns a real bool instead of leaking
    # the falsy prefix value (None / "") to the caller.
    if not prefix:
        return False
    text = str(node_id)
    return text.startswith(prefix) and not text.endswith("_ENTITY")
16+
17+
18+
def is_numeric_homba_id(local_id):
    """Return True when ``local_id`` consists solely of ASCII digits.

    Numeric HOMBA accessions correspond to DHBA terms; AA accessions are
    HOMBA-only groupings.

    Args:
        local_id: The accession part of a term IRI; coerced with ``str``.

    Returns:
        bool: ``True`` for a non-empty string of the characters ``0``-``9``.
    """
    text = str(local_id)
    # str.isdigit() on its own also accepts non-ASCII digits (e.g. "²" or
    # Arabic-Indic numerals), which must never end up inside a DHBA IRI or an
    # atlas URL, so require ASCII as well.
    return text.isascii() and text.isdigit()
21+
22+
23+
def parse_scalar(value):
    """Trim a raw YAML scalar and remove one pair of matching surrounding quotes.

    Args:
        value: Text found after the ``:`` of a ``key: value`` line.

    Returns:
        str: ``value`` without leading/trailing whitespace. If the trimmed text
        is wrapped in a matching pair of single or double quotes, the quotes
        are removed as well; otherwise the trimmed text is returned unchanged.
    """
    value = value.strip()
    # Require at least two characters and the SAME quote on both ends, so that a
    # lone quote or mismatched quotes ('abc") are not silently truncated.
    if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
        return value[1:-1]
    return value
28+
29+
30+
def load_simple_yaml(path):
    """Parse the restricted YAML layout used by the atlas configuration file.

    Only this shape is understood:

    * ``section:`` at column 0 opens a section;
    * ``key: value`` at 2 spaces sets a scalar on the current section;
    * ``key:`` at 2 spaces (no value) is skipped, its list items follow;
    * ``- key: value`` at 4 spaces starts a new mapping, continued by
      ``key: value`` lines at 6 spaces.

    Every list item is appended to the section's ``"atlases"`` list, whichever
    key introduced it. Blank lines and ``#`` comment lines are ignored, as are
    lines that fit none of the shapes above.

    Args:
        path: ``pathlib.Path`` of the YAML file.

    Returns:
        dict: section name -> dict of scalar settings plus an ``"atlases"``
        list of dicts.

    Raises:
        ValueError: If a list item carries text that is not ``key: value``.
    """
    config = {}
    current_section = None
    current_atlas = None

    # The config is a repository file, so decode it as UTF-8 rather than with
    # whatever the locale default happens to be.
    lines = path.read_text(encoding="utf-8").splitlines()
    for line_no, raw_line in enumerate(lines, start=1):
        stripped = raw_line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        # Only leading spaces count as indentation.
        indent = len(raw_line) - len(raw_line.lstrip(" "))

        if indent == 0 and stripped.endswith(":"):
            current_section = stripped[:-1]
            config[current_section] = {"atlases": []}
            current_atlas = None
            continue

        if current_section is None:
            continue

        if indent == 2:
            if stripped.endswith(":"):
                # Key that opens a nested block (e.g. ``atlases:``).
                continue
            if ":" in stripped:
                key, value = stripped.split(":", 1)
                # An empty flow sequence (``atlases: []``) must stay a list;
                # storing the text "[]" would make callers iterate characters.
                if value.strip() == "[]":
                    config[current_section][key.strip()] = []
                else:
                    config[current_section][key.strip()] = parse_scalar(value)
            continue

        if indent == 4 and stripped.startswith("-"):
            remainder = stripped[1:].strip()
            key, separator, value = remainder.partition(":")
            if remainder and not separator:
                raise ValueError(
                    f"{path}:{line_no}: expected 'key: value' after '-', found {remainder!r}"
                )
            current_atlas = {}
            config[current_section]["atlases"].append(current_atlas)
            if remainder:
                current_atlas[key.strip()] = parse_scalar(value)
            continue

        if indent == 6 and ":" in stripped and current_atlas is not None:
            key, value = stripped.split(":", 1)
            current_atlas[key.strip()] = parse_scalar(value)

    return config
73+
774

875
parser = argparse.ArgumentParser(description="Generate HOMBA linkout template.")
976
parser.add_argument("filepath", help="Path to the json version of the ontology")
@@ -13,14 +80,12 @@
1380
ontology_json = json.loads(f.read())
1481

1582
graph = ontology_json["graphs"][0]
16-
with open("../config/db_graph_atlas.yaml", "r") as conf:
17-
mapping = YAML(typ="safe").load(conf.read()) or {}
18-
19-
link = Template("http://atlas.brain-map.org/atlas?atlas=$atlas_id#structure=$structure_id")
83+
mapping = load_simple_yaml(CONFIG_PATH)
2084

2185
seed = {
2286
"ID": "ID",
23-
"xref": "A OboInOwl:hasDbXref",
87+
"dhba_xref": "A OboInOwl:hasDbXref",
88+
"atlas_link": "A rdfs:seeAlso",
2489
"prefLabel": "A skos:prefLabel",
2590
}
2691

@@ -30,29 +95,47 @@
3095
if node.get("type") != "CLASS" or "lbl" not in node:
3196
continue
3297

33-
matched = False
98+
node_id = str(node["id"])
99+
pref_label = node["lbl"]
100+
tab.append({"ID": node_id, "dhba_xref": "", "atlas_link": "", "prefLabel": pref_label})
101+
34102
for _, cfg in mapping.items():
35103
prefix = cfg.get("prefix")
36-
if prefix and str(node["id"]).startswith(prefix) and not str(node["id"]).endswith("_ENTITY"):
37-
atlases = cfg.get("atlases", [])
38-
if atlases:
39-
for atlas in atlases:
40-
tab.append(
41-
{
42-
"ID": node["id"],
43-
"xref": link.substitute(
44-
atlas_id=atlas["id"],
45-
structure_id=str(node["id"]).rsplit("_", 1)[-1],
46-
),
47-
"prefLabel": "{} ({})".format(node["lbl"], cfg["species"]),
48-
}
49-
)
50-
else:
51-
tab.append({"ID": node["id"], "xref": "", "prefLabel": node["lbl"]})
52-
matched = True
104+
if not is_local_homba_term(node_id, prefix):
105+
continue
106+
107+
local_id = node_id.rsplit("_", 1)[-1]
108+
if not is_numeric_homba_id(local_id):
53109
break
54110

55-
if not matched:
56-
tab.append({"ID": node["id"], "xref": "", "prefLabel": node["lbl"]})
111+
dhba_prefix = cfg.get("dhba_prefix", "")
112+
if dhba_prefix:
113+
tab.append(
114+
{
115+
"ID": node_id,
116+
"dhba_xref": dhba_prefix + local_id,
117+
"atlas_link": "",
118+
"prefLabel": "",
119+
}
120+
)
121+
122+
for atlas in cfg.get("atlases", []):
123+
tab.append(
124+
{
125+
"ID": node_id,
126+
"dhba_xref": "",
127+
"atlas_link": ATLAS_LINK.substitute(atlas_id=atlas["id"], structure_id=local_id),
128+
"prefLabel": "",
129+
}
130+
)
131+
break
57132

58-
pd.DataFrame.from_records(tab).to_csv("../templates/linkouts.tsv", sep="\t", index=False)
133+
with open(OUTPUT_PATH, "w", newline="") as handle:
134+
writer = csv.DictWriter(
135+
handle,
136+
fieldnames=["ID", "dhba_xref", "atlas_link", "prefLabel"],
137+
delimiter="\t",
138+
lineterminator="\n",
139+
)
140+
writer.writeheader()
141+
writer.writerows(tab)
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import argparse
2+
import csv
3+
from collections import defaultdict
4+
from pathlib import Path
5+
6+
7+
def is_numeric_homba_id(local_id):
    """Return True when ``local_id`` consists solely of ASCII digits.

    Args:
        local_id: The accession part of a term IRI; coerced with ``str``.

    Returns:
        bool: ``True`` for a non-empty string of the characters ``0``-``9``.
    """
    text = str(local_id)
    # str.isdigit() on its own also accepts non-ASCII digits (e.g. "²" or
    # Arabic-Indic numerals); those are not valid numeric accessions, so
    # require ASCII as well.
    return text.isascii() and text.isdigit()
9+
10+
11+
def parse_scalar(value):
    """Trim a raw YAML scalar and remove one pair of matching surrounding quotes.

    Args:
        value: Text found after the ``:`` of a ``key: value`` line.

    Returns:
        str: ``value`` without leading/trailing whitespace. If the trimmed text
        is wrapped in a matching pair of single or double quotes, the quotes
        are removed as well; otherwise the trimmed text is returned unchanged.
    """
    value = value.strip()
    # Require at least two characters and the SAME quote on both ends, so that a
    # lone quote or mismatched quotes ('abc") are not silently truncated.
    if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
        return value[1:-1]
    return value
16+
17+
18+
def load_simple_yaml(path):
    """Parse the restricted YAML layout used by the atlas configuration file.

    Only this shape is understood:

    * ``section:`` at column 0 opens a section;
    * ``key: value`` at 2 spaces sets a scalar on the current section;
    * ``key:`` at 2 spaces (no value) is skipped, its list items follow;
    * ``- key: value`` at 4 spaces starts a new mapping, continued by
      ``key: value`` lines at 6 spaces.

    Every list item is appended to the section's ``"atlases"`` list, whichever
    key introduced it. Blank lines and ``#`` comment lines are ignored, as are
    lines that fit none of the shapes above.

    Args:
        path: ``pathlib.Path`` of the YAML file.

    Returns:
        dict: section name -> dict of scalar settings plus an ``"atlases"``
        list of dicts.

    Raises:
        ValueError: If a list item carries text that is not ``key: value``.
    """
    config = {}
    current_section = None
    current_atlas = None

    # The config is a repository file, so decode it as UTF-8 rather than with
    # whatever the locale default happens to be.
    lines = path.read_text(encoding="utf-8").splitlines()
    for line_no, raw_line in enumerate(lines, start=1):
        stripped = raw_line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        # Only leading spaces count as indentation.
        indent = len(raw_line) - len(raw_line.lstrip(" "))

        if indent == 0 and stripped.endswith(":"):
            current_section = stripped[:-1]
            config[current_section] = {"atlases": []}
            current_atlas = None
            continue

        if current_section is None:
            continue

        if indent == 2:
            if stripped.endswith(":"):
                # Key that opens a nested block (e.g. ``atlases:``).
                continue
            if ":" in stripped:
                key, value = stripped.split(":", 1)
                # An empty flow sequence (``atlases: []``) must stay a list;
                # storing the text "[]" would make callers iterate characters.
                if value.strip() == "[]":
                    config[current_section][key.strip()] = []
                else:
                    config[current_section][key.strip()] = parse_scalar(value)
            continue

        if indent == 4 and stripped.startswith("-"):
            remainder = stripped[1:].strip()
            key, separator, value = remainder.partition(":")
            if remainder and not separator:
                raise ValueError(
                    f"{path}:{line_no}: expected 'key: value' after '-', found {remainder!r}"
                )
            current_atlas = {}
            config[current_section]["atlases"].append(current_atlas)
            if remainder:
                current_atlas[key.strip()] = parse_scalar(value)
            continue

        if indent == 6 and ":" in stripped and current_atlas is not None:
            key, value = stripped.split(":", 1)
            current_atlas[key.strip()] = parse_scalar(value)

    return config
61+
62+
63+
# Command-line driver: check every HOMBA row of a generated linkouts.tsv against
# the prefixes and atlas ids configured in db_graph_atlas.yaml.
parser = argparse.ArgumentParser(description="Validate generated HOMBA linkout template rows.")
parser.add_argument("template", help="Path to linkouts.tsv")
parser.add_argument(
    "--config",
    default=str(Path(__file__).resolve().parent.parent / "config" / "db_graph_atlas.yaml"),
    help="Path to db_graph_atlas.yaml",
)
args = parser.parse_args()

config = load_simple_yaml(Path(args.config))
homba_cfg = config.get("homba", {})
homba_prefix = homba_cfg.get("prefix", "")
if not homba_prefix:
    # Every string starts with "", so an empty prefix would make each row look
    # like a HOMBA class and produce misleading per-term errors further down.
    raise ValueError(f"No 'prefix' is configured for 'homba' in {args.config}")
dhba_prefix = homba_cfg.get("dhba_prefix", "")
atlas_ids = [str(atlas["id"]) for atlas in homba_cfg.get("atlases", [])]
expected_atlas_count = len(atlas_ids)

# node_id -> the DHBA xrefs and atlas links found for it, in file order.
stats = defaultdict(lambda: {"dhba_xrefs": [], "atlas_links": []})

with open(args.template, "r", newline="") as handle:
    reader = csv.DictReader(handle, delimiter="\t")
    for row in reader:
        node_id = row["ID"]
        # Ignore the row whose ID cell is the literal "ID" and any non-HOMBA term.
        if node_id == "ID" or not node_id.startswith(homba_prefix):
            continue

        # Indexing the defaultdict registers the class even when it has no links.
        entry = stats[node_id]
        dhba_xref = row["dhba_xref"]
        atlas_link = row["atlas_link"]
        if dhba_xref:
            entry["dhba_xrefs"].append(dhba_xref)
        if atlas_link:
            entry["atlas_links"].append(atlas_link)

        if is_numeric_homba_id(node_id.rsplit("_", 1)[-1]):
            continue

        # Non-numeric (AA) accessions must not carry any linkout at all.
        if dhba_xref:
            raise ValueError(f"AA HOMBA term unexpectedly received a DHBA xref: {node_id}")
        if atlas_link:
            raise ValueError(f"AA HOMBA term unexpectedly received an atlas link: {node_id}")

# Second pass: each numeric class needs exactly one DHBA xref and one atlas link
# per configured atlas, in configuration order.
for node_id, values in stats.items():
    local_id = node_id.rsplit("_", 1)[-1]
    if not is_numeric_homba_id(local_id):
        continue

    expected_dhba = dhba_prefix + local_id
    if values["dhba_xrefs"] != [expected_dhba]:
        raise ValueError(
            f"{node_id} should have exactly one DHBA xref {expected_dhba}, found {values['dhba_xrefs']}"
        )

    expected_links = [
        f"http://atlas.brain-map.org/atlas?atlas={atlas_id}#structure={local_id}"
        for atlas_id in atlas_ids
    ]
    if values["atlas_links"] != expected_links:
        raise ValueError(
            f"{node_id} should have {expected_atlas_count} atlas links {expected_links}, "
            f"found {values['atlas_links']}"
        )

print(
    f"Validated HOMBA linkouts for {len(stats)} HOMBA classes; "
    f"numeric terms carry one DHBA xref and {expected_atlas_count} atlas links."
)

0 commit comments

Comments
 (0)