Skip to content

Commit 09f21ea

Browse files
authored
Merge branch 'main' into jakeyheath-patch-1-1
2 parents eee6eef + d56f1f4 commit 09f21ea

28 files changed

+1018
-317
lines changed

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,7 @@
66
.coverage*
77
/htmlcov/
88
/api/python/docs/
9+
/.vscode
10+
**/__pycache__/
11+
*.owl
12+
*.sssom.tsv

.release-please-manifest.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"api/python":"1.3.1","ontology-assets":"1.2.0",".":"0.0.1"}
1+
{"api/python":"1.4.1","ontology-assets":"1.3.1",".":"0.0.1"}

api/python/CHANGELOG.md

+14
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
11
# Changelog
22

3+
## [1.4.1](https://github.com/chanzuckerberg/cellxgene-ontology-guide/compare/python-api-v1.4.0...python-api-v1.4.1) (2025-01-28)
4+
5+
6+
### BugFixes
7+
8+
* upgrade EFO from 3.69 to 3.74 ([#257](https://github.com/chanzuckerberg/cellxgene-ontology-guide/issues/257)) ([c38e905](https://github.com/chanzuckerberg/cellxgene-ontology-guide/commit/c38e905c22113c8c83bc632e1e9cdd140510f5b3))
9+
10+
## [1.4.0](https://github.com/chanzuckerberg/cellxgene-ontology-guide/compare/python-api-v1.3.1...python-api-v1.4.0) (2025-01-28)
11+
12+
13+
### Features
14+
15+
* additional species-specific ontologies for cxg 5.3 multispecies schema ([#255](https://github.com/chanzuckerberg/cellxgene-ontology-guide/issues/255)) ([64c32fe](https://github.com/chanzuckerberg/cellxgene-ontology-guide/commit/64c32fe91256323e3d5a2eff4153aa4ec63f528e))
16+
317
## [1.3.1](https://github.com/chanzuckerberg/cellxgene-ontology-guide/compare/python-api-v1.3.0...python-api-v1.3.1) (2024-11-21)
418

519

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
b525a902031034abe2d73ec0d10d71e71fa65365
1+
016968365dd92ee02154dc95656535c0e26c8eff

api/python/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "cellxgene_ontology_guide"
7-
version = "1.3.1"
7+
version = "1.4.1"
88
description = "Access ontology metadata used by CZ cellxgene"
99
authors = [
1010
{ name = "Chan Zuckerberg Initiative Foundation", email = "[email protected]" }

api/python/src/cellxgene_ontology_guide/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,5 @@
1919
.. include:: ../../CHANGELOG.md
2020
"""
2121

22-
__version__ = "1.3.1"
22+
__version__ = "1.4.1"
2323
__all__ = ["curated_ontology_term_lists", "entities", "ontology_parser", "supported_versions"]

api/python/src/cellxgene_ontology_guide/entities.py

+5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ class Ontology(Enum):
1717
MmusDv = "mmusdv"
1818
PATO = "pato"
1919
NCBITaxon = "ncbitaxon"
20+
FBbt = "fbbt"
21+
FBdv = "fbdv"
22+
ZFA = "zfa"
23+
WBbt = "wbbt"
24+
WBls = "wbls"
2025

2126

2227
class CuratedOntologyTermList(Enum):

api/python/src/cellxgene_ontology_guide/ontology_parser.py

+61
Original file line numberDiff line numberDiff line change
@@ -662,3 +662,64 @@ def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[
662662
"""
663663
ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
664664
return ontology_term_label_to_id_map.get(term_label)
665+
666+
def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
    """
    For a given term ID, fetch the equivalent term ID from a given ontology. Only an exact match is
    returned; ancestors of the term are not consulted.

    If no applicable match is found, returns None.

    Raises ValueError if cross_ontology is not one of the supported cross ontology mappings. The
    ontology of term_id is resolved by _parse_ontology_name, which is expected to raise ValueError
    for a term ID that is malformed or not part of a supported ontology (defined outside this
    method; confirm there).

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
    'UBERON:0000468'

    :param term_id: str ontology term to find equivalent term for
    :param cross_ontology: str name of ontology to search for equivalent term in
    :return: Optional[str] equivalent term ID from the cross_ontology
    """
    supported_mappings = self.cxg_schema.cross_ontology_mappings
    if cross_ontology not in supported_mappings:
        # The supported mappings are held in a set; sort them so the error text is identical
        # from one run to the next instead of following hash-dependent iteration order.
        raise ValueError(
            f"{cross_ontology} is not in the set of supported cross ontology mappings "
            f"{sorted(supported_mappings)}."
        )
    ontology_name = self._parse_ontology_name(term_id)
    # "cross_ontology_terms" is optional on a term entry; a missing or empty map means no bridge term.
    cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
    if not cross_ontology_terms:
        return None
    bridge_term_id: Optional[str] = cross_ontology_terms.get(cross_ontology)
    return bridge_term_id
695+
696+
def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
    """
    For a given term ID, fetch the equivalent term ID from a given ontology. If an exact match is found,
    returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
    of the term, one generation at a time, for the closest match.

    If no applicable match is found, returns an empty list.

    If multiple ancestors of the same distance have matches, returns all possible closest matches.
    Each match is listed once, in the order it was first encountered, even when several ancestors
    (or several paths to the same ancestor) lead to the same cross_ontology term.

    Raises ValueError under the same conditions as get_bridge_term_id, which is called for every
    term that is examined.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
    ['UBERON:0000476', 'UBERON:0000920']

    :param term_id: str ontology term to find closest term for
    :param cross_ontology: str name of ontology to search for closest term in
    :return: List[str] list of closest term IDs from the cross_ontology
    """
    closest_bridge_terms: List[str] = []
    # Terms already examined. A term reached again through another path cannot yield anything new,
    # and skipping it also guarantees termination if the parent relation ever contains a cycle.
    visited = set()
    terms_to_match = [term_id]
    while terms_to_match and not closest_bridge_terms:
        for term in terms_to_match:
            visited.add(term)
            if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
                # Distinct terms of the same generation may share a bridge term; report it once.
                if closest_bridge_term not in closest_bridge_terms:
                    closest_bridge_terms.append(closest_bridge_term)
        if closest_bridge_terms:
            # Closest generation found; no need to look up the next one.
            break
        next_generation: List[str] = []
        for child in terms_to_match:
            for parent in self.get_term_parents(child):
                if parent not in visited and parent not in next_generation:
                    next_generation.append(parent)
        terms_to_match = next_generation
    return closest_bridge_terms

api/python/src/cellxgene_ontology_guide/supported_versions.py

+3
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ def __init__(self, version: Optional[str] = None):
8484
for ontology, info in self.supported_ontologies.items()
8585
for imported_ontology in info.get("additional_ontologies", [])
8686
}
87+
self.cross_ontology_mappings = {
88+
ontology for ontology, info in self.supported_ontologies.items() if info.get("cross_ontology_mapping")
89+
}
8790
self.ontology_file_names: Dict[str, str] = {}
8891
self.deprecated_on = ontology_info[_version].get("deprecated_on")
8992
if self.deprecated_on:

api/python/tests/test_ontology_parser.py

+113-2
Original file line numberDiff line numberDiff line change
@@ -72,31 +72,118 @@ def ontology_dict_with_imports():
7272

7373

7474
@pytest.fixture
75-
def mock_CXGSchema(ontology_dict, ontology_dict_with_imports, mock_load_supported_versions, mock_load_ontology_file):
75+
def ontology_dict_with_cross_ontology_terms():
76+
return {
77+
# test cases: terms with exact matches + ancestors of terms without exact matches
78+
"ZFA:0000000": {
79+
"ancestors": {},
80+
"cross_ontology_terms": {
81+
"CL": "CL:0000000",
82+
},
83+
},
84+
"ZFA:0000001": {
85+
"ancestors": {
86+
"ZFA:0000000": 1,
87+
},
88+
"cross_ontology_terms": {
89+
"CL": "CL:0000001",
90+
},
91+
},
92+
"ZFA:0000002": {
93+
"ancestors": {
94+
"ZFA:0000000": 1,
95+
},
96+
"cross_ontology_terms": {
97+
"CL": "CL:0000002",
98+
},
99+
},
100+
"ZFA:0000003": {
101+
"ancestors": {
102+
"ZFA:0000000": 1,
103+
},
104+
"cross_ontology_terms": {
105+
"CL": "CL:0000003",
106+
},
107+
},
108+
# test case: term with no exact term and multiple closest terms 1 edge away
109+
"ZFA:0000004": {
110+
"ancestors": {
111+
"ZFA:0000001": 1,
112+
"ZFA:0000002": 1,
113+
"ZFA:0000000": 2,
114+
},
115+
},
116+
# test case: term with no exact term and 1 closest term, 1 edge away
117+
"ZFA:0000005": {
118+
"ancestors": {
119+
"ZFA:0000003": 1,
120+
"ZFA:0000000": 2,
121+
},
122+
},
123+
# test case: term with no exact term and multiple closest terms 2 edges away
124+
"ZFA:0000006": {
125+
"ancestors": {
126+
"ZFA:0000004": 1,
127+
"ZFA:0000005": 1,
128+
"ZFA:0000001": 2,
129+
"ZFA:0000002": 2,
130+
"ZFA:0000003": 2,
131+
"ZFA:0000000": 3,
132+
},
133+
},
134+
# test case: term with no exact or closest term
135+
"ZFA:0000007": {
136+
"ancestors": {},
137+
},
138+
}
139+
140+
141+
@pytest.fixture
142+
def mock_CXGSchema(
143+
ontology_dict,
144+
ontology_dict_with_imports,
145+
ontology_dict_with_cross_ontology_terms,
146+
mock_load_supported_versions,
147+
mock_load_ontology_file,
148+
):
76149
mock_load_supported_versions.return_value = {
77150
"5.0.0": {
78151
"ontologies": {
79-
"CL": {"version": "2024-01-01", "source": "http://example.com", "filename": "cl.owl"},
152+
"CL": {
153+
"version": "2024-01-01",
154+
"source": "http://example.com",
155+
"filename": "cl.owl",
156+
"cross_ontology_mapping": "cl.sssom",
157+
},
80158
"HANCESTRO": {
81159
"version": "2024-01-01",
82160
"source": "http://example.com",
83161
"filename": "cl.owl",
84162
"additional_ontologies": ["AfPO"],
85163
},
164+
"ZFA": {
165+
"version": "2024-01-01",
166+
"source": "http://example.com",
167+
"filename": "zfa.owl",
168+
"map_to": ["CL"],
169+
},
86170
}
87171
}
88172
}
89173
cxg_schema = CXGSchema()
90174
cxg_schema.ontology_file_names = {
91175
"CL": "CL-ontology-2024-01-01.json.gz",
92176
"HANCESTRO": "HANCESTRO-ontology-2024-01-01.json.gz",
177+
"ZFA": "ZFA-ontology-2024-01-01.json.gz",
93178
}
94179

95180
def get_mock_ontology_dict(file_name):
96181
if "CL" in file_name:
97182
return ontology_dict
98183
if "HANCESTRO" in file_name:
99184
return ontology_dict_with_imports
185+
if "ZFA" in file_name:
186+
return ontology_dict_with_cross_ontology_terms
100187
return None
101188

102189
mock_load_ontology_file.side_effect = get_mock_ontology_dict
@@ -584,3 +671,27 @@ def test_get_term_id_by_label(ontology_parser, label, ontology_name, expected):
584671
def test_get_term_id_by_label__unsupported_ontology_name(ontology_parser):
585672
with pytest.raises(ValueError):
586673
ontology_parser.get_term_id_by_label("gene A", "GO")
674+
675+
676+
@pytest.mark.parametrize("term_id,expected", [("ZFA:0000000", "CL:0000000"), ("ZFA:0000004", None)])
677+
def test_get_bridge_term_id(ontology_parser, term_id, expected):
678+
assert ontology_parser.get_bridge_term_id(term_id, "CL") == expected
679+
680+
681+
def test_get_bridge_term_id__unsupported_cross_ontology(ontology_parser):
682+
with pytest.raises(ValueError):
683+
ontology_parser.get_bridge_term_id("ZFA:0000000", "HANCESTRO")
684+
685+
686+
@pytest.mark.parametrize(
687+
"term_id,expected",
688+
[
689+
("ZFA:0000007", []),
690+
("ZFA:0000006", ["CL:0000001", "CL:0000002", "CL:0000003"]),
691+
("ZFA:0000005", ["CL:0000003"]),
692+
("ZFA:0000004", ["CL:0000001", "CL:0000002"]),
693+
("ZFA:0000000", ["CL:0000000"]),
694+
],
695+
)
696+
def test_get_closest_bridge_term_ids(ontology_parser, term_id, expected):
697+
assert ontology_parser.get_closest_bridge_term_ids(term_id, "CL") == expected

api/python/tests/test_supported_versions.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,12 @@ def ontology_info_content():
1919
return {
2020
"5.0.0": {
2121
"ontologies": {
22-
"CL": {"version": "v2024-01-01", "source": "http://example.com", "filename": "cl.owl"},
22+
"CL": {
23+
"version": "v2024-01-01",
24+
"source": "http://example.com",
25+
"filename": "cl.owl",
26+
"cross_ontology_mapping": "cl.sssom",
27+
},
2328
"HANCESTRO": {
2429
"version": "v2024-01-01",
2530
"source": "http://example.com",
@@ -94,6 +99,7 @@ def test__init__defaults(self, ontology_info_content, initialized_CXGSchemaInfo)
9499
assert initialized_CXGSchemaInfo.version == "5.0.0"
95100
assert initialized_CXGSchemaInfo.supported_ontologies == ontology_info_content["5.0.0"]["ontologies"]
96101
assert initialized_CXGSchemaInfo.imported_ontologies == {"FOO": "HANCESTRO", "OOF": "HANCESTRO"}
102+
assert initialized_CXGSchemaInfo.cross_ontology_mappings == {"CL"}
97103

98104
@pytest.mark.parametrize("version", ["v0.0.1", "0.0.1"])
99105
def test__init__specific_version(self, version, mock_load_supported_versions):

asset-schemas/all_ontology_schema.json

+10
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,16 @@
3131
"type": "integer"
3232
}
3333
},
34+
"cross_ontology_terms": {
35+
"type": "object",
36+
"description": "Map of bridge terms that connect this ontology term to other ontologies.",
37+
"patternProperties": {
38+
"^[A-Za-z0-9]+$": {
39+
"$ref": "ontology_term_id_schema.json#/definitions/supported_term_id"
40+
}
41+
},
42+
"additionalProperties": false
43+
},
3444
"comments": {
3545
"type": "array",
3646
"items": {

asset-schemas/ontology_info_schema.json

+13-4
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"description": "A schema for the set of valid ontology reference files mapping to a CZ CellXGene Dataset Schema Versions",
55
"type": "object",
66
"patternProperties": {
7-
"^[0-9]+\\.[0-9]+\\.[0-9]+$": {
7+
"^[0-9]+\\.[0-9]+\\.[0-9]+(-.+)?$": {
88
"description": "The version of CellxGene schema that maps to this set of ontology versions",
99
"type": "object",
1010
"properties": {
@@ -51,15 +51,24 @@
5151
"type": "string"
5252
},
5353
"description": "List of additional term id prefixes to extracted from the source ontology file."
54+
},
55+
"cross_ontology_mapping": {
56+
"type": "string",
57+
"description": "name of SSSOM file mapping this ontology's terms to cross-species equivalent ontology terms."
58+
},
59+
"map_to": {
60+
"type": "array",
61+
"items": {
62+
"type": "string"
63+
},
64+
"description": "List of ontologies to map equivalent terms to this ontology"
5465
}
5566
},
5667
"required": [
5768
"version",
5869
"source",
5970
"filename"
60-
],
61-
"additionalProperties": false
71+
]
6272
}
6373
}
6474
}
65-

0 commit comments

Comments
 (0)