@@ -41,8 +41,8 @@ def _parse_ontology_name(self, term_id: str) -> str:
41
41
42
42
def get_term_ancestors (self , term_id : str , include_self : bool = False ) -> List [str ]:
43
43
"""
44
- Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as an
45
- ancestor.
44
+ Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
45
+ an ancestor.
46
46
47
47
Example: get_term_ancestors("CL:0000005") -> ["CL:0000000", ...]
48
48
@@ -51,13 +51,30 @@ def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[s
51
51
:return: flattened List[str] of ancestor terms
52
52
"""
53
53
ontology_name = self ._parse_ontology_name (term_id )
54
- ancestors : List [ str ] = self .cxg_schema .ontology (ontology_name )[term_id ]["ancestors" ]
54
+ ancestors = list ( self .cxg_schema .ontology (ontology_name )[term_id ]["ancestors" ]. keys ())
55
55
return ancestors + [term_id ] if include_self else ancestors
56
56
57
- def get_term_list_ancestors (self , term_ids : str , include_self : bool = False ) -> Dict [str , List [ str ] ]:
57
def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
    """
    Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
    the term itself will be included as an ancestor, at distance 0.

    The returned dictionary is a new object on every call; modifying it does not affect the mapping obtained
    from self.cxg_schema.

    Example: get_term_ancestors_with_distances("CL:0000005") -> {"CL:0000000": 1, ...}

    :param term_id: str ontology term to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
    """
    ontology_name = self._parse_ontology_name(term_id)
    # Copy before adding the term itself: writing into the mapping handed back by cxg_schema.ontology() would
    # leave term_id permanently listed among its own ancestors for every later lookup.
    ancestors: Dict[str, int] = dict(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"])
    if include_self:
        ancestors[term_id] = 0
    return ancestors
73
+
74
+ def get_term_list_ancestors (self , term_ids : List [str ], include_self : bool = False ) -> Dict [str , List [str ]]:
75
+ """
76
+ Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
77
+ included as an ancestor.
61
78
62
79
Example: get_term_list_ancestors(["CL:0000003", "CL:0000005"], include_self=True) -> {
63
80
"CL:0000003": ["CL:0000003"],
@@ -71,10 +88,106 @@ def get_term_list_ancestors(self, term_ids: str, include_self: bool = False) ->
71
88
"""
72
89
return {term_id : self .get_term_ancestors (term_id , include_self ) for term_id in term_ids }
73
90
91
def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
    """
    Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
    format

    {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

    Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
    as a descendant.

    :param term_ids: list of str ontology terms to map high level terms for
    :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
    :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
    Each key maps to empty list if there are no ancestors among the provided input.
    """
    # Build the lookup once: set membership is O(1) per ancestor, and a one-shot iterable of high-level terms
    # is consumed here instead of being exhausted while filtering the first term.
    high_level_term_set = set(high_level_terms)
    ancestors_by_term = self.get_term_list_ancestors(term_ids, include_self=True)
    # Filtering keeps each term's ancestors in the order get_term_list_ancestors returned them.
    return {
        term_id: [ancestor for ancestor in term_ancestors if ancestor in high_level_term_set]
        for term_id, term_ancestors in ancestors_by_term.items()
    }
112
+
113
def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
    """
    Measure how far apart two ontology terms are, counted as the number of edges on the path that joins them
    through a lowest common ancestor. Both terms are expected to belong to the same ontology.

    :param term_id_1: str ontology term at one end of the path
    :param term_id_2: str ontology term at the other end of the path
    :return: int number of edges between the two terms, or -1 when get_lowest_common_ancestors finds no
    common ancestor for them.
    """
    shared_ancestors = self.get_lowest_common_ancestors(term_id_1, term_id_2)
    if shared_ancestors:
        # The first returned ancestor is used as the meeting point of the two upward paths.
        meeting_point = shared_ancestors[0]
        edges_from_first = self.get_term_ancestors_with_distances(term_id_1, include_self=True)[meeting_point]
        edges_from_second = self.get_term_ancestors_with_distances(term_id_2, include_self=True)[meeting_point]
        return int(edges_from_first + edges_from_second)
    return -1
129
+
130
def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
    """
    Get the lowest common ancestors between two ontology terms. A common ancestor is "lowest" when the sum of
    its distances to the two terms is minimal. Each term counts as its own ancestor at distance 0.
    Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.

    :param term_id_1: str ontology term to find LCA for
    :param term_id_2: str ontology term to find LCA for
    :return: List[str] of term IDs of the lowest common ancestor terms, sorted so the result is the same on every
    run. Empty list if the terms belong to different ontologies or share no ancestor.
    """
    if self._parse_ontology_name(term_id_1) != self._parse_ontology_name(term_id_2):
        return []
    # include path to term itself
    ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
    ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
    # Combined path length through every ancestor that both terms share
    summed_distances = {
        ancestor: distance + ancestors_2[ancestor]
        for ancestor, distance in ancestors_1.items()
        if ancestor in ancestors_2
    }
    if not summed_distances:
        return []
    min_sum_distances = min(summed_distances.values())
    # Sort ties: callers index into this list, so its order must not depend on set/hash iteration order.
    return sorted(ancestor for ancestor, total in summed_distances.items() if total == min_sum_distances)
156
+
157
def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
    """
    Pick, for every term ID, the single highest-level term among high_level_terms that the term descends from.
    The result has the format

    {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

    "Highest level" means the matching high-level term with the greatest distance from the term_id. A term counts
    as a descendant of itself (distance 0).

    :param term_ids: list of str ontology terms to map high level terms for
    :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
    :return: Dictionary mapping each str term ID to one str high-level term, or to None if the term does not map
    to any of the provided high-level terms.
    """
    candidates_by_term = self.map_high_level_terms(term_ids, high_level_terms)
    highest_by_term = {}
    for term_id in term_ids:
        distance_to = self.get_term_ancestors_with_distances(term_id, include_self=True)
        candidates = candidates_by_term[term_id]
        if candidates:
            # farthest candidate wins; on equal distance max() keeps the earliest candidate
            highest_by_term[term_id] = max(candidates, key=lambda candidate: distance_to[candidate])
        else:
            highest_by_term[term_id] = None
    return highest_by_term
186
+
74
187
def get_terms_descendants (self , term_ids : List [str ], include_self : bool = False ) -> Dict [str , List [str ]]:
75
188
"""
76
- Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be included
77
- as a descendant.
189
+ Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
190
+ included as a descendant.
78
191
79
192
Example: get_terms_descendants(["CL:0000003", "CL:0000005"], include_self=True) -> {
80
193
"CL:0000003": ["CL:0000003", "CL:0000004", ...],
@@ -83,8 +196,8 @@ def get_terms_descendants(self, term_ids: List[str], include_self: bool = False)
83
196
84
197
:param term_ids: list of str ontology terms to find descendants for
85
198
:param include_self: boolean flag to include the term itself as an descendant
86
- :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to empty
87
- list if there are no descendants.
199
+ :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
200
+ empty list if there are no descendants.
88
201
"""
89
202
descendants_dict = dict ()
90
203
ontology_names = set ()
@@ -96,7 +209,8 @@ def get_terms_descendants(self, term_ids: List[str], include_self: bool = False)
96
209
for ontology in ontology_names :
97
210
for candidate_descendant , candidate_metadata in self .cxg_schema .ontology (ontology ).items ():
98
211
for ancestor_id in descendants_dict :
99
- if ancestor_id in candidate_metadata ["ancestors" ]:
212
+ ancestors = candidate_metadata ["ancestors" ].keys ()
213
+ if ancestor_id in ancestors :
100
214
descendants_dict [ancestor_id ].append (candidate_descendant )
101
215
102
216
return descendants_dict
0 commit comments