cellxgene_ontology_guide.ontology_parser

  1import re
  2from typing import Any, Dict, Iterable, List, Optional, Union
  3
  4from cellxgene_ontology_guide._constants import VALID_NON_ONTOLOGY_TERMS
  5from cellxgene_ontology_guide.entities import OntologyNode
  6from cellxgene_ontology_guide.supported_versions import CXGSchema
  7
  8
  9class OntologyParser:
 10    """
 11    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
 12    """
 13
 14    cxg_schema: CXGSchema
 15    """ CXGSchema object to fetch ontology metadata from """
 16
 17    def __init__(self, schema_version: Optional[str] = None):
 18        """
 19        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
 20        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
 21        parse the corresponding ontology metadata.
 22
 23        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
 24        is loaded.
 25        """
 26        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
 27        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
 28            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
 29        }
 30
 31    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
 32        """
 33        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
 34
 35        Example
 36        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
 37        >>> ontology_parser = OntologyParser()
 38        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
 39        {'Label A': 'CL:0000000', ... }
 40
 41        :param ontology_name: str name of ontology to get map of term labels to term IDs
 42        """
 43        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
 44        if not supported_ontology_name:
 45            raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.")
 46
 47        if self.term_label_to_id_map[supported_ontology_name]:
 48            return self.term_label_to_id_map[supported_ontology_name].copy()
 49
 50        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
 51            self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id
 52
 53        return self.term_label_to_id_map[supported_ontology_name].copy()
 54
 55    def _parse_ontology_name(self, term_id: str) -> str:
 56        """
 57        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
 58        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.
 59
 60        :param term_id: str ontology term to parse
 61        :return: str name of ontology that term belongs to
 62        """
 63        pattern = r"[A-Za-z]+:\d+"
 64        if not re.match(pattern, term_id):
 65            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")
 66
 67        ontology_term_prefix = term_id.split(":")[0]
 68        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
 69        if not ontology_name:
 70            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")
 71
 72        return ontology_name
 73
 74    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
 75        """
 76        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.
 77
 78        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
 79        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
 80        it is imported in.
 81        Otherwise, returns None.
 82
 83        :param ontology_term_prefix: str ontology term prefix to check
 84        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
 85        a supported ontology in the CxG schema.
 86        """
 87        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
 88            return ontology_term_prefix
 89        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
 90        return supported_ontology_name
 91
 92    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
 93        """
 94        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
 95        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
 96        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology
 97
 98        :param term_id: str ontology term to check
 99        :param ontology: str name of ontology to check against
100        :return: boolean flag indicating whether the term is supported
101        """
102        try:
103            ontology_name = self._parse_ontology_name(term_id)
104            if ontology and ontology_name != ontology:
105                return False
106            if term_id in self.cxg_schema.ontology(ontology_name):
107                return True
108        except ValueError:
109            return False
110        return False
111
112    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
113        """
114        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
115        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
116
117        Example
118        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
119        >>> ontology_parser = OntologyParser()
120        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
121        ['CL:0000000', 'CL:0000057', ...
122
123        :param term_id: str ontology term to find ancestors for
124        :param include_self: boolean flag to include the term itself as an ancestor
125        :return: flattened List[str] of ancestor terms
126        """
127        if term_id in VALID_NON_ONTOLOGY_TERMS:
128            return []
129        ontology_name = self._parse_ontology_name(term_id)
130        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
131        return ancestors + [term_id] if include_self else ancestors
132
133    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
134        """
135        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
136        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
137
138        Example
139        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
140        >>> ontology_parser = OntologyParser()
141        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
142        {
143            'CL:0000003': ['CL:0000003'],
144            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
145        }
146
147        :param term_ids: list of str ontology terms to find ancestors for
148        :param include_self: boolean flag to include the term itself as an ancestor
149        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
150        empty
151        list if there are no ancestors.
152        """
153        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}
154
155    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
156        """
157        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
158        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
159        supported ontology.
160
161        Example
162        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
163        >>> ontology_parser = OntologyParser()
164        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
165        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
166
167        :param term_id: str ontology term to find ancestors for
168        :param include_self: boolean flag to include the term itself as an ancestor
169        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
170        """
171        if term_id in VALID_NON_ONTOLOGY_TERMS:
172            return {}
173        ontology_name = self._parse_ontology_name(term_id)
174        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
175        return ancestors | {term_id: 0} if include_self else ancestors
176
177    def map_term_ancestors_with_distances(
178        self, term_ids: Iterable[str], include_self: bool = False
179    ) -> Dict[str, Dict[str, int]]:
180        """
181        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
182        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
183        supported ontology.
184
185        Example
186        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
187        >>> ontology_parser = OntologyParser()
188        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
189        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3,
190        'CL:0000005': 0}}
191
192        :param term_ids: list of str ontology terms to find ancestors for
193        :param include_self: boolean flag to include the term itself as an ancestor
194        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
195        respective distances from the term_id
196        """
197        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}
198
199    def get_term_parents(self, term_id: str) -> List[str]:
200        """
201        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
202        a supported ontology.
203
204        Example
205        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
206        >>> ontology_parser = OntologyParser()
207        >>> ontology_parser.get_term_parents("CL:0000101")
208        ['CL:0000526']
209
210        :param term_id: str ontology term to find parents for
211        :return: List[str] of parent terms
212        """
213        if term_id in VALID_NON_ONTOLOGY_TERMS:
214            return []
215        ontology_name = self._parse_ontology_name(term_id)
216        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
217        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
218        return parents
219
220    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
221        """
222        Get the distance between two ontology terms. The distance is defined as the number of edges between the
223        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
224        Raises ValueError if term IDs are not valid members of a supported ontology.
225
226        :param term_id_1: str ontology term to find distance for
227        :param term_id_2: str ontology term to find distance for
228        :return: int distance between the two terms, measured in number of edges between their shortest path.
229        """
230        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
231        if not lcas:
232            return -1
233        return int(
234            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
235            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
236        )
237
238    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
239        """
240        Get the lowest common ancestors between two ontology terms that is from the given ontology.
241        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
242        Raises ValueError if term IDs are not valid members of a supported ontology.
243
244        :param term_id_1: str ontology term to find LCA for
245        :param term_id_2: str ontology term to find LCA for
246        :return: str term ID of the lowest common ancestor term
247        """
248        # include path to term itself
249        ontology = self._parse_ontology_name(term_id_1)
250        if ontology != self._parse_ontology_name(term_id_2):
251            return []
252        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
253        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
254        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
255        min_sum_distances = float("inf")
256        for ancestors in common_ancestors:
257            sum_distances = ancestors_1[ancestors] + ancestors_2[ancestors]
258            if sum_distances < min_sum_distances:
259                min_sum_distances = sum_distances
260        return [
261            ancestor
262            for ancestor in common_ancestors
263            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
264        ]
265
266    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
267        """
268        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
269        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
270        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
271        Raises ValueError if term ID is not valid member of a supported ontology.
272
273        Example
274        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
275        >>> ontology_parser = OntologyParser()
276        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
277        ['CL:0000000']
278
279        :param term_id: str ontology term to find high-level terms for
280        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
281        :return: List[str] of high-level terms that the term is a descendant of
282        """
283        if term_id in VALID_NON_ONTOLOGY_TERMS:
284            return []
285        ancestors = self.get_term_ancestors(term_id, include_self=True)
286        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]
287
288    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
289        """
290        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
291        format
292
293        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
294
295        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
296        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
297
298        :param term_ids: list of str ontology terms to map high level terms for
299        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
300        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
301        Each key maps to empty list if there are no ancestors among the provided input.
302        """
303        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}
304
305    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
306        """
307        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
308        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
309        if term ID is not valid member of a supported ontology.
310
311        Example
312        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
313        >>> ontology_parser = OntologyParser()
314        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
315        'CL:0000000'
316
317        :param term_id: str ontology term to find highest level term for
318        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
319        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
320        high-level terms
321        """
322        high_level_terms = self.get_high_level_terms(term_id, high_level_terms)
323        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
324        if not high_level_terms:
325            return None
326        return max(high_level_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])
327
328    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
329        """
330        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
331        format
332
333        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
334
335        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
336        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
337        provided input. Raises ValueError if term ID is not valid member of a supported ontology.
338
339        :param term_ids: list of str ontology terms to map high level terms for
340        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
341        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
342        Each key maps to empty list if there are no ancestors among the provided input.
343        """
344        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}
345
346    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
347        """
348        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
349        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
350
351        Example
352        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
353        >>> ontology_parser = OntologyParser()
354        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
355        ['CL:0002363']
356
357        :param term_id: str ontology term to find descendants for
358        :param include_self: boolean flag to include the term itself as a descendant
359        :return: List[str] of descendant terms
360        """
361        if term_id in VALID_NON_ONTOLOGY_TERMS:
362            return []
363        ontology_name = self._parse_ontology_name(term_id)
364        descendants = [term_id] if include_self else []
365        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
366            ancestors = candidate_metadata["ancestors"].keys()
367            if term_id in ancestors:
368                descendants.append(candidate_descendant)
369        return descendants
370
371    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
372        """
373        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
374         included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
375
376        Example
377        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
378        >>> ontology_parser = OntologyParser()
379        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
380        {
381            'CL:0000003': ['CL:0000003', ...],
382            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
383        }
384
385        :param term_ids: list of str ontology terms to find descendants for
386        :param include_self: boolean flag to include the term itself as an descendant
387        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
388        empty list if there are no descendants.
389        """
390        descendants_dict: Dict[str, List[str]] = dict()
391        ontology_names = set()
392        for term_id in term_ids:
393            if term_id in VALID_NON_ONTOLOGY_TERMS:
394                descendants_dict[term_id] = []
395                continue
396            ontology_name = self._parse_ontology_name(term_id)
397            descendants_dict[term_id] = [term_id] if include_self else []
398            ontology_names.add(ontology_name)
399
400        for ontology in ontology_names:
401            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
402                for ancestor_id in descendants_dict:
403                    ancestors = candidate_metadata["ancestors"].keys()
404                    if ancestor_id in ancestors:
405                        descendants_dict[ancestor_id].append(candidate_descendant)
406
407        return descendants_dict
408
409    def get_term_children(self, term_id: str) -> List[str]:
410        """
411        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
412        supported ontology.
413
414        Example
415        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
416        >>> ontology_parser = OntologyParser()
417        >>> ontology_parser.get_term_children("CL:0000526")
418        ['CL:0000101']
419
420        :param term_id: str ontology term to find children for
421        :return: List[str] of children terms
422        """
423        if term_id in VALID_NON_ONTOLOGY_TERMS:
424            return []
425        ontology_name = self._parse_ontology_name(term_id)
426        children = []
427        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
428            for ancestor, distance in candidate_metadata["ancestors"].items():
429                if ancestor == term_id and distance == 1:
430                    children.append(candidate_child)
431        return children
432
433    def get_term_graph(self, term_id: str) -> OntologyNode:
434        """
435        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
436        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
437
438        Example
439        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
440        >>> ontology_parser = OntologyParser()
441        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
442        >>> root_node.term_id
443        'CL:0000000'
444        >>> root_node.to_dict() # doctest: +SKIP
445        {
446            "term_id": "CL:0000000",
447            "name": "cell A",
448            "children": [
449                {
450                    "term_id": "CL:0000001",
451                    "name": "cell B",
452                    "children": [...],
453                },
454                {
455                    "term_id": "CL:0000002",
456                    "name": "cell C",
457                    "children": [...],
458                },
459                ...
460            ]
461        }
462        >>> root_node.term_counter # doctest: +SKIP
463        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
464
465        :param term_id: str ontology term to build subtree for
466        :return: OntologyNode representation of graph with term_id as root.
467        """
468        term_label = self.get_term_label(term_id)
469        root = OntologyNode(term_id, term_label)
470        for child_term_id in self.get_term_children(term_id):
471            root.add_child(self.get_term_graph(child_term_id))
472        return root
473
474    def is_term_deprecated(self, term_id: str) -> bool:
475        """
476        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
477        ontology.
478
479        Example
480        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
481        >>> ontology_parser = OntologyParser()
482        >>> ontology_parser.is_term_deprecated("CL:0000003")
483        True
484
485        :param term_id: str ontology term to check for deprecation
486        :return: boolean flag indicating whether the term is deprecated
487        """
488        if term_id in VALID_NON_ONTOLOGY_TERMS:
489            return False
490        ontology_name = self._parse_ontology_name(term_id)
491        is_deprecated: bool = self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated")
492        return is_deprecated
493
494    def get_term_replacement(self, term_id: str) -> Union[str, None]:
495        """
496        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
497        Raises ValueError if term ID is not valid member of a supported ontology.
498
499        Example
500        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
501        >>> ontology_parser = OntologyParser()
502        >>> ontology_parser.get_term_replacement("CL:0000003")
503        'CL:0000000'
504
505        :param term_id: str ontology term to check a replacement term for
506        :return: replacement str term ID if it exists, None otherwise
507        """
508        if term_id in VALID_NON_ONTOLOGY_TERMS:
509            return None
510        ontology_name = self._parse_ontology_name(term_id)
511        replaced_by: str = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
512        return replaced_by if replaced_by else None
513
514    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
515        """
516        Fetch metadata for a given ontology term. Returns a dict with format
517
518        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
519
520        Comments maps to List[str] of ontology curator comments
521        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
522        Consider maps to List[str] of alternate ontology terms to consider using instead of this term
523
524        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
525        of a supported ontology.
526
527        :param term_id: str ontology term to fetch metadata for
528        :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
529        """
530        if term_id in VALID_NON_ONTOLOGY_TERMS:
531            return {"comments": None, "term_tracker": None, "consider": None}
532        ontology_name = self._parse_ontology_name(term_id)
533        return {
534            key: self.cxg_schema.ontology(ontology_name)[term_id].get(key, None)
535            for key in {"comments", "term_tracker", "consider"}
536        }
537
538    def get_term_label(self, term_id: str) -> str:
539        """
540        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
541        supported ontology.
542
543        Example
544        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
545        >>> ontology_parser = OntologyParser()
546        >>> ontology_parser.get_term_label("CL:0000005")
547        'neural crest derived fibroblast'
548
549        :param term_id: str ontology term to fetch label for
550        :return: str human-readable label for the term
551        """
552        if term_id in VALID_NON_ONTOLOGY_TERMS:
553            return term_id
554        ontology_name = self._parse_ontology_name(term_id)
555        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
556        return label
557
558    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
559        """
560        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
561        member of a supported ontology.
562
563        Example
564        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
565        >>> ontology_parser = OntologyParser()
566        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
567        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
568
569        :param term_ids: list of str ontology terms to fetch label for
570        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
571        """
572        return {term_id: self.get_term_label(term_id) for term_id in term_ids}
573
574    def get_term_description(self, term_id: str) -> Optional[str]:
575        """
576        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
577        supported ontology.
578
579        Example
580        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
581        >>> ontology_parser = OntologyParser()
582        >>> ontology_parser.get_term_description("CL:0000005")
583        'Any fibroblast that is derived from the neural crest.'
584
585        :param term_id: str ontology term to fetch description for
586        :return: str description for the term
587        """
588        if term_id in VALID_NON_ONTOLOGY_TERMS:
589            return term_id
590        ontology_name = self._parse_ontology_name(term_id)
591        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
592        return description
593
594    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
595        """
596        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
597        a supported ontology.
598
599        Example
600        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
601        >>> ontology_parser = OntologyParser()
602        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
603        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
604
605        :param term_ids: list of str ontology terms to fetch descriptions for
606        :return: Dict[str, str] mapping term IDs to their respective descriptions
607        """
608        return {term_id: self.get_term_description(term_id) for term_id in term_ids}
609
610    def get_term_synonyms(self, term_id: str) -> List[str]:
611        """
612        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
613        Raises ValueError if term ID is not valid member of a supported ontology.
614
615        Example
616        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
617        >>> ontology_parser = OntologyParser()
618        >>> ontology_parser.get_term_synonyms("CL:0000019")
619        ['sperm cell', 'spermatozoid', 'spermatozoon']
620
621        :param term_id: str ontology term to fetch synonyms for
622        :return: List[str] synonyms for the term
623        """
624        if term_id in VALID_NON_ONTOLOGY_TERMS:
625            return []
626        ontology_name = self._parse_ontology_name(term_id)
627        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
628        return synonyms
629
630    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
631        """
632        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
633        a supported ontology.
634
635        Example
636        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
637        >>> ontology_parser = OntologyParser()
638        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
639        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
640
641        :param term_ids: list of str ontology terms to fetch synonyms for
642        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
643        """
644        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}
645
646    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
647        """
648        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
649        ontology_name is not a supported ontology.
650
651        Returns None if term ID is not valid member of a supported ontology.
652
653        Example
654        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
655        >>> ontology_parser = OntologyParser()
656        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
657        'CL:0000005'
658
659        :param term_label: str human-readable label to fetch term ID for
660        :param ontology_name: str name of ontology to search for term label in
661        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
662        """
663        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
664        return ontology_term_label_to_id_map.get(term_label)
665
666    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
667        """
668        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
669
670        If no applicable match is found, returns None.
671
672        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
673
674        Example
675        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
676        >>> ontology_parser = OntologyParser()
677        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
678        'UBERON:0000468'
679
680        :param term_id: str ontology term to find equivalent term for
681        :param cross_ontology: str name of ontology to search for equivalent term in
682        :return: Optional[str] equivalent term ID from the cross_ontology
683        """
684        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
685            raise ValueError(
686                f"{cross_ontology} is not in the set of supported cross ontology mappings "
687                f"{self.cxg_schema.cross_ontology_mappings}."
688            )
689        ontology_name = self._parse_ontology_name(term_id)
690        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
691        bridge_term_id: Optional[str] = None
692        if cross_ontology_terms:
693            bridge_term_id = cross_ontology_terms.get(cross_ontology)
694        return bridge_term_id
695
696    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
697        """
698        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
699        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
700        of the term for the closest match.
701
702        If no applicable match is found, returns an empty list.
703
704        If multiple ancestors of the same distance have matches, returns all possible closest matches.
705
706        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
707
708        Example
709        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
710        >>> ontology_parser = OntologyParser()
711        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
712        ['UBERON:0000476', 'UBERON:0000920']
713
714        :param term_id: str ontology term to find closest term for
715        :param cross_ontology: str name of ontology to search for closest term in
716        :return: List[str] list of closest term IDs from the cross_ontology
717        """
718        closest_bridge_terms: List[str] = []
719        terms_to_match = [term_id]
720        while terms_to_match and not closest_bridge_terms:
721            for term in terms_to_match:
722                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
723                    closest_bridge_terms.append(closest_bridge_term)
724            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
725        return closest_bridge_terms
class OntologyParser:
 10class OntologyParser:
 11    """
 12    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
 13    """
 14
 15    cxg_schema: CXGSchema
 16    """ CXGSchema object to fetch ontology metadata from """
 17
 18    def __init__(self, schema_version: Optional[str] = None):
 19        """
 20        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
 21        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
 22        parse the corresponding ontology metadata.
 23
 24        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
 25        is loaded.
 26        """
 27        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
 28        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
 29            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
 30        }
 31
 32    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
 33        """
 34        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
 35
 36        Example
 37        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
 38        >>> ontology_parser = OntologyParser()
 39        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
 40        {'Label A': 'CL:0000000', ... }
 41
 42        :param ontology_name: str name of ontology to get map of term labels to term IDs
 43        """
 44        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
 45        if not supported_ontology_name:
 46            raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.")
 47
 48        if self.term_label_to_id_map[supported_ontology_name]:
 49            return self.term_label_to_id_map[supported_ontology_name].copy()
 50
 51        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
 52            self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id
 53
 54        return self.term_label_to_id_map[supported_ontology_name].copy()
 55
 56    def _parse_ontology_name(self, term_id: str) -> str:
 57        """
 58        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
 59        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.
 60
 61        :param term_id: str ontology term to parse
 62        :return: str name of ontology that term belongs to
 63        """
 64        pattern = r"[A-Za-z]+:\d+"
 65        if not re.match(pattern, term_id):
 66            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")
 67
 68        ontology_term_prefix = term_id.split(":")[0]
 69        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
 70        if not ontology_name:
 71            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")
 72
 73        return ontology_name
 74
 75    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
 76        """
 77        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.
 78
 79        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
 80        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
 81        it is imported in.
 82        Otherwise, returns None.
 83
 84        :param ontology_term_prefix: str ontology term prefix to check
 85        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
 86        a supported ontology in the CxG schema.
 87        """
 88        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
 89            return ontology_term_prefix
 90        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
 91        return supported_ontology_name
 92
 93    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
 94        """
 95        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
 96        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
 97        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology
 98
 99        :param term_id: str ontology term to check
100        :param ontology: str name of ontology to check against
101        :return: boolean flag indicating whether the term is supported
102        """
103        try:
104            ontology_name = self._parse_ontology_name(term_id)
105            if ontology and ontology_name != ontology:
106                return False
107            if term_id in self.cxg_schema.ontology(ontology_name):
108                return True
109        except ValueError:
110            return False
111        return False
112
113    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
114        """
115        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
116        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
117
118        Example
119        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
120        >>> ontology_parser = OntologyParser()
121        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
122        ['CL:0000000', 'CL:0000057', ...
123
124        :param term_id: str ontology term to find ancestors for
125        :param include_self: boolean flag to include the term itself as an ancestor
126        :return: flattened List[str] of ancestor terms
127        """
128        if term_id in VALID_NON_ONTOLOGY_TERMS:
129            return []
130        ontology_name = self._parse_ontology_name(term_id)
131        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
132        return ancestors + [term_id] if include_self else ancestors
133
134    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
135        """
136        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
137        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
138
139        Example
140        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
141        >>> ontology_parser = OntologyParser()
142        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
143        {
144            'CL:0000003': ['CL:0000003'],
145            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
146        }
147
148        :param term_ids: list of str ontology terms to find ancestors for
149        :param include_self: boolean flag to include the term itself as an ancestor
150        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
151        empty
152        list if there are no ancestors.
153        """
154        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}
155
156    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
157        """
158        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
159        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
160        supported ontology.
161
162        Example
163        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
164        >>> ontology_parser = OntologyParser()
165        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
166        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
167
168        :param term_id: str ontology term to find ancestors for
169        :param include_self: boolean flag to include the term itself as an ancestor
170        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
171        """
172        if term_id in VALID_NON_ONTOLOGY_TERMS:
173            return {}
174        ontology_name = self._parse_ontology_name(term_id)
175        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
176        return ancestors | {term_id: 0} if include_self else ancestors
177
178    def map_term_ancestors_with_distances(
179        self, term_ids: Iterable[str], include_self: bool = False
180    ) -> Dict[str, Dict[str, int]]:
181        """
182        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
183        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
184        supported ontology.
185
186        Example
187        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
188        >>> ontology_parser = OntologyParser()
189        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
190        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3,
191        'CL:0000005': 0}}
192
193        :param term_ids: list of str ontology terms to find ancestors for
194        :param include_self: boolean flag to include the term itself as an ancestor
195        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
196        respective distances from the term_id
197        """
198        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}
199
200    def get_term_parents(self, term_id: str) -> List[str]:
201        """
202        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
203        a supported ontology.
204
205        Example
206        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
207        >>> ontology_parser = OntologyParser()
208        >>> ontology_parser.get_term_parents("CL:0000101")
209        ['CL:0000526']
210
211        :param term_id: str ontology term to find parents for
212        :return: List[str] of parent terms
213        """
214        if term_id in VALID_NON_ONTOLOGY_TERMS:
215            return []
216        ontology_name = self._parse_ontology_name(term_id)
217        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
218        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
219        return parents
220
221    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
222        """
223        Get the distance between two ontology terms. The distance is defined as the number of edges between the
224        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
225        Raises ValueError if term IDs are not valid members of a supported ontology.
226
227        :param term_id_1: str ontology term to find distance for
228        :param term_id_2: str ontology term to find distance for
229        :return: int distance between the two terms, measured in number of edges between their shortest path.
230        """
231        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
232        if not lcas:
233            return -1
234        return int(
235            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
236            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
237        )
238
239    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
240        """
241        Get the lowest common ancestors between two ontology terms that is from the given ontology.
242        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
243        Raises ValueError if term IDs are not valid members of a supported ontology.
244
245        :param term_id_1: str ontology term to find LCA for
246        :param term_id_2: str ontology term to find LCA for
247        :return: str term ID of the lowest common ancestor term
248        """
249        # include path to term itself
250        ontology = self._parse_ontology_name(term_id_1)
251        if ontology != self._parse_ontology_name(term_id_2):
252            return []
253        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
254        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
255        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
256        min_sum_distances = float("inf")
257        for ancestors in common_ancestors:
258            sum_distances = ancestors_1[ancestors] + ancestors_2[ancestors]
259            if sum_distances < min_sum_distances:
260                min_sum_distances = sum_distances
261        return [
262            ancestor
263            for ancestor in common_ancestors
264            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
265        ]
266
267    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
268        """
269        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
270        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
271        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
272        Raises ValueError if term ID is not valid member of a supported ontology.
273
274        Example
275        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
276        >>> ontology_parser = OntologyParser()
277        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
278        ['CL:0000000']
279
280        :param term_id: str ontology term to find high-level terms for
281        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
282        :return: List[str] of high-level terms that the term is a descendant of
283        """
284        if term_id in VALID_NON_ONTOLOGY_TERMS:
285            return []
286        ancestors = self.get_term_ancestors(term_id, include_self=True)
287        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]
288
289    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
290        """
291        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
292        format
293
294        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
295
296        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
297        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
298
299        :param term_ids: list of str ontology terms to map high level terms for
300        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
301        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
302        Each key maps to empty list if there are no ancestors among the provided input.
303        """
304        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}
305
306    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
307        """
308        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
309        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
310        if term ID is not valid member of a supported ontology.
311
312        Example
313        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
314        >>> ontology_parser = OntologyParser()
315        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
316        'CL:0000000'
317
318        :param term_id: str ontology term to find highest level term for
319        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
320        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
321        high-level terms
322        """
323        high_level_terms = self.get_high_level_terms(term_id, high_level_terms)
324        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
325        if not high_level_terms:
326            return None
327        return max(high_level_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])
328
329    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
330        """
331        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
332        format
333
334        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
335
336        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
337        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
338        provided input. Raises ValueError if term ID is not valid member of a supported ontology.
339
340        :param term_ids: list of str ontology terms to map high level terms for
341        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
342        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
343        Each key maps to empty list if there are no ancestors among the provided input.
344        """
345        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}
346
347    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
348        """
349        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
350        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
351
352        Example
353        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
354        >>> ontology_parser = OntologyParser()
355        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
356        ['CL:0002363']
357
358        :param term_id: str ontology term to find descendants for
359        :param include_self: boolean flag to include the term itself as a descendant
360        :return: List[str] of descendant terms
361        """
362        if term_id in VALID_NON_ONTOLOGY_TERMS:
363            return []
364        ontology_name = self._parse_ontology_name(term_id)
365        descendants = [term_id] if include_self else []
366        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
367            ancestors = candidate_metadata["ancestors"].keys()
368            if term_id in ancestors:
369                descendants.append(candidate_descendant)
370        return descendants
371
372    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
373        """
374        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
375         included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
376
377        Example
378        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
379        >>> ontology_parser = OntologyParser()
380        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
381        {
382            'CL:0000003': ['CL:0000003', ...],
383            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
384        }
385
386        :param term_ids: list of str ontology terms to find descendants for
387        :param include_self: boolean flag to include the term itself as an descendant
388        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
389        empty list if there are no descendants.
390        """
391        descendants_dict: Dict[str, List[str]] = dict()
392        ontology_names = set()
393        for term_id in term_ids:
394            if term_id in VALID_NON_ONTOLOGY_TERMS:
395                descendants_dict[term_id] = []
396                continue
397            ontology_name = self._parse_ontology_name(term_id)
398            descendants_dict[term_id] = [term_id] if include_self else []
399            ontology_names.add(ontology_name)
400
401        for ontology in ontology_names:
402            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
403                for ancestor_id in descendants_dict:
404                    ancestors = candidate_metadata["ancestors"].keys()
405                    if ancestor_id in ancestors:
406                        descendants_dict[ancestor_id].append(candidate_descendant)
407
408        return descendants_dict
409
410    def get_term_children(self, term_id: str) -> List[str]:
411        """
412        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
413        supported ontology.
414
415        Example
416        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
417        >>> ontology_parser = OntologyParser()
418        >>> ontology_parser.get_term_children("CL:0000526")
419        ['CL:0000101']
420
421        :param term_id: str ontology term to find children for
422        :return: List[str] of children terms
423        """
424        if term_id in VALID_NON_ONTOLOGY_TERMS:
425            return []
426        ontology_name = self._parse_ontology_name(term_id)
427        children = []
428        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
429            for ancestor, distance in candidate_metadata["ancestors"].items():
430                if ancestor == term_id and distance == 1:
431                    children.append(candidate_child)
432        return children
433
434    def get_term_graph(self, term_id: str) -> OntologyNode:
435        """
436        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
437        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
438
439        Example
440        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
441        >>> ontology_parser = OntologyParser()
442        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
443        >>> root_node.term_id
444        'CL:0000000'
445        >>> root_node.to_dict() # doctest: +SKIP
446        {
447            "term_id": "CL:0000000",
448            "name": "cell A",
449            "children": [
450                {
451                    "term_id": "CL:0000001",
452                    "name": "cell B",
453                    "children": [...],
454                },
455                {
456                    "term_id": "CL:0000002",
457                    "name": "cell C",
458                    "children": [...],
459                },
460                ...
461            ]
462        }
463        >>> root_node.term_counter # doctest: +SKIP
464        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
465
466        :param term_id: str ontology term to build subtree for
467        :return: OntologyNode representation of graph with term_id as root.
468        """
469        term_label = self.get_term_label(term_id)
470        root = OntologyNode(term_id, term_label)
471        for child_term_id in self.get_term_children(term_id):
472            root.add_child(self.get_term_graph(child_term_id))
473        return root
474
475    def is_term_deprecated(self, term_id: str) -> bool:
476        """
477        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
478        ontology.
479
480        Example
481        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
482        >>> ontology_parser = OntologyParser()
483        >>> ontology_parser.is_term_deprecated("CL:0000003")
484        True
485
486        :param term_id: str ontology term to check for deprecation
487        :return: boolean flag indicating whether the term is deprecated
488        """
489        if term_id in VALID_NON_ONTOLOGY_TERMS:
490            return False
491        ontology_name = self._parse_ontology_name(term_id)
492        is_deprecated: bool = self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated")
493        return is_deprecated
494
495    def get_term_replacement(self, term_id: str) -> Union[str, None]:
496        """
497        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
498        Raises ValueError if term ID is not valid member of a supported ontology.
499
500        Example
501        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
502        >>> ontology_parser = OntologyParser()
503        >>> ontology_parser.get_term_replacement("CL:0000003")
504        'CL:0000000'
505
506        :param term_id: str ontology term to check a replacement term for
507        :return: replacement str term ID if it exists, None otherwise
508        """
509        if term_id in VALID_NON_ONTOLOGY_TERMS:
510            return None
511        ontology_name = self._parse_ontology_name(term_id)
512        replaced_by: str = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
513        return replaced_by if replaced_by else None
514
515    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
516        """
517        Fetch metadata for a given ontology term. Returns a dict with format
518
519        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
520
521        Comments maps to List[str] of ontology curator comments
522        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
523        Consider maps to List[str] of alternate ontology terms to consider using instead of this term
524
525        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
526        of a supported ontology.
527
528        :param term_id: str ontology term to fetch metadata for
529        :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
530        """
531        if term_id in VALID_NON_ONTOLOGY_TERMS:
532            return {"comments": None, "term_tracker": None, "consider": None}
533        ontology_name = self._parse_ontology_name(term_id)
534        return {
535            key: self.cxg_schema.ontology(ontology_name)[term_id].get(key, None)
536            for key in {"comments", "term_tracker", "consider"}
537        }
538
539    def get_term_label(self, term_id: str) -> str:
540        """
541        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
542        supported ontology.
543
544        Example
545        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
546        >>> ontology_parser = OntologyParser()
547        >>> ontology_parser.get_term_label("CL:0000005")
548        'neural crest derived fibroblast'
549
550        :param term_id: str ontology term to fetch label for
551        :return: str human-readable label for the term
552        """
553        if term_id in VALID_NON_ONTOLOGY_TERMS:
554            return term_id
555        ontology_name = self._parse_ontology_name(term_id)
556        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
557        return label
558
559    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
560        """
561        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
562        member of a supported ontology.
563
564        Example
565        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
566        >>> ontology_parser = OntologyParser()
567        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
568        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
569
570        :param term_ids: list of str ontology terms to fetch label for
571        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
572        """
573        return {term_id: self.get_term_label(term_id) for term_id in term_ids}
574
575    def get_term_description(self, term_id: str) -> Optional[str]:
576        """
577        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
578        supported ontology.
579
580        Example
581        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
582        >>> ontology_parser = OntologyParser()
583        >>> ontology_parser.get_term_description("CL:0000005")
584        'Any fibroblast that is derived from the neural crest.'
585
586        :param term_id: str ontology term to fetch description for
587        :return: str description for the term
588        """
589        if term_id in VALID_NON_ONTOLOGY_TERMS:
590            return term_id
591        ontology_name = self._parse_ontology_name(term_id)
592        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
593        return description
594
595    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
596        """
597        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
598        a supported ontology.
599
600        Example
601        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
602        >>> ontology_parser = OntologyParser()
603        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
604        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
605
606        :param term_ids: list of str ontology terms to fetch descriptions for
607        :return: Dict[str, str] mapping term IDs to their respective descriptions
608        """
609        return {term_id: self.get_term_description(term_id) for term_id in term_ids}
610
611    def get_term_synonyms(self, term_id: str) -> List[str]:
612        """
613        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
614        Raises ValueError if term ID is not valid member of a supported ontology.
615
616        Example
617        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
618        >>> ontology_parser = OntologyParser()
619        >>> ontology_parser.get_term_synonyms("CL:0000019")
620        ['sperm cell', 'spermatozoid', 'spermatozoon']
621
622        :param term_id: str ontology term to fetch synonyms for
623        :return: List[str] synonyms for the term
624        """
625        if term_id in VALID_NON_ONTOLOGY_TERMS:
626            return []
627        ontology_name = self._parse_ontology_name(term_id)
628        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
629        return synonyms
630
631    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
632        """
633        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
634        a supported ontology.
635
636        Example
637        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
638        >>> ontology_parser = OntologyParser()
639        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
640        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
641
642        :param term_ids: list of str ontology terms to fetch synonyms for
643        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
644        """
645        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}
646
647    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
648        """
649        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
650        ontology_name is not a supported ontology.
651
652        Returns None if term ID is not valid member of a supported ontology.
653
654        Example
655        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
656        >>> ontology_parser = OntologyParser()
657        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
658        'CL:0000005'
659
660        :param term_label: str human-readable label to fetch term ID for
661        :param ontology_name: str name of ontology to search for term label in
662        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
663        """
664        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
665        return ontology_term_label_to_id_map.get(term_label)
666
667    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
668        """
669        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
670
671        If no applicable match is found, returns None.
672
673        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
674
675        Example
676        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
677        >>> ontology_parser = OntologyParser()
678        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
679        'UBERON:0000468'
680
681        :param term_id: str ontology term to find equivalent term for
682        :param cross_ontology: str name of ontology to search for equivalent term in
683        :return: Optional[str] equivalent term ID from the cross_ontology
684        """
685        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
686            raise ValueError(
687                f"{cross_ontology} is not in the set of supported cross ontology mappings "
688                f"{self.cxg_schema.cross_ontology_mappings}."
689            )
690        ontology_name = self._parse_ontology_name(term_id)
691        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
692        bridge_term_id: Optional[str] = None
693        if cross_ontology_terms:
694            bridge_term_id = cross_ontology_terms.get(cross_ontology)
695        return bridge_term_id
696
697    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
698        """
699        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
700        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
701        of the term for the closest match.
702
703        If no applicable match is found, returns an empty list.
704
705        If multiple ancestors of the same distance have matches, returns all possible closest matches.
706
707        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
708
709        Example
710        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
711        >>> ontology_parser = OntologyParser()
712        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
713        ['UBERON:0000476', 'UBERON:0000920']
714
715        :param term_id: str ontology term to find closest term for
716        :param cross_ontology: str name of ontology to search for closest term in
717        :return: List[str] list of closest term IDs from the cross_ontology
718        """
719        closest_bridge_terms: List[str] = []
720        terms_to_match = [term_id]
721        while terms_to_match and not closest_bridge_terms:
722            for term in terms_to_match:
723                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
724                    closest_bridge_terms.append(closest_bridge_term)
725            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
726        return closest_bridge_terms

An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.

OntologyParser(schema_version: Optional[str] = None)
18    def __init__(self, schema_version: Optional[str] = None):
19        """
20        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
21        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
22        parse the corresponding ontology metadata.
23
24        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
25        is loaded.
26        """
27        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
28        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
29            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
30        }

Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema version. If not cached, it will make a network call to GitHub Release Assets to load in memory and parse the corresponding ontology metadata.

Parameters
  • schema_version: str version of the schema to load ontology metadata for. If not provided, the latest is loaded.

CXGSchema object to fetch ontology metadata from

term_label_to_id_map: Dict[str, Dict[str, str]]
def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
32    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
33        """
34        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
35
36        Example
37        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
38        >>> ontology_parser = OntologyParser()
39        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
40        {'Label A': 'CL:0000000', ... }
41
42        :param ontology_name: str name of ontology to get map of term labels to term IDs
43        """
44        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
45        if not supported_ontology_name:
46            raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.")
47
48        if self.term_label_to_id_map[supported_ontology_name]:
49            return self.term_label_to_id_map[supported_ontology_name].copy()
50
51        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
52            self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id
53
54        return self.term_label_to_id_map[supported_ontology_name].copy()

Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
{'Label A': 'CL:0000000', ... }
Parameters
  • ontology_name: str name of ontology to get map of term labels to term IDs
def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
 93    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
 94        """
 95        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
 96        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
 97        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology
 98
 99        :param term_id: str ontology term to check
100        :param ontology: str name of ontology to check against
101        :return: boolean flag indicating whether the term is supported
102        """
103        try:
104            ontology_name = self._parse_ontology_name(term_id)
105            if ontology and ontology_name != ontology:
106                return False
107            if term_id in self.cxg_schema.ontology(ontology_name):
108                return True
109        except ValueError:
110            return False
111        return False

Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology

Parameters
  • term_id: str ontology term to check
  • ontology: str name of ontology to check against
Returns

boolean flag indicating whether the term is supported

def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
113    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
114        """
115        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
116        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
117
118        Example
119        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
120        >>> ontology_parser = OntologyParser()
121        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
122        ['CL:0000000', 'CL:0000057', ...
123
124        :param term_id: str ontology term to find ancestors for
125        :param include_self: boolean flag to include the term itself as an ancestor
126        :return: flattened List[str] of ancestor terms
127        """
128        if term_id in VALID_NON_ONTOLOGY_TERMS:
129            return []
130        ontology_name = self._parse_ontology_name(term_id)
131        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
132        return ancestors + [term_id] if include_self else ancestors

Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
['CL:0000000', 'CL:0000057', ...
Parameters
  • term_id: str ontology term to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

flattened List[str] of ancestor terms

def map_term_ancestors( self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
134    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
135        """
136        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
137        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
138
139        Example
140        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
141        >>> ontology_parser = OntologyParser()
142        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
143        {
144            'CL:0000003': ['CL:0000003'],
145            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
146        }
147
148        :param term_ids: list of str ontology terms to find ancestors for
149        :param include_self: boolean flag to include the term itself as an ancestor
150        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
151        empty
152        list if there are no ancestors.
153        """
154        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}

Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
    'CL:0000003': ['CL:0000003'],
    'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
}
Parameters
  • term_ids: list of str ontology terms to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to empty list if there are no ancestors.

def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
156    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
157        """
158        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
159        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
160        supported ontology.
161
162        Example
163        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
164        >>> ontology_parser = OntologyParser()
165        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
166        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
167
168        :param term_id: str ontology term to find ancestors for
169        :param include_self: boolean flag to include the term itself as an ancestor
170        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
171        """
172        if term_id in VALID_NON_ONTOLOGY_TERMS:
173            return {}
174        ontology_name = self._parse_ontology_name(term_id)
175        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
176        return ancestors | {term_id: 0} if include_self else ancestors

Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
{'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
Parameters
  • term_id: str ontology term to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

Dict[str, int] map of ancestor terms and their respective distances from the term_id

def map_term_ancestors_with_distances( self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, Dict[str, int]]:
178    def map_term_ancestors_with_distances(
179        self, term_ids: Iterable[str], include_self: bool = False
180    ) -> Dict[str, Dict[str, int]]:
181        """
182        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
183        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
184        supported ontology.
185
186        Example
187        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
188        >>> ontology_parser = OntologyParser()
189        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
190        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3,
191        'CL:0000005': 0}}
192
193        :param term_ids: list of str ontology terms to find ancestors for
194        :param include_self: boolean flag to include the term itself as an ancestor
195        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
196        respective distances from the term_id
197        """
198        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
{'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3,
'CL:0000005': 0}}
Parameters
  • term_ids: list of str ontology terms to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their respective distances from the term_id

def get_term_parents(self, term_id: str) -> List[str]:
200    def get_term_parents(self, term_id: str) -> List[str]:
201        """
202        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
203        a supported ontology.
204
205        Example
206        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
207        >>> ontology_parser = OntologyParser()
208        >>> ontology_parser.get_term_parents("CL:0000101")
209        ['CL:0000526']
210
211        :param term_id: str ontology term to find parents for
212        :return: List[str] of parent terms
213        """
214        if term_id in VALID_NON_ONTOLOGY_TERMS:
215            return []
216        ontology_name = self._parse_ontology_name(term_id)
217        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
218        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
219        return parents

Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_parents("CL:0000101")
['CL:0000526']
Parameters
  • term_id: str ontology term to find parents for
Returns

List[str] of parent terms

def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
221    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
222        """
223        Get the distance between two ontology terms. The distance is defined as the number of edges between the
224        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
225        Raises ValueError if term IDs are not valid members of a supported ontology.
226
227        :param term_id_1: str ontology term to find distance for
228        :param term_id_2: str ontology term to find distance for
229        :return: int distance between the two terms, measured in number of edges between their shortest path.
230        """
231        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
232        if not lcas:
233            return -1
234        return int(
235            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
236            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
237        )

Get the distance between two ontology terms. The distance is defined as the number of edges between the two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint. Raises ValueError if term IDs are not valid members of a supported ontology.

Parameters
  • term_id_1: str ontology term to find distance for
  • term_id_2: str ontology term to find distance for
Returns

int distance between the two terms, measured in number of edges between their shortest path.

def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
239    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
240        """
241        Get the lowest common ancestors between two ontology terms that is from the given ontology.
242        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
243        Raises ValueError if term IDs are not valid members of a supported ontology.
244
245        :param term_id_1: str ontology term to find LCA for
246        :param term_id_2: str ontology term to find LCA for
247        :return: str term ID of the lowest common ancestor term
248        """
249        # include path to term itself
250        ontology = self._parse_ontology_name(term_id_1)
251        if ontology != self._parse_ontology_name(term_id_2):
252            return []
253        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
254        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
255        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
256        min_sum_distances = float("inf")
257        for ancestors in common_ancestors:
258            sum_distances = ancestors_1[ancestors] + ancestors_2[ancestors]
259            if sum_distances < min_sum_distances:
260                min_sum_distances = sum_distances
261        return [
262            ancestor
263            for ancestor in common_ancestors
264            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
265        ]

Get the lowest common ancestors of two ontology terms. Both terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors. Raises ValueError if term IDs are not valid members of a supported ontology.

Parameters
  • term_id_1: str ontology term to find LCA for
  • term_id_2: str ontology term to find LCA for
Returns

List[str] of term IDs of the lowest common ancestor terms

def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
267    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
268        """
269        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
270        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
271        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
272        Raises ValueError if term ID is not valid member of a supported ontology.
273
274        Example
275        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
276        >>> ontology_parser = OntologyParser()
277        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
278        ['CL:0000000']
279
280        :param term_id: str ontology term to find high-level terms for
281        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
282        :return: List[str] of high-level terms that the term is a descendant of
283        """
284        if term_id in VALID_NON_ONTOLOGY_TERMS:
285            return []
286        ancestors = self.get_term_ancestors(term_id, include_self=True)
287        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]

Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1 high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
['CL:0000000']
Parameters
  • term_id: str ontology term to find high-level terms for
  • high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns

List[str] of high-level terms that the term is a descendant of

def map_high_level_terms( self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
289    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
290        """
291        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
292        format
293
294        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
295
296        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
297        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
298
299        :param term_ids: list of str ontology terms to map high level terms for
300        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
301        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
302        Each key maps to empty list if there are no ancestors among the provided input.
303        """
304        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}

Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format

{"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

Parameters
  • term_ids: list of str ontology terms to map high level terms for
  • high_level_terms: list of str ontology terms to be mapped to descendant term_ids
Returns

Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list. Each key maps to empty list if there are no ancestors among the provided input.

def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Optional[str]:
306    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
307        """
308        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
309        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
310        if term ID is not valid member of a supported ontology.
311
312        Example
313        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
314        >>> ontology_parser = OntologyParser()
315        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
316        'CL:0000000'
317
318        :param term_id: str ontology term to find highest level term for
319        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
320        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
321        high-level terms
322        """
323        high_level_terms = self.get_high_level_terms(term_id, high_level_terms)
324        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
325        if not high_level_terms:
326            return None
327        return max(high_level_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])

Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
'CL:0000000'
Parameters
  • term_id: str ontology term to find highest level term for
  • high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns

str highest level term that the term is a descendant of, or None if it is not a descendant of any high-level terms

def map_highest_level_term( self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Optional[str]]:
329    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
330        """
331        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
332        format
333
334        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
335
336        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
337        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
338        provided input. Raises ValueError if term ID is not valid member of a supported ontology.
339
340        :param term_ids: list of str ontology terms to map high level terms for
341        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
342        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
343        Each key maps to empty list if there are no ancestors among the provided input.
344        """
345        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}

Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format

{"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided. Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the provided input. Raises ValueError if term ID is not valid member of a supported ontology.

Parameters
  • term_ids: list of str ontology terms to map high level terms for
  • high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
Returns

Dictionary mapping str term IDs to their respective highest level term from the input list. Each key maps to None if there are no ancestors among the provided input.

def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
347    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
348        """
349        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
350        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
351
352        Example
353        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
354        >>> ontology_parser = OntologyParser()
355        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
356        ['CL:0002363']
357
358        :param term_id: str ontology term to find descendants for
359        :param include_self: boolean flag to include the term itself as a descendant
360        :return: List[str] of descendant terms
361        """
362        if term_id in VALID_NON_ONTOLOGY_TERMS:
363            return []
364        ontology_name = self._parse_ontology_name(term_id)
365        descendants = [term_id] if include_self else []
366        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
367            ancestors = candidate_metadata["ancestors"].keys()
368            if term_id in ancestors:
369                descendants.append(candidate_descendant)
370        return descendants

Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
['CL:0002363']
Parameters
  • term_id: str ontology term to find descendants for
  • include_self: boolean flag to include the term itself as a descendant
Returns

List[str] of descendant terms

def map_term_descendants( self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
372    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
373        """
374        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
375         included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
376
377        Example
378        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
379        >>> ontology_parser = OntologyParser()
380        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
381        {
382            'CL:0000003': ['CL:0000003', ...],
383            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
384        }
385
386        :param term_ids: list of str ontology terms to find descendants for
387        :param include_self: boolean flag to include the term itself as an descendant
388        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
389        empty list if there are no descendants.
390        """
391        descendants_dict: Dict[str, List[str]] = dict()
392        ontology_names = set()
393        for term_id in term_ids:
394            if term_id in VALID_NON_ONTOLOGY_TERMS:
395                descendants_dict[term_id] = []
396                continue
397            ontology_name = self._parse_ontology_name(term_id)
398            descendants_dict[term_id] = [term_id] if include_self else []
399            ontology_names.add(ontology_name)
400
401        for ontology in ontology_names:
402            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
403                for ancestor_id in descendants_dict:
404                    ancestors = candidate_metadata["ancestors"].keys()
405                    if ancestor_id in ancestors:
406                        descendants_dict[ancestor_id].append(candidate_descendant)
407
408        return descendants_dict

Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
    'CL:0000003': ['CL:0000003', ...],
    'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
}
Parameters
  • term_ids: list of str ontology terms to find descendants for
  • include_self: boolean flag to include the term itself as a descendant
Returns

Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to empty list if there are no descendants.

def get_term_children(self, term_id: str) -> List[str]:
410    def get_term_children(self, term_id: str) -> List[str]:
411        """
412        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
413        supported ontology.
414
415        Example
416        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
417        >>> ontology_parser = OntologyParser()
418        >>> ontology_parser.get_term_children("CL:0000526")
419        ['CL:0000101']
420
421        :param term_id: str ontology term to find children for
422        :return: List[str] of children terms
423        """
424        if term_id in VALID_NON_ONTOLOGY_TERMS:
425            return []
426        ontology_name = self._parse_ontology_name(term_id)
427        children = []
428        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
429            for ancestor, distance in candidate_metadata["ancestors"].items():
430                if ancestor == term_id and distance == 1:
431                    children.append(candidate_child)
432        return children

Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_children("CL:0000526")
['CL:0000101']
Parameters
  • term_id: str ontology term to find children for
Returns

List[str] of children terms

def get_term_graph(self, term_id: str) -> cellxgene_ontology_guide.entities.OntologyNode:
434    def get_term_graph(self, term_id: str) -> OntologyNode:
435        """
436        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
437        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
438
439        Example
440        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
441        >>> ontology_parser = OntologyParser()
442        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
443        >>> root_node.term_id
444        'CL:0000000'
445        >>> root_node.to_dict() # doctest: +SKIP
446        {
447            "term_id": "CL:0000000",
448            "name": "cell A",
449            "children": [
450                {
451                    "term_id": "CL:0000001",
452                    "name": "cell B",
453                    "children": [...],
454                },
455                {
456                    "term_id": "CL:0000002",
457                    "name": "cell C",
458                    "children": [...],
459                },
460                ...
461            ]
462        }
463        >>> root_node.term_counter # doctest: +SKIP
464        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
465
466        :param term_id: str ontology term to build subtree for
467        :return: OntologyNode representation of graph with term_id as root.
468        """
469        term_label = self.get_term_label(term_id)
470        root = OntologyNode(term_id, term_label)
471        for child_term_id in self.get_term_children(term_id):
472            root.add_child(self.get_term_graph(child_term_id))
473        return root

Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> root_node = ontology_parser.get_term_graph("CL:0000000")
>>> root_node.term_id
'CL:0000000'
>>> root_node.to_dict() # doctest: +SKIP
{
    "term_id": "CL:0000000",
    "name": "cell A",
    "children": [
        {
            "term_id": "CL:0000001",
            "name": "cell B",
            "children": [...],
        },
        {
            "term_id": "CL:0000002",
            "name": "cell C",
            "children": [...],
        },
        ...
    ]
}
>>> root_node.term_counter # doctest: +SKIP
Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
Parameters
  • term_id: str ontology term to build subtree for
Returns

OntologyNode representation of graph with term_id as root.

def is_term_deprecated(self, term_id: str) -> bool:
475    def is_term_deprecated(self, term_id: str) -> bool:
476        """
477        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
478        ontology.
479
480        Example
481        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
482        >>> ontology_parser = OntologyParser()
483        >>> ontology_parser.is_term_deprecated("CL:0000003")
484        True
485
486        :param term_id: str ontology term to check for deprecation
487        :return: boolean flag indicating whether the term is deprecated
488        """
489        if term_id in VALID_NON_ONTOLOGY_TERMS:
490            return False
491        ontology_name = self._parse_ontology_name(term_id)
492        is_deprecated: bool = self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated")
493        return is_deprecated

Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.is_term_deprecated("CL:0000003")
True
Parameters
  • term_id: str ontology term to check for deprecation
Returns

boolean flag indicating whether the term is deprecated

def get_term_replacement(self, term_id: str) -> Optional[str]:
495    def get_term_replacement(self, term_id: str) -> Union[str, None]:
496        """
497        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
498        Raises ValueError if term ID is not valid member of a supported ontology.
499
500        Example
501        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
502        >>> ontology_parser = OntologyParser()
503        >>> ontology_parser.get_term_replacement("CL:0000003")
504        'CL:0000000'
505
506        :param term_id: str ontology term to check a replacement term for
507        :return: replacement str term ID if it exists, None otherwise
508        """
509        if term_id in VALID_NON_ONTOLOGY_TERMS:
510            return None
511        ontology_name = self._parse_ontology_name(term_id)
512        replaced_by: str = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
513        return replaced_by if replaced_by else None

Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_replacement("CL:0000003")
'CL:0000000'
Parameters
  • term_id: str ontology term to check a replacement term for
Returns

replacement str term ID if it exists, None otherwise

def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
515    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
516        """
517        Fetch metadata for a given ontology term. Returns a dict with format
518
519        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
520
521        Comments maps to List[str] of ontology curator comments
522        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
523        Consider maps to List[str] of alternate ontology terms to consider using instead of this term
524
525        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
526        of a supported ontology.
527
528        :param term_id: str ontology term to fetch metadata for
529        :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
530        """
531        if term_id in VALID_NON_ONTOLOGY_TERMS:
532            return {"comments": None, "term_tracker": None, "consider": None}
533        ontology_name = self._parse_ontology_name(term_id)
534        return {
535            key: self.cxg_schema.ontology(ontology_name)[term_id].get(key, None)
536            for key in {"comments", "term_tracker", "consider"}
537        }

Fetch metadata for a given ontology term. Returns a dict with format

{"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

"comments" maps to List[str] of ontology curator comments. "term_tracker" maps to a str url where there is discussion around this term's curation (or deprecation). "consider" maps to List[str] of alternate ontology terms to consider using instead of this term.

All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member of a supported ontology.

Parameters
  • term_id: str ontology term to fetch metadata for
Returns

Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.

def get_term_label(self, term_id: str) -> str:
539    def get_term_label(self, term_id: str) -> str:
540        """
541        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
542        supported ontology.
543
544        Example
545        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
546        >>> ontology_parser = OntologyParser()
547        >>> ontology_parser.get_term_label("CL:0000005")
548        'neural crest derived fibroblast'
549
550        :param term_id: str ontology term to fetch label for
551        :return: str human-readable label for the term
552        """
553        if term_id in VALID_NON_ONTOLOGY_TERMS:
554            return term_id
555        ontology_name = self._parse_ontology_name(term_id)
556        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
557        return label

Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label("CL:0000005")
'neural crest derived fibroblast'
Parameters
  • term_id: str ontology term to fetch label for
Returns

str human-readable label for the term

def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
559    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
560        """
561        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
562        member of a supported ontology.
563
564        Example
565        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
566        >>> ontology_parser = OntologyParser()
567        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
568        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
569
570        :param term_ids: list of str ontology terms to fetch label for
571        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
572        """
573        return {term_id: self.get_term_label(term_id) for term_id in term_ids}

Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
{'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
Parameters
  • term_ids: list of str ontology terms to fetch label for
Returns

Dict[str, str] mapping term IDs to their respective human-readable labels

def get_term_description(self, term_id: str) -> Optional[str]:
575    def get_term_description(self, term_id: str) -> Optional[str]:
576        """
577        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
578        supported ontology.
579
580        Example
581        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
582        >>> ontology_parser = OntologyParser()
583        >>> ontology_parser.get_term_description("CL:0000005")
584        'Any fibroblast that is derived from the neural crest.'
585
586        :param term_id: str ontology term to fetch description for
587        :return: str description for the term
588        """
589        if term_id in VALID_NON_ONTOLOGY_TERMS:
590            return term_id
591        ontology_name = self._parse_ontology_name(term_id)
592        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
593        return description

Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_description("CL:0000005")
'Any fibroblast that is derived from the neural crest.'
Parameters
  • term_id: str ontology term to fetch description for
Returns

str description for the term

def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
595    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
596        """
597        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
598        a supported ontology.
599
600        Example
601        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
602        >>> ontology_parser = OntologyParser()
603        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
604        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
605
606        :param term_ids: list of str ontology terms to fetch descriptions for
607        :return: Dict[str, str] mapping term IDs to their respective descriptions
608        """
609        return {term_id: self.get_term_description(term_id) for term_id in term_ids}

Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
{'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
Parameters
  • term_ids: list of str ontology terms to fetch descriptions for
Returns

Dict[str, Optional[str]] mapping term IDs to their respective descriptions (None where a term has no description)

def get_term_synonyms(self, term_id: str) -> List[str]:
611    def get_term_synonyms(self, term_id: str) -> List[str]:
612        """
613        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
614        Raises ValueError if term ID is not valid member of a supported ontology.
615
616        Example
617        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
618        >>> ontology_parser = OntologyParser()
619        >>> ontology_parser.get_term_synonyms("CL:0000019")
620        ['sperm cell', 'spermatozoid', 'spermatozoon']
621
622        :param term_id: str ontology term to fetch synonyms for
623        :return: List[str] synonyms for the term
624        """
625        if term_id in VALID_NON_ONTOLOGY_TERMS:
626            return []
627        ontology_name = self._parse_ontology_name(term_id)
628        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
629        return synonyms

Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_synonyms("CL:0000019")
['sperm cell', 'spermatozoid', 'spermatozoon']
Parameters
  • term_id: str ontology term to fetch synonyms for
Returns

List[str] synonyms for the term

def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
631    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
632        """
633        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
634        a supported ontology.
635
636        Example
637        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
638        >>> ontology_parser = OntologyParser()
639        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
640        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
641
642        :param term_ids: list of str ontology terms to fetch synonyms for
643        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
644        """
645        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}

Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
{'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
Parameters
  • term_ids: list of str ontology terms to fetch synonyms for
Returns

Dict[str, List[str]] mapping term IDs to their respective synonym lists

def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
647    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
648        """
649        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
650        ontology_name is not a supported ontology.
651
652        Returns None if term ID is not valid member of a supported ontology.
653
654        Example
655        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
656        >>> ontology_parser = OntologyParser()
657        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
658        'CL:0000005'
659
660        :param term_label: str human-readable label to fetch term ID for
661        :param ontology_name: str name of ontology to search for term label in
662        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
663        """
664        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
665        return ontology_term_label_to_id_map.get(term_label)

Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if ontology_name is not a supported ontology.

Returns None if the label is not found in the given ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
'CL:0000005'
Parameters
  • term_label: str human-readable label to fetch term ID for
  • ontology_name: str name of ontology to search for term label in
Returns

Optional[str] term ID with that label, or None if the label is not found in the ontology

def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
667    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
668        """
669        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
670
671        If no applicable match is found, returns None.
672
673        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
674
675        Example
676        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
677        >>> ontology_parser = OntologyParser()
678        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
679        'UBERON:0000468'
680
681        :param term_id: str ontology term to find equivalent term for
682        :param cross_ontology: str name of ontology to search for equivalent term in
683        :return: Optional[str] equivalent term ID from the cross_ontology
684        """
685        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
686            raise ValueError(
687                f"{cross_ontology} is not in the set of supported cross ontology mappings "
688                f"{self.cxg_schema.cross_ontology_mappings}."
689            )
690        ontology_name = self._parse_ontology_name(term_id)
691        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
692        bridge_term_id: Optional[str] = None
693        if cross_ontology_terms:
694            bridge_term_id = cross_ontology_terms.get(cross_ontology)
695        return bridge_term_id

For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.

If no applicable match is found, returns None.

Raises ValueError if term ID or cross_ontology is not a valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
'UBERON:0000468'
Parameters
  • term_id: str ontology term to find equivalent term for
  • cross_ontology: str name of ontology to search for equivalent term in
Returns

Optional[str] equivalent term ID from the cross_ontology

def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
697    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
698        """
699        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
700        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
701        of the term for the closest match.
702
703        If no applicable match is found, returns an empty list.
704
705        If multiple ancestors of the same distance have matches, returns all possible closest matches.
706
707        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
708
709        Example
710        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
711        >>> ontology_parser = OntologyParser()
712        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
713        ['UBERON:0000476', 'UBERON:0000920']
714
715        :param term_id: str ontology term to find closest term for
716        :param cross_ontology: str name of ontology to search for closest term in
717        :return: List[str] list of closest term IDs from the cross_ontology
718        """
719        closest_bridge_terms: List[str] = []
720        terms_to_match = [term_id]
721        while terms_to_match and not closest_bridge_terms:
722            for term in terms_to_match:
723                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
724                    closest_bridge_terms.append(closest_bridge_term)
725            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
726        return closest_bridge_terms

For a given term ID, fetch the equivalent term ID from a given ontology. If an exact match is found, returns a single-element list containing it. If no exact match is found, traverses the ancestors of the term for the closest match.

If no applicable match is found, returns an empty list.

If multiple ancestors of the same distance have matches, returns all possible closest matches.

Raises ValueError if term ID or cross_ontology is not a valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
['UBERON:0000476', 'UBERON:0000920']
Parameters
  • term_id: str ontology term to find closest term for
  • cross_ontology: str name of ontology to search for closest term in
Returns

List[str] list of closest term IDs from the cross_ontology