cellxgene_ontology_guide.ontology_parser

  1import re
  2from typing import Any, Dict, Iterable, List, Optional, Union
  3
  4from cellxgene_ontology_guide._constants import VALID_NON_ONTOLOGY_TERMS
  5from cellxgene_ontology_guide.entities import OntologyNode
  6from cellxgene_ontology_guide.supported_versions import CXGSchema
  7
  8
  9class OntologyParser:
 10    """
 11    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
 12    """
 13
 14    cxg_schema: CXGSchema
 15    """ CXGSchema object to fetch ontology metadata from """
 16
 17    def __init__(self, schema_version: Optional[str] = None):
 18        """
 19        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
 20        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
 21        parse the corresponding ontology metadata.
 22
 23        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
 24        is loaded.
 25        """
 26        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
 27        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
 28            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
 29        }
 30
 31    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
 32        """
 33        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
 34
 35        Example
 36        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
 37        >>> ontology_parser = OntologyParser()
 38        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
 39        {'Label A': 'CL:0000000', ... }
 40
 41        :param ontology_name: str name of ontology to get map of term labels to term IDs
 42        """
 43        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
 44        if not supported_ontology_name:
 45            raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.")
 46
 47        if self.term_label_to_id_map[supported_ontology_name]:
 48            return self.term_label_to_id_map[supported_ontology_name].copy()
 49
 50        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
 51            self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id
 52
 53        return self.term_label_to_id_map[supported_ontology_name].copy()
 54
 55    def _parse_ontology_name(self, term_id: str) -> str:
 56        """
 57        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
 58        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.
 59
 60        :param term_id: str ontology term to parse
 61        :return: str name of ontology that term belongs to
 62        """
 63        # use names groups
 64        patterns = [r"([A-Za-z]+):[A-Za-z0-9]+", r"([A-Za-z]+)_[A-Za-z0-9]+"]
 65        pattern = "|".join(patterns)
 66        match = re.match(pattern, term_id)
 67        if not match:
 68            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")
 69
 70        ontology_term_prefix = match.group(1) or match.group(2)
 71        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
 72        if not ontology_name:
 73            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")
 74
 75        id_separator = self.cxg_schema.supported_ontologies.get(ontology_name).get("id_separator", ":")
 76        if id_separator not in term_id:
 77            raise ValueError(f"{term_id} does not conform to expected format for {ontology_term_prefix} terms.")
 78        return ontology_name
 79
 80    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
 81        """
 82        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.
 83
 84        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
 85        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
 86        it is imported in.
 87        Otherwise, returns None.
 88
 89        :param ontology_term_prefix: str ontology term prefix to check
 90        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
 91        a supported ontology in the CxG schema.
 92        """
 93        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
 94            return ontology_term_prefix
 95        # Case-insensitive lookup (e.g. "uniprot" prefix matches "UniProt" key)
 96        lower_prefix = ontology_term_prefix.lower()
 97        for key in self.cxg_schema.supported_ontologies:
 98            if key.lower() == lower_prefix:
 99                return str(key)
100        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
101        return supported_ontology_name
102
103    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
104        """
105        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
106        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
107        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology
108
109        :param term_id: str ontology term to check
110        :param ontology: str name of ontology to check against
111        :return: boolean flag indicating whether the term is supported
112        """
113        try:
114            ontology_name = self._parse_ontology_name(term_id)
115            if ontology and ontology_name != ontology:
116                return False
117            if term_id in self.cxg_schema.ontology(ontology_name):
118                return True
119        except ValueError:
120            return False
121        return False
122
123    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
124        """
125        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
126        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
127
128        Example
129        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
130        >>> ontology_parser = OntologyParser()
131        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
132        ['CL:0000000', 'CL:0000057', ...
133
134        :param term_id: str ontology term to find ancestors for
135        :param include_self: boolean flag to include the term itself as an ancestor
136        :return: flattened List[str] of ancestor terms
137        """
138        if term_id in VALID_NON_ONTOLOGY_TERMS:
139            return []
140        ontology_name = self._parse_ontology_name(term_id)
141        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
142        return ancestors + [term_id] if include_self else ancestors
143
144    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
145        """
146        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
147        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
148
149        Example
150        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
151        >>> ontology_parser = OntologyParser()
152        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
153        {
154            'CL:0000003': ['CL:0000003'],
155            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
156        }
157
158        :param term_ids: list of str ontology terms to find ancestors for
159        :param include_self: boolean flag to include the term itself as an ancestor
160        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
161        empty
162        list if there are no ancestors.
163        """
164        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}
165
166    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
167        """
168        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
169        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
170        supported ontology.
171
172        Example
173        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
174        >>> ontology_parser = OntologyParser()
175        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
176        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
177
178        :param term_id: str ontology term to find ancestors for
179        :param include_self: boolean flag to include the term itself as an ancestor
180        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
181        """
182        if term_id in VALID_NON_ONTOLOGY_TERMS:
183            return {}
184        ontology_name = self._parse_ontology_name(term_id)
185        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
186        return ancestors | {term_id: 0} if include_self else ancestors
187
188    def map_term_ancestors_with_distances(
189        self, term_ids: Iterable[str], include_self: bool = False
190    ) -> Dict[str, Dict[str, int]]:
191        """
192        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
193        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
194        supported ontology.
195
196        Example
197        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
198        >>> ontology_parser = OntologyParser()
199        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
200        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
201
202        :param term_ids: list of str ontology terms to find ancestors for
203        :param include_self: boolean flag to include the term itself as an ancestor
204        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
205        respective distances from the term_id
206        """
207        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}
208
209    def get_term_parents(self, term_id: str) -> List[str]:
210        """
211        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
212        a supported ontology.
213
214        Example
215        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
216        >>> ontology_parser = OntologyParser()
217        >>> ontology_parser.get_term_parents("CL:0000101")
218        ['CL:0000526']
219
220        :param term_id: str ontology term to find parents for
221        :return: List[str] of parent terms
222        """
223        if term_id in VALID_NON_ONTOLOGY_TERMS:
224            return []
225        ontology_name = self._parse_ontology_name(term_id)
226        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
227        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
228        return parents
229
230    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
231        """
232        Get the distance between two ontology terms. The distance is defined as the number of edges between the
233        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
234        Raises ValueError if term IDs are not valid members of a supported ontology.
235
236        :param term_id_1: str ontology term to find distance for
237        :param term_id_2: str ontology term to find distance for
238        :return: int distance between the two terms, measured in number of edges between their shortest path.
239        """
240        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
241        if not lcas:
242            return -1
243        return int(
244            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
245            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
246        )
247
248    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
249        """
250        Get the lowest common ancestors between two ontology terms that is from the given ontology.
251        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
252        Raises ValueError if term IDs are not valid members of a supported ontology.
253
254        :param term_id_1: str ontology term to find LCA for
255        :param term_id_2: str ontology term to find LCA for
256        :return: str term ID of the lowest common ancestor term
257        """
258        # include path to term itself
259        ontology = self._parse_ontology_name(term_id_1)
260        if ontology != self._parse_ontology_name(term_id_2):
261            return []
262        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
263        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
264        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
265        min_sum_distances = float("inf")
266        for ancestors in common_ancestors:
267            sum_distances = ancestors_1[ancestors] + ancestors_2[ancestors]
268            if sum_distances < min_sum_distances:
269                min_sum_distances = sum_distances
270        return [
271            ancestor
272            for ancestor in common_ancestors
273            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
274        ]
275
276    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
277        """
278        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
279        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
280        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
281        Raises ValueError if term ID is not valid member of a supported ontology.
282
283        Example
284        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
285        >>> ontology_parser = OntologyParser()
286        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
287        ['CL:0000000']
288
289        :param term_id: str ontology term to find high-level terms for
290        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
291        :return: List[str] of high-level terms that the term is a descendant of
292        """
293        if term_id in VALID_NON_ONTOLOGY_TERMS:
294            return []
295        ancestors = self.get_term_ancestors(term_id, include_self=True)
296        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]
297
298    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
299        """
300        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
301        format
302
303        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
304
305        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
306        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
307
308        :param term_ids: list of str ontology terms to map high level terms for
309        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
310        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
311        Each key maps to empty list if there are no ancestors among the provided input.
312        """
313        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}
314
315    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
316        """
317        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
318        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
319        if term ID is not valid member of a supported ontology.
320
321        Example
322        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
323        >>> ontology_parser = OntologyParser()
324        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
325        'CL:0000000'
326
327        :param term_id: str ontology term to find highest level term for
328        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
329        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
330        high-level terms
331        """
332        high_level_terms = self.get_high_level_terms(term_id, high_level_terms)
333        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
334        if not high_level_terms:
335            return None
336        return max(high_level_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])
337
338    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
339        """
340        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
341        format
342
343        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
344
345        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
346        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
347        provided input. Raises ValueError if term ID is not valid member of a supported ontology.
348
349        :param term_ids: list of str ontology terms to map high level terms for
350        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
351        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
352        Each key maps to empty list if there are no ancestors among the provided input.
353        """
354        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}
355
356    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
357        """
358        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
359        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
360
361        Example
362        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
363        >>> ontology_parser = OntologyParser()
364        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
365        ['CL:0002363']
366
367        :param term_id: str ontology term to find descendants for
368        :param include_self: boolean flag to include the term itself as a descendant
369        :return: List[str] of descendant terms
370        """
371        if term_id in VALID_NON_ONTOLOGY_TERMS:
372            return []
373        ontology_name = self._parse_ontology_name(term_id)
374        descendants = [term_id] if include_self else []
375        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
376            ancestors = candidate_metadata["ancestors"].keys()
377            if term_id in ancestors:
378                descendants.append(candidate_descendant)
379        return descendants
380
381    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
382        """
383        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
384         included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
385
386        Example
387        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
388        >>> ontology_parser = OntologyParser()
389        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
390        {
391            'CL:0000003': ['CL:0000003', ...],
392            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
393        }
394
395        :param term_ids: list of str ontology terms to find descendants for
396        :param include_self: boolean flag to include the term itself as an descendant
397        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
398        empty list if there are no descendants.
399        """
400        descendants_dict: Dict[str, List[str]] = dict()
401        ontology_names = set()
402        for term_id in term_ids:
403            if term_id in VALID_NON_ONTOLOGY_TERMS:
404                descendants_dict[term_id] = []
405                continue
406            ontology_name = self._parse_ontology_name(term_id)
407            descendants_dict[term_id] = [term_id] if include_self else []
408            ontology_names.add(ontology_name)
409
410        for ontology in ontology_names:
411            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
412                for ancestor_id in descendants_dict:
413                    ancestors = candidate_metadata["ancestors"].keys()
414                    if ancestor_id in ancestors:
415                        descendants_dict[ancestor_id].append(candidate_descendant)
416
417        return descendants_dict
418
419    def get_term_children(self, term_id: str) -> List[str]:
420        """
421        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
422        supported ontology.
423
424        Example
425        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
426        >>> ontology_parser = OntologyParser()
427        >>> ontology_parser.get_term_children("CL:0000526")
428        ['CL:0000101', 'CL:4042034']
429
430        :param term_id: str ontology term to find children for
431        :return: List[str] of children terms
432        """
433        if term_id in VALID_NON_ONTOLOGY_TERMS:
434            return []
435        ontology_name = self._parse_ontology_name(term_id)
436        children = []
437        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
438            for ancestor, distance in candidate_metadata["ancestors"].items():
439                if ancestor == term_id and distance == 1:
440                    children.append(candidate_child)
441        return children
442
443    def get_term_graph(self, term_id: str) -> OntologyNode:
444        """
445        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
446        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
447
448        Example
449        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
450        >>> ontology_parser = OntologyParser()
451        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
452        >>> root_node.term_id
453        'CL:0000000'
454        >>> root_node.to_dict() # doctest: +SKIP
455        {
456            "term_id": "CL:0000000",
457            "name": "cell A",
458            "children": [
459                {
460                    "term_id": "CL:0000001",
461                    "name": "cell B",
462                    "children": [...],
463                },
464                {
465                    "term_id": "CL:0000002",
466                    "name": "cell C",
467                    "children": [...],
468                },
469                ...
470            ]
471        }
472        >>> root_node.term_counter # doctest: +SKIP
473        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
474
475        :param term_id: str ontology term to build subtree for
476        :return: OntologyNode representation of graph with term_id as root.
477        """
478        term_label = self.get_term_label(term_id)
479        root = OntologyNode(term_id, term_label)
480        for child_term_id in self.get_term_children(term_id):
481            root.add_child(self.get_term_graph(child_term_id))
482        return root
483
484    def is_term_deprecated(self, term_id: str) -> bool:
485        """
486        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
487        ontology.
488
489        Example
490        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
491        >>> ontology_parser = OntologyParser()
492        >>> ontology_parser.is_term_deprecated("CL:0000003")
493        True
494
495        :param term_id: str ontology term to check for deprecation
496        :return: boolean flag indicating whether the term is deprecated
497        """
498        if term_id in VALID_NON_ONTOLOGY_TERMS:
499            return False
500        ontology_name = self._parse_ontology_name(term_id)
501        is_deprecated: bool = self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated")
502        return is_deprecated
503
504    def get_term_replacement(self, term_id: str) -> Union[str, None]:
505        """
506        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
507        Raises ValueError if term ID is not valid member of a supported ontology.
508
509        Example
510        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
511        >>> ontology_parser = OntologyParser()
512        >>> ontology_parser.get_term_replacement("CL:0000003")
513        'CL:0000000'
514
515        :param term_id: str ontology term to check a replacement term for
516        :return: replacement str term ID if it exists, None otherwise
517        """
518        if term_id in VALID_NON_ONTOLOGY_TERMS:
519            return None
520        ontology_name = self._parse_ontology_name(term_id)
521        replaced_by: str = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
522        return replaced_by if replaced_by else None
523
524    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
525        """
526        Fetch metadata for a given ontology term. Returns a dict with format
527
528        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
529
530        Comments maps to List[str] of ontology curator comments
531        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
532        Consider maps to List[str] of alternate ontology terms to consider using instead of this term
533
534        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
535        of a supported ontology.
536
537        :param term_id: str ontology term to fetch metadata for
538        :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
539        """
540        if term_id in VALID_NON_ONTOLOGY_TERMS:
541            return {"comments": None, "term_tracker": None, "consider": None}
542        ontology_name = self._parse_ontology_name(term_id)
543        return {
544            key: self.cxg_schema.ontology(ontology_name)[term_id].get(key, None)
545            for key in {"comments", "term_tracker", "consider"}
546        }
547
548    def get_term_label(self, term_id: str) -> str:
549        """
550        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
551        supported ontology.
552
553        Example
554        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
555        >>> ontology_parser = OntologyParser()
556        >>> ontology_parser.get_term_label("CL:0000005")
557        'neural crest derived fibroblast'
558
559        :param term_id: str ontology term to fetch label for
560        :return: str human-readable label for the term
561        """
562        if term_id in VALID_NON_ONTOLOGY_TERMS:
563            return term_id
564        ontology_name = self._parse_ontology_name(term_id)
565        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
566        return label
567
568    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
569        """
570        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
571        member of a supported ontology.
572
573        Example
574        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
575        >>> ontology_parser = OntologyParser()
576        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
577        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
578
579        :param term_ids: list of str ontology terms to fetch label for
580        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
581        """
582        return {term_id: self.get_term_label(term_id) for term_id in term_ids}
583
584    def get_term_description(self, term_id: str) -> Optional[str]:
585        """
586        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
587        supported ontology.
588
589        Example
590        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
591        >>> ontology_parser = OntologyParser()
592        >>> ontology_parser.get_term_description("CL:0000005")
593        'Any fibroblast that is derived from the neural crest.'
594
595        :param term_id: str ontology term to fetch description for
596        :return: str description for the term
597        """
598        if term_id in VALID_NON_ONTOLOGY_TERMS:
599            return term_id
600        ontology_name = self._parse_ontology_name(term_id)
601        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
602        return description
603
604    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
605        """
606        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
607        a supported ontology.
608
609        Example
610        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
611        >>> ontology_parser = OntologyParser()
612        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
613        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
614
615        :param term_ids: list of str ontology terms to fetch descriptions for
616        :return: Dict[str, str] mapping term IDs to their respective descriptions
617        """
618        return {term_id: self.get_term_description(term_id) for term_id in term_ids}
619
620    def get_term_synonyms(self, term_id: str) -> List[str]:
621        """
622        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
623        Raises ValueError if term ID is not valid member of a supported ontology.
624
625        Example
626        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
627        >>> ontology_parser = OntologyParser()
628        >>> ontology_parser.get_term_synonyms("CL:0000019")
629        ['sperm cell', 'spermatozoid', 'spermatozoon']
630
631        :param term_id: str ontology term to fetch synonyms for
632        :return: List[str] synonyms for the term
633        """
634        if term_id in VALID_NON_ONTOLOGY_TERMS:
635            return []
636        ontology_name = self._parse_ontology_name(term_id)
637        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
638        return synonyms
639
640    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
641        """
642        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
643        a supported ontology.
644
645        Example
646        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
647        >>> ontology_parser = OntologyParser()
648        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
649        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
650
651        :param term_ids: list of str ontology terms to fetch synonyms for
652        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
653        """
654        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}
655
656    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
657        """
658        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
659        ontology_name is not a supported ontology.
660
661        Returns None if term ID is not valid member of a supported ontology.
662
663        Example
664        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
665        >>> ontology_parser = OntologyParser()
666        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
667        'CL:0000005'
668
669        :param term_label: str human-readable label to fetch term ID for
670        :param ontology_name: str name of ontology to search for term label in
671        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
672        """
673        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
674        return ontology_term_label_to_id_map.get(term_label)
675
676    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
677        """
678        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
679
680        If no applicable match is found, returns None.
681
682        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
683
684        Example
685        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
686        >>> ontology_parser = OntologyParser()
687        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
688        'UBERON:0000468'
689
690        :param term_id: str ontology term to find equivalent term for
691        :param cross_ontology: str name of ontology to search for equivalent term in
692        :return: Optional[str] equivalent term ID from the cross_ontology
693        """
694        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
695            raise ValueError(
696                f"{cross_ontology} is not in the set of supported cross ontology mappings "
697                f"{self.cxg_schema.cross_ontology_mappings}."
698            )
699        ontology_name = self._parse_ontology_name(term_id)
700        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
701        bridge_term_id: Optional[str] = None
702        if cross_ontology_terms:
703            bridge_term_id = cross_ontology_terms.get(cross_ontology)
704        return bridge_term_id
705
706    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
707        """
708        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
709        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
710        of the term for the closest match.
711
712        If no applicable match is found, returns an empty list.
713
714        If multiple ancestors of the same distance have matches, returns all possible closest matches.
715
716        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
717
718        Example
719        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
720        >>> ontology_parser = OntologyParser()
721        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
722        ['UBERON:0000476', 'UBERON:0000920']
723
724        :param term_id: str ontology term to find closest term for
725        :param cross_ontology: str name of ontology to search for closest term in
726        :return: List[str] list of closest term IDs from the cross_ontology
727        """
728        closest_bridge_terms: List[str] = []
729        terms_to_match = [term_id]
730        while terms_to_match and not closest_bridge_terms:
731            for term in terms_to_match:
732                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
733                    closest_bridge_terms.append(closest_bridge_term)
734            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
735        return closest_bridge_terms
class OntologyParser:
 10class OntologyParser:
 11    """
 12    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
 13    """
 14
 15    cxg_schema: CXGSchema
 16    """ CXGSchema object to fetch ontology metadata from """
 17
 18    def __init__(self, schema_version: Optional[str] = None):
 19        """
 20        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
 21        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
 22        parse the corresponding ontology metadata.
 23
 24        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
 25        is loaded.
 26        """
 27        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
 28        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
 29            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
 30        }
 31
 32    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
 33        """
 34        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
 35
 36        Example
 37        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
 38        >>> ontology_parser = OntologyParser()
 39        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
 40        {'Label A': 'CL:0000000', ... }
 41
 42        :param ontology_name: str name of ontology to get map of term labels to term IDs
 43        """
 44        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
 45        if not supported_ontology_name:
 46            raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.")
 47
 48        if self.term_label_to_id_map[supported_ontology_name]:
 49            return self.term_label_to_id_map[supported_ontology_name].copy()
 50
 51        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
 52            self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id
 53
 54        return self.term_label_to_id_map[supported_ontology_name].copy()
 55
 56    def _parse_ontology_name(self, term_id: str) -> str:
 57        """
 58        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
 59        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.
 60
 61        :param term_id: str ontology term to parse
 62        :return: str name of ontology that term belongs to
 63        """
 64        # use names groups
 65        patterns = [r"([A-Za-z]+):[A-Za-z0-9]+", r"([A-Za-z]+)_[A-Za-z0-9]+"]
 66        pattern = "|".join(patterns)
 67        match = re.match(pattern, term_id)
 68        if not match:
 69            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")
 70
 71        ontology_term_prefix = match.group(1) or match.group(2)
 72        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
 73        if not ontology_name:
 74            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")
 75
 76        id_separator = self.cxg_schema.supported_ontologies.get(ontology_name).get("id_separator", ":")
 77        if id_separator not in term_id:
 78            raise ValueError(f"{term_id} does not conform to expected format for {ontology_term_prefix} terms.")
 79        return ontology_name
 80
 81    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
 82        """
 83        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.
 84
 85        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
 86        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
 87        it is imported in.
 88        Otherwise, returns None.
 89
 90        :param ontology_term_prefix: str ontology term prefix to check
 91        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
 92        a supported ontology in the CxG schema.
 93        """
 94        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
 95            return ontology_term_prefix
 96        # Case-insensitive lookup (e.g. "uniprot" prefix matches "UniProt" key)
 97        lower_prefix = ontology_term_prefix.lower()
 98        for key in self.cxg_schema.supported_ontologies:
 99            if key.lower() == lower_prefix:
100                return str(key)
101        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
102        return supported_ontology_name
103
104    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
105        """
106        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
107        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
108        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology
109
110        :param term_id: str ontology term to check
111        :param ontology: str name of ontology to check against
112        :return: boolean flag indicating whether the term is supported
113        """
114        try:
115            ontology_name = self._parse_ontology_name(term_id)
116            if ontology and ontology_name != ontology:
117                return False
118            if term_id in self.cxg_schema.ontology(ontology_name):
119                return True
120        except ValueError:
121            return False
122        return False
123
124    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
125        """
126        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
127        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
128
129        Example
130        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
131        >>> ontology_parser = OntologyParser()
132        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
133        ['CL:0000000', 'CL:0000057', ...
134
135        :param term_id: str ontology term to find ancestors for
136        :param include_self: boolean flag to include the term itself as an ancestor
137        :return: flattened List[str] of ancestor terms
138        """
139        if term_id in VALID_NON_ONTOLOGY_TERMS:
140            return []
141        ontology_name = self._parse_ontology_name(term_id)
142        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
143        return ancestors + [term_id] if include_self else ancestors
144
145    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
146        """
147        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
148        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
149
150        Example
151        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
152        >>> ontology_parser = OntologyParser()
153        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
154        {
155            'CL:0000003': ['CL:0000003'],
156            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
157        }
158
159        :param term_ids: list of str ontology terms to find ancestors for
160        :param include_self: boolean flag to include the term itself as an ancestor
161        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
162        empty
163        list if there are no ancestors.
164        """
165        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}
166
167    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
168        """
169        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
170        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
171        supported ontology.
172
173        Example
174        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
175        >>> ontology_parser = OntologyParser()
176        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
177        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
178
179        :param term_id: str ontology term to find ancestors for
180        :param include_self: boolean flag to include the term itself as an ancestor
181        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
182        """
183        if term_id in VALID_NON_ONTOLOGY_TERMS:
184            return {}
185        ontology_name = self._parse_ontology_name(term_id)
186        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
187        return ancestors | {term_id: 0} if include_self else ancestors
188
189    def map_term_ancestors_with_distances(
190        self, term_ids: Iterable[str], include_self: bool = False
191    ) -> Dict[str, Dict[str, int]]:
192        """
193        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
194        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
195        supported ontology.
196
197        Example
198        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
199        >>> ontology_parser = OntologyParser()
200        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
201        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
202
203        :param term_ids: list of str ontology terms to find ancestors for
204        :param include_self: boolean flag to include the term itself as an ancestor
205        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
206        respective distances from the term_id
207        """
208        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}
209
210    def get_term_parents(self, term_id: str) -> List[str]:
211        """
212        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
213        a supported ontology.
214
215        Example
216        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
217        >>> ontology_parser = OntologyParser()
218        >>> ontology_parser.get_term_parents("CL:0000101")
219        ['CL:0000526']
220
221        :param term_id: str ontology term to find parents for
222        :return: List[str] of parent terms
223        """
224        if term_id in VALID_NON_ONTOLOGY_TERMS:
225            return []
226        ontology_name = self._parse_ontology_name(term_id)
227        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
228        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
229        return parents
230
231    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
232        """
233        Get the distance between two ontology terms. The distance is defined as the number of edges between the
234        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
235        Raises ValueError if term IDs are not valid members of a supported ontology.
236
237        :param term_id_1: str ontology term to find distance for
238        :param term_id_2: str ontology term to find distance for
239        :return: int distance between the two terms, measured in number of edges between their shortest path.
240        """
241        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
242        if not lcas:
243            return -1
244        return int(
245            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
246            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
247        )
248
249    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
250        """
251        Get the lowest common ancestors between two ontology terms that is from the given ontology.
252        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
253        Raises ValueError if term IDs are not valid members of a supported ontology.
254
255        :param term_id_1: str ontology term to find LCA for
256        :param term_id_2: str ontology term to find LCA for
257        :return: str term ID of the lowest common ancestor term
258        """
259        # include path to term itself
260        ontology = self._parse_ontology_name(term_id_1)
261        if ontology != self._parse_ontology_name(term_id_2):
262            return []
263        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
264        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
265        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
266        min_sum_distances = float("inf")
267        for ancestors in common_ancestors:
268            sum_distances = ancestors_1[ancestors] + ancestors_2[ancestors]
269            if sum_distances < min_sum_distances:
270                min_sum_distances = sum_distances
271        return [
272            ancestor
273            for ancestor in common_ancestors
274            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
275        ]
276
277    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
278        """
279        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
280        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
281        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
282        Raises ValueError if term ID is not valid member of a supported ontology.
283
284        Example
285        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
286        >>> ontology_parser = OntologyParser()
287        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
288        ['CL:0000000']
289
290        :param term_id: str ontology term to find high-level terms for
291        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
292        :return: List[str] of high-level terms that the term is a descendant of
293        """
294        if term_id in VALID_NON_ONTOLOGY_TERMS:
295            return []
296        ancestors = self.get_term_ancestors(term_id, include_self=True)
297        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]
298
299    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
300        """
301        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
302        format
303
304        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
305
306        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
307        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
308
309        :param term_ids: list of str ontology terms to map high level terms for
310        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
311        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
312        Each key maps to empty list if there are no ancestors among the provided input.
313        """
314        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}
315
316    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
317        """
318        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
319        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
320        if term ID is not valid member of a supported ontology.
321
322        Example
323        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
324        >>> ontology_parser = OntologyParser()
325        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
326        'CL:0000000'
327
328        :param term_id: str ontology term to find highest level term for
329        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
330        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
331        high-level terms
332        """
333        high_level_terms = self.get_high_level_terms(term_id, high_level_terms)
334        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
335        if not high_level_terms:
336            return None
337        return max(high_level_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])
338
339    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
340        """
341        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
342        format
343
344        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
345
346        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
347        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
348        provided input. Raises ValueError if term ID is not valid member of a supported ontology.
349
350        :param term_ids: list of str ontology terms to map high level terms for
351        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
352        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
353        Each key maps to empty list if there are no ancestors among the provided input.
354        """
355        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}
356
357    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
358        """
359        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
360        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
361
362        Example
363        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
364        >>> ontology_parser = OntologyParser()
365        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
366        ['CL:0002363']
367
368        :param term_id: str ontology term to find descendants for
369        :param include_self: boolean flag to include the term itself as a descendant
370        :return: List[str] of descendant terms
371        """
372        if term_id in VALID_NON_ONTOLOGY_TERMS:
373            return []
374        ontology_name = self._parse_ontology_name(term_id)
375        descendants = [term_id] if include_self else []
376        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
377            ancestors = candidate_metadata["ancestors"].keys()
378            if term_id in ancestors:
379                descendants.append(candidate_descendant)
380        return descendants
381
382    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
383        """
384        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
385         included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
386
387        Example
388        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
389        >>> ontology_parser = OntologyParser()
390        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
391        {
392            'CL:0000003': ['CL:0000003', ...],
393            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
394        }
395
396        :param term_ids: list of str ontology terms to find descendants for
397        :param include_self: boolean flag to include the term itself as an descendant
398        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
399        empty list if there are no descendants.
400        """
401        descendants_dict: Dict[str, List[str]] = dict()
402        ontology_names = set()
403        for term_id in term_ids:
404            if term_id in VALID_NON_ONTOLOGY_TERMS:
405                descendants_dict[term_id] = []
406                continue
407            ontology_name = self._parse_ontology_name(term_id)
408            descendants_dict[term_id] = [term_id] if include_self else []
409            ontology_names.add(ontology_name)
410
411        for ontology in ontology_names:
412            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
413                for ancestor_id in descendants_dict:
414                    ancestors = candidate_metadata["ancestors"].keys()
415                    if ancestor_id in ancestors:
416                        descendants_dict[ancestor_id].append(candidate_descendant)
417
418        return descendants_dict
419
420    def get_term_children(self, term_id: str) -> List[str]:
421        """
422        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
423        supported ontology.
424
425        Example
426        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
427        >>> ontology_parser = OntologyParser()
428        >>> ontology_parser.get_term_children("CL:0000526")
429        ['CL:0000101', 'CL:4042034']
430
431        :param term_id: str ontology term to find children for
432        :return: List[str] of children terms
433        """
434        if term_id in VALID_NON_ONTOLOGY_TERMS:
435            return []
436        ontology_name = self._parse_ontology_name(term_id)
437        children = []
438        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
439            for ancestor, distance in candidate_metadata["ancestors"].items():
440                if ancestor == term_id and distance == 1:
441                    children.append(candidate_child)
442        return children
443
444    def get_term_graph(self, term_id: str) -> OntologyNode:
445        """
446        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
447        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
448
449        Example
450        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
451        >>> ontology_parser = OntologyParser()
452        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
453        >>> root_node.term_id
454        'CL:0000000'
455        >>> root_node.to_dict() # doctest: +SKIP
456        {
457            "term_id": "CL:0000000",
458            "name": "cell A",
459            "children": [
460                {
461                    "term_id": "CL:0000001",
462                    "name": "cell B",
463                    "children": [...],
464                },
465                {
466                    "term_id": "CL:0000002",
467                    "name": "cell C",
468                    "children": [...],
469                },
470                ...
471            ]
472        }
473        >>> root_node.term_counter # doctest: +SKIP
474        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
475
476        :param term_id: str ontology term to build subtree for
477        :return: OntologyNode representation of graph with term_id as root.
478        """
479        term_label = self.get_term_label(term_id)
480        root = OntologyNode(term_id, term_label)
481        for child_term_id in self.get_term_children(term_id):
482            root.add_child(self.get_term_graph(child_term_id))
483        return root
484
485    def is_term_deprecated(self, term_id: str) -> bool:
486        """
487        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
488        ontology.
489
490        Example
491        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
492        >>> ontology_parser = OntologyParser()
493        >>> ontology_parser.is_term_deprecated("CL:0000003")
494        True
495
496        :param term_id: str ontology term to check for deprecation
497        :return: boolean flag indicating whether the term is deprecated
498        """
499        if term_id in VALID_NON_ONTOLOGY_TERMS:
500            return False
501        ontology_name = self._parse_ontology_name(term_id)
502        is_deprecated: bool = self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated")
503        return is_deprecated
504
505    def get_term_replacement(self, term_id: str) -> Union[str, None]:
506        """
507        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
508        Raises ValueError if term ID is not valid member of a supported ontology.
509
510        Example
511        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
512        >>> ontology_parser = OntologyParser()
513        >>> ontology_parser.get_term_replacement("CL:0000003")
514        'CL:0000000'
515
516        :param term_id: str ontology term to check a replacement term for
517        :return: replacement str term ID if it exists, None otherwise
518        """
519        if term_id in VALID_NON_ONTOLOGY_TERMS:
520            return None
521        ontology_name = self._parse_ontology_name(term_id)
522        replaced_by: str = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
523        return replaced_by if replaced_by else None
524
525    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
526        """
527        Fetch metadata for a given ontology term. Returns a dict with format
528
529        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
530
531        Comments maps to List[str] of ontology curator comments
532        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
533        Consider maps to List[str] of alternate ontology terms to consider using instead of this term
534
535        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
536        of a supported ontology.
537
538        :param term_id: str ontology term to fetch metadata for
539        :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
540        """
541        if term_id in VALID_NON_ONTOLOGY_TERMS:
542            return {"comments": None, "term_tracker": None, "consider": None}
543        ontology_name = self._parse_ontology_name(term_id)
544        return {
545            key: self.cxg_schema.ontology(ontology_name)[term_id].get(key, None)
546            for key in {"comments", "term_tracker", "consider"}
547        }
548
549    def get_term_label(self, term_id: str) -> str:
550        """
551        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
552        supported ontology.
553
554        Example
555        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
556        >>> ontology_parser = OntologyParser()
557        >>> ontology_parser.get_term_label("CL:0000005")
558        'neural crest derived fibroblast'
559
560        :param term_id: str ontology term to fetch label for
561        :return: str human-readable label for the term
562        """
563        if term_id in VALID_NON_ONTOLOGY_TERMS:
564            return term_id
565        ontology_name = self._parse_ontology_name(term_id)
566        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
567        return label
568
569    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
570        """
571        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
572        member of a supported ontology.
573
574        Example
575        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
576        >>> ontology_parser = OntologyParser()
577        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
578        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
579
580        :param term_ids: list of str ontology terms to fetch label for
581        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
582        """
583        return {term_id: self.get_term_label(term_id) for term_id in term_ids}
584
585    def get_term_description(self, term_id: str) -> Optional[str]:
586        """
587        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
588        supported ontology.
589
590        Example
591        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
592        >>> ontology_parser = OntologyParser()
593        >>> ontology_parser.get_term_description("CL:0000005")
594        'Any fibroblast that is derived from the neural crest.'
595
596        :param term_id: str ontology term to fetch description for
597        :return: str description for the term
598        """
599        if term_id in VALID_NON_ONTOLOGY_TERMS:
600            return term_id
601        ontology_name = self._parse_ontology_name(term_id)
602        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
603        return description
604
605    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
606        """
607        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
608        a supported ontology.
609
610        Example
611        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
612        >>> ontology_parser = OntologyParser()
613        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
614        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
615
616        :param term_ids: list of str ontology terms to fetch descriptions for
617        :return: Dict[str, str] mapping term IDs to their respective descriptions
618        """
619        return {term_id: self.get_term_description(term_id) for term_id in term_ids}
620
621    def get_term_synonyms(self, term_id: str) -> List[str]:
622        """
623        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
624        Raises ValueError if term ID is not valid member of a supported ontology.
625
626        Example
627        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
628        >>> ontology_parser = OntologyParser()
629        >>> ontology_parser.get_term_synonyms("CL:0000019")
630        ['sperm cell', 'spermatozoid', 'spermatozoon']
631
632        :param term_id: str ontology term to fetch synonyms for
633        :return: List[str] synonyms for the term
634        """
635        if term_id in VALID_NON_ONTOLOGY_TERMS:
636            return []
637        ontology_name = self._parse_ontology_name(term_id)
638        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
639        return synonyms
640
641    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
642        """
643        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
644        a supported ontology.
645
646        Example
647        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
648        >>> ontology_parser = OntologyParser()
649        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
650        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
651
652        :param term_ids: list of str ontology terms to fetch synonyms for
653        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
654        """
655        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}
656
657    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
658        """
659        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
660        ontology_name is not a supported ontology.
661
662        Returns None if term ID is not valid member of a supported ontology.
663
664        Example
665        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
666        >>> ontology_parser = OntologyParser()
667        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
668        'CL:0000005'
669
670        :param term_label: str human-readable label to fetch term ID for
671        :param ontology_name: str name of ontology to search for term label in
672        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
673        """
674        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
675        return ontology_term_label_to_id_map.get(term_label)
676
677    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
678        """
679        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
680
681        If no applicable match is found, returns None.
682
683        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
684
685        Example
686        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
687        >>> ontology_parser = OntologyParser()
688        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
689        'UBERON:0000468'
690
691        :param term_id: str ontology term to find equivalent term for
692        :param cross_ontology: str name of ontology to search for equivalent term in
693        :return: Optional[str] equivalent term ID from the cross_ontology
694        """
695        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
696            raise ValueError(
697                f"{cross_ontology} is not in the set of supported cross ontology mappings "
698                f"{self.cxg_schema.cross_ontology_mappings}."
699            )
700        ontology_name = self._parse_ontology_name(term_id)
701        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
702        bridge_term_id: Optional[str] = None
703        if cross_ontology_terms:
704            bridge_term_id = cross_ontology_terms.get(cross_ontology)
705        return bridge_term_id
706
707    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
708        """
709        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
710        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
711        of the term for the closest match.
712
713        If no applicable match is found, returns an empty list.
714
715        If multiple ancestors of the same distance have matches, returns all possible closest matches.
716
717        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
718
719        Example
720        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
721        >>> ontology_parser = OntologyParser()
722        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
723        ['UBERON:0000476', 'UBERON:0000920']
724
725        :param term_id: str ontology term to find closest term for
726        :param cross_ontology: str name of ontology to search for closest term in
727        :return: List[str] list of closest term IDs from the cross_ontology
728        """
729        closest_bridge_terms: List[str] = []
730        terms_to_match = [term_id]
731        while terms_to_match and not closest_bridge_terms:
732            for term in terms_to_match:
733                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
734                    closest_bridge_terms.append(closest_bridge_term)
735            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
736        return closest_bridge_terms

An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.

OntologyParser(schema_version: Optional[str] = None)
18    def __init__(self, schema_version: Optional[str] = None):
19        """
20        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
21        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
22        parse the corresponding ontology metadata.
23
24        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
25        is loaded.
26        """
27        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
28        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
29            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
30        }

Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema version. If not cached, it will make a network call to GitHub Release Assets to load in memory and parse the corresponding ontology metadata.

Parameters
  • schema_version: str version of the schema to load ontology metadata for. If not provided, the latest is loaded.

CXGSchema object to fetch ontology metadata from

term_label_to_id_map: Dict[str, Dict[str, str]]
def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
32    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
33        """
34        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
35
36        Example
37        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
38        >>> ontology_parser = OntologyParser()
39        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
40        {'Label A': 'CL:0000000', ... }
41
42        :param ontology_name: str name of ontology to get map of term labels to term IDs
43        """
44        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
45        if not supported_ontology_name:
46            raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.")
47
48        if self.term_label_to_id_map[supported_ontology_name]:
49            return self.term_label_to_id_map[supported_ontology_name].copy()
50
51        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
52            self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id
53
54        return self.term_label_to_id_map[supported_ontology_name].copy()

Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
{'Label A': 'CL:0000000', ... }
Parameters
  • ontology_name: str name of ontology to get map of term labels to term IDs
def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
104    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
105        """
106        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
107        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
108        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology
109
110        :param term_id: str ontology term to check
111        :param ontology: str name of ontology to check against
112        :return: boolean flag indicating whether the term is supported
113        """
114        try:
115            ontology_name = self._parse_ontology_name(term_id)
116            if ontology and ontology_name != ontology:
117                return False
118            if term_id in self.cxg_schema.ontology(ontology_name):
119                return True
120        except ValueError:
121            return False
122        return False

Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine if the term is defined in that particular ontology. Otherwise, checks if the term is valid in any supported ontology.

Parameters
  • term_id: str ontology term to check
  • ontology: str name of ontology to check against
Returns

boolean flag indicating whether the term is supported

def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
124    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
125        """
126        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
127        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
128
129        Example
130        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
131        >>> ontology_parser = OntologyParser()
132        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
133        ['CL:0000000', 'CL:0000057', ...
134
135        :param term_id: str ontology term to find ancestors for
136        :param include_self: boolean flag to include the term itself as an ancestor
137        :return: flattened List[str] of ancestor terms
138        """
139        if term_id in VALID_NON_ONTOLOGY_TERMS:
140            return []
141        ontology_name = self._parse_ontology_name(term_id)
142        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
143        return ancestors + [term_id] if include_self else ancestors

Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
['CL:0000000', 'CL:0000057', ...
Parameters
  • term_id: str ontology term to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

flattened List[str] of ancestor terms

def map_term_ancestors( self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
145    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
146        """
147        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
148        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
149
150        Example
151        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
152        >>> ontology_parser = OntologyParser()
153        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
154        {
155            'CL:0000003': ['CL:0000003'],
156            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
157        }
158
159        :param term_ids: list of str ontology terms to find ancestors for
160        :param include_self: boolean flag to include the term itself as an ancestor
161        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
162        empty
163        list if there are no ancestors.
164        """
165        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}

Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
    'CL:0000003': ['CL:0000003'],
    'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
}
Parameters
  • term_ids: list of str ontology terms to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to empty list if there are no ancestors.

def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
167    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
168        """
169        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
170        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
171        supported ontology.
172
173        Example
174        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
175        >>> ontology_parser = OntologyParser()
176        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
177        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
178
179        :param term_id: str ontology term to find ancestors for
180        :param include_self: boolean flag to include the term itself as an ancestor
181        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
182        """
183        if term_id in VALID_NON_ONTOLOGY_TERMS:
184            return {}
185        ontology_name = self._parse_ontology_name(term_id)
186        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
187        return ancestors | {term_id: 0} if include_self else ancestors

Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
{'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
Parameters
  • term_id: str ontology term to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

Dict[str, int] map of ancestor terms and their respective distances from the term_id

def map_term_ancestors_with_distances( self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, Dict[str, int]]:
189    def map_term_ancestors_with_distances(
190        self, term_ids: Iterable[str], include_self: bool = False
191    ) -> Dict[str, Dict[str, int]]:
192        """
193        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
194        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
195        supported ontology.
196
197        Example
198        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
199        >>> ontology_parser = OntologyParser()
200        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
201        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
202
203        :param term_ids: list of str ontology terms to find ancestors for
204        :param include_self: boolean flag to include the term itself as an ancestor
205        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
206        respective distances from the term_id
207        """
208        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
{'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
Parameters
  • term_ids: list of str ontology terms to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their respective distances from the term_id

def get_term_parents(self, term_id: str) -> List[str]:
210    def get_term_parents(self, term_id: str) -> List[str]:
211        """
212        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
213        a supported ontology.
214
215        Example
216        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
217        >>> ontology_parser = OntologyParser()
218        >>> ontology_parser.get_term_parents("CL:0000101")
219        ['CL:0000526']
220
221        :param term_id: str ontology term to find parents for
222        :return: List[str] of parent terms
223        """
224        if term_id in VALID_NON_ONTOLOGY_TERMS:
225            return []
226        ontology_name = self._parse_ontology_name(term_id)
227        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
228        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
229        return parents

Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_parents("CL:0000101")
['CL:0000526']
Parameters
  • term_id: str ontology term to find parents for
Returns

List[str] of parent terms

def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
231    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
232        """
233        Get the distance between two ontology terms. The distance is defined as the number of edges between the
234        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
235        Raises ValueError if term IDs are not valid members of a supported ontology.
236
237        :param term_id_1: str ontology term to find distance for
238        :param term_id_2: str ontology term to find distance for
239        :return: int distance between the two terms, measured in number of edges between their shortest path.
240        """
241        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
242        if not lcas:
243            return -1
244        return int(
245            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
246            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
247        )

Get the distance between two ontology terms. The distance is defined as the number of edges between the two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint. Raises ValueError if term IDs are not valid members of a supported ontology.

Parameters
  • term_id_1: str ontology term to find distance for
  • term_id_2: str ontology term to find distance for
Returns

int distance between the two terms, measured in number of edges between their shortest path.

def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
249    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
250        """
251        Get the lowest common ancestors between two ontology terms that is from the given ontology.
252        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
253        Raises ValueError if term IDs are not valid members of a supported ontology.
254
255        :param term_id_1: str ontology term to find LCA for
256        :param term_id_2: str ontology term to find LCA for
257        :return: str term ID of the lowest common ancestor term
258        """
259        # include path to term itself
260        ontology = self._parse_ontology_name(term_id_1)
261        if ontology != self._parse_ontology_name(term_id_2):
262            return []
263        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
264        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
265        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
266        min_sum_distances = float("inf")
267        for ancestors in common_ancestors:
268            sum_distances = ancestors_1[ancestors] + ancestors_2[ancestors]
269            if sum_distances < min_sum_distances:
270                min_sum_distances = sum_distances
271        return [
272            ancestor
273            for ancestor in common_ancestors
274            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
275        ]

Get the lowest common ancestors of two ontology terms. Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors. Raises ValueError if term IDs are not valid members of a supported ontology.

Parameters
  • term_id_1: str ontology term to find LCA for
  • term_id_2: str ontology term to find LCA for
Returns

List[str] of term IDs of the lowest common ancestor terms (empty if the terms belong to different ontologies or share no ancestor)

def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
277    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
278        """
279        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
280        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
281        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
282        Raises ValueError if term ID is not valid member of a supported ontology.
283
284        Example
285        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
286        >>> ontology_parser = OntologyParser()
287        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
288        ['CL:0000000']
289
290        :param term_id: str ontology term to find high-level terms for
291        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
292        :return: List[str] of high-level terms that the term is a descendant of
293        """
294        if term_id in VALID_NON_ONTOLOGY_TERMS:
295            return []
296        ancestors = self.get_term_ancestors(term_id, include_self=True)
297        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]

Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1 high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
['CL:0000000']
Parameters
  • term_id: str ontology term to find high-level terms for
  • high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns

List[str] of high-level terms that the term is a descendant of

def map_high_level_terms( self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
299    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
300        """
301        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
302        format
303
304        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
305
306        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
307        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
308
309        :param term_ids: list of str ontology terms to map high level terms for
310        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
311        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
312        Each key maps to empty list if there are no ancestors among the provided input.
313        """
314        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}

Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format

{"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

Parameters
  • term_ids: list of str ontology terms to map high level terms for
  • high_level_terms: list of str ontology terms to be mapped to descendant term_ids
Returns

Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list. Each key maps to empty list if there are no ancestors among the provided input.

def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Optional[str]:
316    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
317        """
318        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
319        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
320        if term ID is not valid member of a supported ontology.
321
322        Example
323        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
324        >>> ontology_parser = OntologyParser()
325        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
326        'CL:0000000'
327
328        :param term_id: str ontology term to find highest level term for
329        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
330        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
331        high-level terms
332        """
333        high_level_terms = self.get_high_level_terms(term_id, high_level_terms)
334        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
335        if not high_level_terms:
336            return None
337        return max(high_level_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])

Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
'CL:0000000'
Parameters
  • term_id: str ontology term to find highest level term for
  • high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns

str highest level term that the term is a descendant of, or None if it is not a descendant of any high-level terms

def map_highest_level_term( self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Optional[str]]:
339    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
340        """
341        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
342        format
343
344        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
345
346        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
347        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
348        provided input. Raises ValueError if term ID is not valid member of a supported ontology.
349
350        :param term_ids: list of str ontology terms to map high level terms for
351        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
352        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
353        Each key maps to empty list if there are no ancestors among the provided input.
354        """
355        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}

Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format

{"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided. Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the provided input. Raises ValueError if term ID is not valid member of a supported ontology.

Parameters
  • term_ids: list of str ontology terms to map high level terms for
  • high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
Returns

Dictionary mapping each str term ID to the highest level term it descends from among the input list. A key maps to None if the term has no ancestors among the provided input.

def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
357    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
358        """
359        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
360        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
361
362        Example
363        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
364        >>> ontology_parser = OntologyParser()
365        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
366        ['CL:0002363']
367
368        :param term_id: str ontology term to find descendants for
369        :param include_self: boolean flag to include the term itself as a descendant
370        :return: List[str] of descendant terms
371        """
372        if term_id in VALID_NON_ONTOLOGY_TERMS:
373            return []
374        ontology_name = self._parse_ontology_name(term_id)
375        descendants = [term_id] if include_self else []
376        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
377            ancestors = candidate_metadata["ancestors"].keys()
378            if term_id in ancestors:
379                descendants.append(candidate_descendant)
380        return descendants

Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
['CL:0002363']
Parameters
  • term_id: str ontology term to find descendants for
  • include_self: boolean flag to include the term itself as a descendant
Returns

List[str] of descendant terms

def map_term_descendants( self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
382    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
383        """
384        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
385         included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
386
387        Example
388        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
389        >>> ontology_parser = OntologyParser()
390        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
391        {
392            'CL:0000003': ['CL:0000003', ...],
393            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
394        }
395
396        :param term_ids: list of str ontology terms to find descendants for
397        :param include_self: boolean flag to include the term itself as an descendant
398        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
399        empty list if there are no descendants.
400        """
401        descendants_dict: Dict[str, List[str]] = dict()
402        ontology_names = set()
403        for term_id in term_ids:
404            if term_id in VALID_NON_ONTOLOGY_TERMS:
405                descendants_dict[term_id] = []
406                continue
407            ontology_name = self._parse_ontology_name(term_id)
408            descendants_dict[term_id] = [term_id] if include_self else []
409            ontology_names.add(ontology_name)
410
411        for ontology in ontology_names:
412            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
413                for ancestor_id in descendants_dict:
414                    ancestors = candidate_metadata["ancestors"].keys()
415                    if ancestor_id in ancestors:
416                        descendants_dict[ancestor_id].append(candidate_descendant)
417
418        return descendants_dict

Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
    'CL:0000003': ['CL:0000003', ...],
    'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
}
Parameters
  • term_ids: list of str ontology terms to find descendants for
  • include_self: boolean flag to include the term itself as a descendant
Returns

Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to empty list if there are no descendants.

def get_term_children(self, term_id: str) -> List[str]:
420    def get_term_children(self, term_id: str) -> List[str]:
421        """
422        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
423        supported ontology.
424
425        Example
426        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
427        >>> ontology_parser = OntologyParser()
428        >>> ontology_parser.get_term_children("CL:0000526")
429        ['CL:0000101', 'CL:4042034']
430
431        :param term_id: str ontology term to find children for
432        :return: List[str] of children terms
433        """
434        if term_id in VALID_NON_ONTOLOGY_TERMS:
435            return []
436        ontology_name = self._parse_ontology_name(term_id)
437        children = []
438        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
439            for ancestor, distance in candidate_metadata["ancestors"].items():
440                if ancestor == term_id and distance == 1:
441                    children.append(candidate_child)
442        return children

Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_children("CL:0000526")
['CL:0000101', 'CL:4042034']
Parameters
  • term_id: str ontology term to find children for
Returns

List[str] of children terms

def get_term_graph(self, term_id: str) -> cellxgene_ontology_guide.entities.OntologyNode:
444    def get_term_graph(self, term_id: str) -> OntologyNode:
445        """
446        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
447        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
448
449        Example
450        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
451        >>> ontology_parser = OntologyParser()
452        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
453        >>> root_node.term_id
454        'CL:0000000'
455        >>> root_node.to_dict() # doctest: +SKIP
456        {
457            "term_id": "CL:0000000",
458            "name": "cell A",
459            "children": [
460                {
461                    "term_id": "CL:0000001",
462                    "name": "cell B",
463                    "children": [...],
464                },
465                {
466                    "term_id": "CL:0000002",
467                    "name": "cell C",
468                    "children": [...],
469                },
470                ...
471            ]
472        }
473        >>> root_node.term_counter # doctest: +SKIP
474        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
475
476        :param term_id: str ontology term to build subtree for
477        :return: OntologyNode representation of graph with term_id as root.
478        """
479        term_label = self.get_term_label(term_id)
480        root = OntologyNode(term_id, term_label)
481        for child_term_id in self.get_term_children(term_id):
482            root.add_child(self.get_term_graph(child_term_id))
483        return root

Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> root_node = ontology_parser.get_term_graph("CL:0000000")
>>> root_node.term_id
'CL:0000000'
>>> root_node.to_dict() # doctest: +SKIP
{
    "term_id": "CL:0000000",
    "name": "cell A",
    "children": [
        {
            "term_id": "CL:0000001",
            "name": "cell B",
            "children": [...],
        },
        {
            "term_id": "CL:0000002",
            "name": "cell C",
            "children": [...],
        },
        ...
    ]
}
>>> root_node.term_counter # doctest: +SKIP
Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
Parameters
  • term_id: str ontology term to build subtree for
Returns

OntologyNode representation of graph with term_id as root.

def is_term_deprecated(self, term_id: str) -> bool:
485    def is_term_deprecated(self, term_id: str) -> bool:
486        """
487        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
488        ontology.
489
490        Example
491        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
492        >>> ontology_parser = OntologyParser()
493        >>> ontology_parser.is_term_deprecated("CL:0000003")
494        True
495
496        :param term_id: str ontology term to check for deprecation
497        :return: boolean flag indicating whether the term is deprecated
498        """
499        if term_id in VALID_NON_ONTOLOGY_TERMS:
500            return False
501        ontology_name = self._parse_ontology_name(term_id)
502        is_deprecated: bool = self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated")
503        return is_deprecated

Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.is_term_deprecated("CL:0000003")
True
Parameters
  • term_id: str ontology term to check for deprecation
Returns

boolean flag indicating whether the term is deprecated

def get_term_replacement(self, term_id: str) -> Optional[str]:
505    def get_term_replacement(self, term_id: str) -> Union[str, None]:
506        """
507        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
508        Raises ValueError if term ID is not valid member of a supported ontology.
509
510        Example
511        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
512        >>> ontology_parser = OntologyParser()
513        >>> ontology_parser.get_term_replacement("CL:0000003")
514        'CL:0000000'
515
516        :param term_id: str ontology term to check a replacement term for
517        :return: replacement str term ID if it exists, None otherwise
518        """
519        if term_id in VALID_NON_ONTOLOGY_TERMS:
520            return None
521        ontology_name = self._parse_ontology_name(term_id)
522        replaced_by: str = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
523        return replaced_by if replaced_by else None

Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_replacement("CL:0000003")
'CL:0000000'
Parameters
  • term_id: str ontology term to check a replacement term for
Returns

replacement str term ID if it exists, None otherwise

def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
525    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
526        """
527        Fetch metadata for a given ontology term. Returns a dict with format
528
529        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
530
531        Comments maps to List[str] of ontology curator comments
532        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
533        Consider maps to List[str] of alternate ontology terms to consider using instead of this term
534
535        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
536        of a supported ontology.
537
538        :param term_id: str ontology term to fetch metadata for
539        :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
540        """
541        if term_id in VALID_NON_ONTOLOGY_TERMS:
542            return {"comments": None, "term_tracker": None, "consider": None}
543        ontology_name = self._parse_ontology_name(term_id)
544        return {
545            key: self.cxg_schema.ontology(ontology_name)[term_id].get(key, None)
546            for key in {"comments", "term_tracker", "consider"}
547        }

Fetch metadata for a given ontology term. Returns a dict with format

{"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

"comments" maps to List[str] of ontology curator comments. "term_tracker" maps to a str url where there is discussion around this term's curation (or deprecation). "consider" maps to List[str] of alternate ontology terms to consider using instead of this term.

All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member of a supported ontology.

Parameters
  • term_id: str ontology term to fetch metadata for
Returns

Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.

def get_term_label(self, term_id: str) -> str:
549    def get_term_label(self, term_id: str) -> str:
550        """
551        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
552        supported ontology.
553
554        Example
555        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
556        >>> ontology_parser = OntologyParser()
557        >>> ontology_parser.get_term_label("CL:0000005")
558        'neural crest derived fibroblast'
559
560        :param term_id: str ontology term to fetch label for
561        :return: str human-readable label for the term
562        """
563        if term_id in VALID_NON_ONTOLOGY_TERMS:
564            return term_id
565        ontology_name = self._parse_ontology_name(term_id)
566        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
567        return label

Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label("CL:0000005")
'neural crest derived fibroblast'
Parameters
  • term_id: str ontology term to fetch label for
Returns

str human-readable label for the term

def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
569    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
570        """
571        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
572        member of a supported ontology.
573
574        Example
575        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
576        >>> ontology_parser = OntologyParser()
577        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
578        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
579
580        :param term_ids: list of str ontology terms to fetch label for
581        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
582        """
583        return {term_id: self.get_term_label(term_id) for term_id in term_ids}

Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
{'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
Parameters
  • term_ids: list of str ontology terms to fetch label for
Returns

Dict[str, str] mapping term IDs to their respective human-readable labels

def get_term_description(self, term_id: str) -> Optional[str]:
585    def get_term_description(self, term_id: str) -> Optional[str]:
586        """
587        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
588        supported ontology.
589
590        Example
591        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
592        >>> ontology_parser = OntologyParser()
593        >>> ontology_parser.get_term_description("CL:0000005")
594        'Any fibroblast that is derived from the neural crest.'
595
596        :param term_id: str ontology term to fetch description for
597        :return: str description for the term
598        """
599        if term_id in VALID_NON_ONTOLOGY_TERMS:
600            return term_id
601        ontology_name = self._parse_ontology_name(term_id)
602        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
603        return description

Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_description("CL:0000005")
'Any fibroblast that is derived from the neural crest.'
Parameters
  • term_id: str ontology term to fetch description for
Returns

str description for the term

def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
605    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
606        """
607        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
608        a supported ontology.
609
610        Example
611        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
612        >>> ontology_parser = OntologyParser()
613        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
614        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
615
616        :param term_ids: list of str ontology terms to fetch descriptions for
617        :return: Dict[str, str] mapping term IDs to their respective descriptions
618        """
619        return {term_id: self.get_term_description(term_id) for term_id in term_ids}

Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
{'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
Parameters
  • term_ids: list of str ontology terms to fetch descriptions for
Returns

Dict[str, Optional[str]] mapping term IDs to their respective descriptions (None when a term has no description)

def get_term_synonyms(self, term_id: str) -> List[str]:
621    def get_term_synonyms(self, term_id: str) -> List[str]:
622        """
623        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
624        Raises ValueError if term ID is not valid member of a supported ontology.
625
626        Example
627        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
628        >>> ontology_parser = OntologyParser()
629        >>> ontology_parser.get_term_synonyms("CL:0000019")
630        ['sperm cell', 'spermatozoid', 'spermatozoon']
631
632        :param term_id: str ontology term to fetch synonyms for
633        :return: List[str] synonyms for the term
634        """
635        if term_id in VALID_NON_ONTOLOGY_TERMS:
636            return []
637        ontology_name = self._parse_ontology_name(term_id)
638        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
639        return synonyms

Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_synonyms("CL:0000019")
['sperm cell', 'spermatozoid', 'spermatozoon']
Parameters
  • term_id: str ontology term to fetch synonyms for
Returns

List[str] synonyms for the term

def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
641    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
642        """
643        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
644        a supported ontology.
645
646        Example
647        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
648        >>> ontology_parser = OntologyParser()
649        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
650        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
651
652        :param term_ids: list of str ontology terms to fetch synonyms for
653        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
654        """
655        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}

Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
{'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
Parameters
  • term_ids: list of str ontology terms to fetch synonyms for
Returns

Dict[str, List[str]] mapping term IDs to their respective synonym lists

def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
657    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
658        """
659        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
660        ontology_name is not a supported ontology.
661
662        Returns None if term ID is not valid member of a supported ontology.
663
664        Example
665        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
666        >>> ontology_parser = OntologyParser()
667        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
668        'CL:0000005'
669
670        :param term_label: str human-readable label to fetch term ID for
671        :param ontology_name: str name of ontology to search for term label in
672        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
673        """
674        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
675        return ontology_term_label_to_id_map.get(term_label)

Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if ontology_name is not a supported ontology.

Returns None if the term label is not found in the given ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
'CL:0000005'
Parameters
  • term_label: str human-readable label to fetch term ID for
  • ontology_name: str name of ontology to search for term label in
Returns

Optional[str] term ID with that label, or None if the label is not found in the ontology

def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
677    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
678        """
679        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
680
681        If no applicable match is found, returns None.
682
683        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
684
685        Example
686        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
687        >>> ontology_parser = OntologyParser()
688        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
689        'UBERON:0000468'
690
691        :param term_id: str ontology term to find equivalent term for
692        :param cross_ontology: str name of ontology to search for equivalent term in
693        :return: Optional[str] equivalent term ID from the cross_ontology
694        """
695        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
696            raise ValueError(
697                f"{cross_ontology} is not in the set of supported cross ontology mappings "
698                f"{self.cxg_schema.cross_ontology_mappings}."
699            )
700        ontology_name = self._parse_ontology_name(term_id)
701        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
702        bridge_term_id: Optional[str] = None
703        if cross_ontology_terms:
704            bridge_term_id = cross_ontology_terms.get(cross_ontology)
705        return bridge_term_id

For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.

If no applicable match is found, returns None.

Raises ValueError if the term ID is not a valid member of a supported ontology, or if cross_ontology is not one of the supported cross-ontology mappings.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
'UBERON:0000468'
Parameters
  • term_id: str ontology term to find equivalent term for
  • cross_ontology: str name of ontology to search for equivalent term in
Returns

Optional[str] equivalent term ID from the cross_ontology

def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
707    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
708        """
709        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
710        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
711        of the term for the closest match.
712
713        If no applicable match is found, returns an empty list.
714
715        If multiple ancestors of the same distance have matches, returns all possible closest matches.
716
717        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
718
719        Example
720        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
721        >>> ontology_parser = OntologyParser()
722        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
723        ['UBERON:0000476', 'UBERON:0000920']
724
725        :param term_id: str ontology term to find closest term for
726        :param cross_ontology: str name of ontology to search for closest term in
727        :return: List[str] list of closest term IDs from the cross_ontology
728        """
729        closest_bridge_terms: List[str] = []
730        terms_to_match = [term_id]
731        while terms_to_match and not closest_bridge_terms:
732            for term in terms_to_match:
733                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
734                    closest_bridge_terms.append(closest_bridge_term)
735            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
736        return closest_bridge_terms

For a given term ID, fetch the closest equivalent term ID(s) from a given ontology. If an exact match is found, returns a single-element list containing it. If no exact match is found, traverses the ancestors of the term to find the closest match.

If no applicable match is found, returns an empty list.

If multiple ancestors of the same distance have matches, returns all possible closest matches.

Raises ValueError if the term ID or cross_ontology is not a valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
['UBERON:0000476', 'UBERON:0000920']
Parameters
  • term_id: str ontology term to find closest term for
  • cross_ontology: str name of ontology to search for closest term in
Returns

List[str] list of closest term IDs from the cross_ontology