cellxgene_ontology_guide.ontology_parser

  1import re
  2from typing import Any, Dict, Iterable, List, Optional, Union
  3
  4from cellxgene_ontology_guide._constants import VALID_NON_ONTOLOGY_TERMS
  5from cellxgene_ontology_guide.entities import OntologyNode
  6from cellxgene_ontology_guide.supported_versions import CXGSchema
  7
  8
  9class OntologyParser:
 10    """
 11    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
 12    """
 13
 14    cxg_schema: CXGSchema
 15    """ CXGSchema object to fetch ontology metadata from """
 16
 17    def __init__(self, schema_version: Optional[str] = None):
 18        """
 19        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
 20        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
 21        parse the corresponding ontology metadata.
 22
 23        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
 24        is loaded.
 25        """
 26        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
 27        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
 28            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
 29        }
 30
 31    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
 32        """
 33        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
 34
 35        Example
 36        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
 37        >>> ontology_parser = OntologyParser()
 38        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
 39        {'Label A': 'CL:0000000', ... }
 40
 41        :param ontology_name: str name of ontology to get map of term labels to term IDs
 42        """
 43        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
 44        if not supported_ontology_name:
 45            raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.")
 46
 47        if self.term_label_to_id_map[supported_ontology_name]:
 48            return self.term_label_to_id_map[supported_ontology_name].copy()
 49
 50        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
 51            self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id
 52
 53        return self.term_label_to_id_map[supported_ontology_name].copy()
 54
 55    def _parse_ontology_name(self, term_id: str) -> str:
 56        """
 57        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
 58        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.
 59
 60        :param term_id: str ontology term to parse
 61        :return: str name of ontology that term belongs to
 62        """
 63        pattern = r"[A-Za-z]+:\d+"
 64        if not re.match(pattern, term_id):
 65            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")
 66
 67        ontology_term_prefix = term_id.split(":")[0]
 68        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
 69        if not ontology_name:
 70            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")
 71
 72        return ontology_name
 73
 74    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
 75        """
 76        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.
 77
 78        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
 79        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
 80        it is imported in.
 81        Otherwise, returns None.
 82
 83        :param ontology_term_prefix: str ontology term prefix to check
 84        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
 85        a supported ontology in the CxG schema.
 86        """
 87        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
 88            return ontology_term_prefix
 89        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
 90        return supported_ontology_name
 91
 92    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
 93        """
 94        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
 95        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
 96        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology
 97
 98        :param term_id: str ontology term to check
 99        :param ontology: str name of ontology to check against
100        :return: boolean flag indicating whether the term is supported
101        """
102        try:
103            ontology_name = self._parse_ontology_name(term_id)
104            if ontology and ontology_name != ontology:
105                return False
106            if term_id in self.cxg_schema.ontology(ontology_name):
107                return True
108        except ValueError:
109            return False
110        return False
111
112    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
113        """
114        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
115        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
116
117        Example
118        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
119        >>> ontology_parser = OntologyParser()
120        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
121        ['CL:0000000', 'CL:0000057', ...
122
123        :param term_id: str ontology term to find ancestors for
124        :param include_self: boolean flag to include the term itself as an ancestor
125        :return: flattened List[str] of ancestor terms
126        """
127        if term_id in VALID_NON_ONTOLOGY_TERMS:
128            return []
129        ontology_name = self._parse_ontology_name(term_id)
130        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
131        return ancestors + [term_id] if include_self else ancestors
132
133    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
134        """
135        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
136        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
137
138        Example
139        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
140        >>> ontology_parser = OntologyParser()
141        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
142        {
143            'CL:0000003': ['CL:0000003'],
144            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
145        }
146
147        :param term_ids: list of str ontology terms to find ancestors for
148        :param include_self: boolean flag to include the term itself as an ancestor
149        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
150        empty
151        list if there are no ancestors.
152        """
153        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}
154
155    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
156        """
157        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
158        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
159        supported ontology.
160
161        Example
162        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
163        >>> ontology_parser = OntologyParser()
164        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
165        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
166
167        :param term_id: str ontology term to find ancestors for
168        :param include_self: boolean flag to include the term itself as an ancestor
169        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
170        """
171        if term_id in VALID_NON_ONTOLOGY_TERMS:
172            return {}
173        ontology_name = self._parse_ontology_name(term_id)
174        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
175        return ancestors | {term_id: 0} if include_self else ancestors
176
177    def map_term_ancestors_with_distances(
178        self, term_ids: Iterable[str], include_self: bool = False
179    ) -> Dict[str, Dict[str, int]]:
180        """
181        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
182        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
183        supported ontology.
184
185        Example
186        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
187        >>> ontology_parser = OntologyParser()
188        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
189        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
190
191        :param term_ids: list of str ontology terms to find ancestors for
192        :param include_self: boolean flag to include the term itself as an ancestor
193        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
194        respective distances from the term_id
195        """
196        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}
197
198    def get_term_parents(self, term_id: str) -> List[str]:
199        """
200        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
201        a supported ontology.
202
203        Example
204        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
205        >>> ontology_parser = OntologyParser()
206        >>> ontology_parser.get_term_parents("CL:0000101")
207        ['CL:0000526']
208
209        :param term_id: str ontology term to find parents for
210        :return: List[str] of parent terms
211        """
212        if term_id in VALID_NON_ONTOLOGY_TERMS:
213            return []
214        ontology_name = self._parse_ontology_name(term_id)
215        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
216        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
217        return parents
218
219    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
220        """
221        Get the distance between two ontology terms. The distance is defined as the number of edges between the
222        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
223        Raises ValueError if term IDs are not valid members of a supported ontology.
224
225        :param term_id_1: str ontology term to find distance for
226        :param term_id_2: str ontology term to find distance for
227        :return: int distance between the two terms, measured in number of edges between their shortest path.
228        """
229        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
230        if not lcas:
231            return -1
232        return int(
233            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
234            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
235        )
236
237    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
238        """
239        Get the lowest common ancestors between two ontology terms that is from the given ontology.
240        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
241        Raises ValueError if term IDs are not valid members of a supported ontology.
242
243        :param term_id_1: str ontology term to find LCA for
244        :param term_id_2: str ontology term to find LCA for
245        :return: str term ID of the lowest common ancestor term
246        """
247        # include path to term itself
248        ontology = self._parse_ontology_name(term_id_1)
249        if ontology != self._parse_ontology_name(term_id_2):
250            return []
251        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
252        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
253        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
254        min_sum_distances = float("inf")
255        for ancestors in common_ancestors:
256            sum_distances = ancestors_1[ancestors] + ancestors_2[ancestors]
257            if sum_distances < min_sum_distances:
258                min_sum_distances = sum_distances
259        return [
260            ancestor
261            for ancestor in common_ancestors
262            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
263        ]
264
265    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
266        """
267        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
268        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
269        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
270        Raises ValueError if term ID is not valid member of a supported ontology.
271
272        Example
273        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
274        >>> ontology_parser = OntologyParser()
275        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
276        ['CL:0000000']
277
278        :param term_id: str ontology term to find high-level terms for
279        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
280        :return: List[str] of high-level terms that the term is a descendant of
281        """
282        if term_id in VALID_NON_ONTOLOGY_TERMS:
283            return []
284        ancestors = self.get_term_ancestors(term_id, include_self=True)
285        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]
286
287    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
288        """
289        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
290        format
291
292        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
293
294        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
295        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
296
297        :param term_ids: list of str ontology terms to map high level terms for
298        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
299        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
300        Each key maps to empty list if there are no ancestors among the provided input.
301        """
302        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}
303
304    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
305        """
306        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
307        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
308        if term ID is not valid member of a supported ontology.
309
310        Example
311        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
312        >>> ontology_parser = OntologyParser()
313        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
314        'CL:0000000'
315
316        :param term_id: str ontology term to find highest level term for
317        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
318        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
319        high-level terms
320        """
321        high_level_terms = self.get_high_level_terms(term_id, high_level_terms)
322        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
323        if not high_level_terms:
324            return None
325        return max(high_level_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])
326
327    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
328        """
329        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
330        format
331
332        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
333
334        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
335        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
336        provided input. Raises ValueError if term ID is not valid member of a supported ontology.
337
338        :param term_ids: list of str ontology terms to map high level terms for
339        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
340        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
341        Each key maps to empty list if there are no ancestors among the provided input.
342        """
343        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}
344
345    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
346        """
347        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
348        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
349
350        Example
351        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
352        >>> ontology_parser = OntologyParser()
353        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
354        ['CL:0002363']
355
356        :param term_id: str ontology term to find descendants for
357        :param include_self: boolean flag to include the term itself as a descendant
358        :return: List[str] of descendant terms
359        """
360        if term_id in VALID_NON_ONTOLOGY_TERMS:
361            return []
362        ontology_name = self._parse_ontology_name(term_id)
363        descendants = [term_id] if include_self else []
364        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
365            ancestors = candidate_metadata["ancestors"].keys()
366            if term_id in ancestors:
367                descendants.append(candidate_descendant)
368        return descendants
369
370    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
371        """
372        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
373         included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
374
375        Example
376        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
377        >>> ontology_parser = OntologyParser()
378        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
379        {
380            'CL:0000003': ['CL:0000003', ...],
381            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
382        }
383
384        :param term_ids: list of str ontology terms to find descendants for
385        :param include_self: boolean flag to include the term itself as an descendant
386        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
387        empty list if there are no descendants.
388        """
389        descendants_dict: Dict[str, List[str]] = dict()
390        ontology_names = set()
391        for term_id in term_ids:
392            if term_id in VALID_NON_ONTOLOGY_TERMS:
393                descendants_dict[term_id] = []
394                continue
395            ontology_name = self._parse_ontology_name(term_id)
396            descendants_dict[term_id] = [term_id] if include_self else []
397            ontology_names.add(ontology_name)
398
399        for ontology in ontology_names:
400            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
401                for ancestor_id in descendants_dict:
402                    ancestors = candidate_metadata["ancestors"].keys()
403                    if ancestor_id in ancestors:
404                        descendants_dict[ancestor_id].append(candidate_descendant)
405
406        return descendants_dict
407
408    def get_term_children(self, term_id: str) -> List[str]:
409        """
410        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
411        supported ontology.
412
413        Example
414        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
415        >>> ontology_parser = OntologyParser()
416        >>> ontology_parser.get_term_children("CL:0000526")
417        ['CL:0000101', 'CL:4042034']
418
419        :param term_id: str ontology term to find children for
420        :return: List[str] of children terms
421        """
422        if term_id in VALID_NON_ONTOLOGY_TERMS:
423            return []
424        ontology_name = self._parse_ontology_name(term_id)
425        children = []
426        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
427            for ancestor, distance in candidate_metadata["ancestors"].items():
428                if ancestor == term_id and distance == 1:
429                    children.append(candidate_child)
430        return children
431
432    def get_term_graph(self, term_id: str) -> OntologyNode:
433        """
434        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
435        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
436
437        Example
438        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
439        >>> ontology_parser = OntologyParser()
440        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
441        >>> root_node.term_id
442        'CL:0000000'
443        >>> root_node.to_dict() # doctest: +SKIP
444        {
445            "term_id": "CL:0000000",
446            "name": "cell A",
447            "children": [
448                {
449                    "term_id": "CL:0000001",
450                    "name": "cell B",
451                    "children": [...],
452                },
453                {
454                    "term_id": "CL:0000002",
455                    "name": "cell C",
456                    "children": [...],
457                },
458                ...
459            ]
460        }
461        >>> root_node.term_counter # doctest: +SKIP
462        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
463
464        :param term_id: str ontology term to build subtree for
465        :return: OntologyNode representation of graph with term_id as root.
466        """
467        term_label = self.get_term_label(term_id)
468        root = OntologyNode(term_id, term_label)
469        for child_term_id in self.get_term_children(term_id):
470            root.add_child(self.get_term_graph(child_term_id))
471        return root
472
473    def is_term_deprecated(self, term_id: str) -> bool:
474        """
475        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
476        ontology.
477
478        Example
479        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
480        >>> ontology_parser = OntologyParser()
481        >>> ontology_parser.is_term_deprecated("CL:0000003")
482        True
483
484        :param term_id: str ontology term to check for deprecation
485        :return: boolean flag indicating whether the term is deprecated
486        """
487        if term_id in VALID_NON_ONTOLOGY_TERMS:
488            return False
489        ontology_name = self._parse_ontology_name(term_id)
490        is_deprecated: bool = self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated")
491        return is_deprecated
492
493    def get_term_replacement(self, term_id: str) -> Union[str, None]:
494        """
495        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
496        Raises ValueError if term ID is not valid member of a supported ontology.
497
498        Example
499        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
500        >>> ontology_parser = OntologyParser()
501        >>> ontology_parser.get_term_replacement("CL:0000003")
502        'CL:0000000'
503
504        :param term_id: str ontology term to check a replacement term for
505        :return: replacement str term ID if it exists, None otherwise
506        """
507        if term_id in VALID_NON_ONTOLOGY_TERMS:
508            return None
509        ontology_name = self._parse_ontology_name(term_id)
510        replaced_by: str = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
511        return replaced_by if replaced_by else None
512
513    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
514        """
515        Fetch metadata for a given ontology term. Returns a dict with format
516
517        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
518
519        Comments maps to List[str] of ontology curator comments
520        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
521        Consider maps to List[str] of alternate ontology terms to consider using instead of this term
522
523        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
524        of a supported ontology.
525
526        :param term_id: str ontology term to fetch metadata for
527        :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
528        """
529        if term_id in VALID_NON_ONTOLOGY_TERMS:
530            return {"comments": None, "term_tracker": None, "consider": None}
531        ontology_name = self._parse_ontology_name(term_id)
532        return {
533            key: self.cxg_schema.ontology(ontology_name)[term_id].get(key, None)
534            for key in {"comments", "term_tracker", "consider"}
535        }
536
537    def get_term_label(self, term_id: str) -> str:
538        """
539        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
540        supported ontology.
541
542        Example
543        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
544        >>> ontology_parser = OntologyParser()
545        >>> ontology_parser.get_term_label("CL:0000005")
546        'neural crest derived fibroblast'
547
548        :param term_id: str ontology term to fetch label for
549        :return: str human-readable label for the term
550        """
551        if term_id in VALID_NON_ONTOLOGY_TERMS:
552            return term_id
553        ontology_name = self._parse_ontology_name(term_id)
554        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
555        return label
556
557    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
558        """
559        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
560        member of a supported ontology.
561
562        Example
563        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
564        >>> ontology_parser = OntologyParser()
565        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
566        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
567
568        :param term_ids: list of str ontology terms to fetch label for
569        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
570        """
571        return {term_id: self.get_term_label(term_id) for term_id in term_ids}
572
573    def get_term_description(self, term_id: str) -> Optional[str]:
574        """
575        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
576        supported ontology.
577
578        Example
579        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
580        >>> ontology_parser = OntologyParser()
581        >>> ontology_parser.get_term_description("CL:0000005")
582        'Any fibroblast that is derived from the neural crest.'
583
584        :param term_id: str ontology term to fetch description for
585        :return: str description for the term
586        """
587        if term_id in VALID_NON_ONTOLOGY_TERMS:
588            return term_id
589        ontology_name = self._parse_ontology_name(term_id)
590        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
591        return description
592
593    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
594        """
595        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
596        a supported ontology.
597
598        Example
599        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
600        >>> ontology_parser = OntologyParser()
601        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
602        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
603
604        :param term_ids: list of str ontology terms to fetch descriptions for
605        :return: Dict[str, str] mapping term IDs to their respective descriptions
606        """
607        return {term_id: self.get_term_description(term_id) for term_id in term_ids}
608
609    def get_term_synonyms(self, term_id: str) -> List[str]:
610        """
611        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
612        Raises ValueError if term ID is not valid member of a supported ontology.
613
614        Example
615        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
616        >>> ontology_parser = OntologyParser()
617        >>> ontology_parser.get_term_synonyms("CL:0000019")
618        ['sperm cell', 'spermatozoid', 'spermatozoon']
619
620        :param term_id: str ontology term to fetch synonyms for
621        :return: List[str] synonyms for the term
622        """
623        if term_id in VALID_NON_ONTOLOGY_TERMS:
624            return []
625        ontology_name = self._parse_ontology_name(term_id)
626        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
627        return synonyms
628
629    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
630        """
631        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
632        a supported ontology.
633
634        Example
635        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
636        >>> ontology_parser = OntologyParser()
637        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
638        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
639
640        :param term_ids: list of str ontology terms to fetch synonyms for
641        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
642        """
643        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}
644
645    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
646        """
647        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
648        ontology_name is not a supported ontology.
649
650        Returns None if term ID is not valid member of a supported ontology.
651
652        Example
653        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
654        >>> ontology_parser = OntologyParser()
655        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
656        'CL:0000005'
657
658        :param term_label: str human-readable label to fetch term ID for
659        :param ontology_name: str name of ontology to search for term label in
660        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
661        """
662        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
663        return ontology_term_label_to_id_map.get(term_label)
664
665    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
666        """
667        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
668
669        If no applicable match is found, returns None.
670
671        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
672
673        Example
674        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
675        >>> ontology_parser = OntologyParser()
676        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
677        'UBERON:0000468'
678
679        :param term_id: str ontology term to find equivalent term for
680        :param cross_ontology: str name of ontology to search for equivalent term in
681        :return: Optional[str] equivalent term ID from the cross_ontology
682        """
683        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
684            raise ValueError(
685                f"{cross_ontology} is not in the set of supported cross ontology mappings "
686                f"{self.cxg_schema.cross_ontology_mappings}."
687            )
688        ontology_name = self._parse_ontology_name(term_id)
689        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
690        bridge_term_id: Optional[str] = None
691        if cross_ontology_terms:
692            bridge_term_id = cross_ontology_terms.get(cross_ontology)
693        return bridge_term_id
694
695    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
696        """
697        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
698        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
699        of the term for the closest match.
700
701        If no applicable match is found, returns an empty list.
702
703        If multiple ancestors of the same distance have matches, returns all possible closest matches.
704
705        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
706
707        Example
708        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
709        >>> ontology_parser = OntologyParser()
710        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
711        ['UBERON:0000476', 'UBERON:0000920']
712
713        :param term_id: str ontology term to find closest term for
714        :param cross_ontology: str name of ontology to search for closest term in
715        :return: List[str] list of closest term IDs from the cross_ontology
716        """
717        closest_bridge_terms: List[str] = []
718        terms_to_match = [term_id]
719        while terms_to_match and not closest_bridge_terms:
720            for term in terms_to_match:
721                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
722                    closest_bridge_terms.append(closest_bridge_term)
723            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
724        return closest_bridge_terms
class OntologyParser:
 10class OntologyParser:
 11    """
 12    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
 13    """
 14
 15    cxg_schema: CXGSchema
 16    """ CXGSchema object to fetch ontology metadata from """
 17
 18    def __init__(self, schema_version: Optional[str] = None):
 19        """
 20        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
 21        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
 22        parse the corresponding ontology metadata.
 23
 24        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
 25        is loaded.
 26        """
 27        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
 28        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
 29            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
 30        }
 31
 32    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
 33        """
 34        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
 35
 36        Example
 37        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
 38        >>> ontology_parser = OntologyParser()
 39        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
 40        {'Label A': 'CL:0000000', ... }
 41
 42        :param ontology_name: str name of ontology to get map of term labels to term IDs
 43        """
 44        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
 45        if not supported_ontology_name:
 46            raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.")
 47
 48        if self.term_label_to_id_map[supported_ontology_name]:
 49            return self.term_label_to_id_map[supported_ontology_name].copy()
 50
 51        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
 52            self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id
 53
 54        return self.term_label_to_id_map[supported_ontology_name].copy()
 55
 56    def _parse_ontology_name(self, term_id: str) -> str:
 57        """
 58        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
 59        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.
 60
 61        :param term_id: str ontology term to parse
 62        :return: str name of ontology that term belongs to
 63        """
 64        pattern = r"[A-Za-z]+:\d+"
 65        if not re.match(pattern, term_id):
 66            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")
 67
 68        ontology_term_prefix = term_id.split(":")[0]
 69        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
 70        if not ontology_name:
 71            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")
 72
 73        return ontology_name
 74
 75    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
 76        """
 77        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.
 78
 79        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
 80        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
 81        it is imported in.
 82        Otherwise, returns None.
 83
 84        :param ontology_term_prefix: str ontology term prefix to check
 85        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
 86        a supported ontology in the CxG schema.
 87        """
 88        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
 89            return ontology_term_prefix
 90        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
 91        return supported_ontology_name
 92
 93    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
 94        """
 95        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
 96        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
 97        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology
 98
 99        :param term_id: str ontology term to check
100        :param ontology: str name of ontology to check against
101        :return: boolean flag indicating whether the term is supported
102        """
103        try:
104            ontology_name = self._parse_ontology_name(term_id)
105            if ontology and ontology_name != ontology:
106                return False
107            if term_id in self.cxg_schema.ontology(ontology_name):
108                return True
109        except ValueError:
110            return False
111        return False
112
113    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
114        """
115        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
116        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
117
118        Example
119        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
120        >>> ontology_parser = OntologyParser()
121        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
122        ['CL:0000000', 'CL:0000057', ...
123
124        :param term_id: str ontology term to find ancestors for
125        :param include_self: boolean flag to include the term itself as an ancestor
126        :return: flattened List[str] of ancestor terms
127        """
128        if term_id in VALID_NON_ONTOLOGY_TERMS:
129            return []
130        ontology_name = self._parse_ontology_name(term_id)
131        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
132        return ancestors + [term_id] if include_self else ancestors
133
134    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
135        """
136        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
137        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
138
139        Example
140        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
141        >>> ontology_parser = OntologyParser()
142        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
143        {
144            'CL:0000003': ['CL:0000003'],
145            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
146        }
147
148        :param term_ids: list of str ontology terms to find ancestors for
149        :param include_self: boolean flag to include the term itself as an ancestor
150        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
151        empty
152        list if there are no ancestors.
153        """
154        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}
155
156    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
157        """
158        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
159        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
160        supported ontology.
161
162        Example
163        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
164        >>> ontology_parser = OntologyParser()
165        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
166        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
167
168        :param term_id: str ontology term to find ancestors for
169        :param include_self: boolean flag to include the term itself as an ancestor
170        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
171        """
172        if term_id in VALID_NON_ONTOLOGY_TERMS:
173            return {}
174        ontology_name = self._parse_ontology_name(term_id)
175        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
176        return ancestors | {term_id: 0} if include_self else ancestors
177
178    def map_term_ancestors_with_distances(
179        self, term_ids: Iterable[str], include_self: bool = False
180    ) -> Dict[str, Dict[str, int]]:
181        """
182        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
183        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
184        supported ontology.
185
186        Example
187        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
188        >>> ontology_parser = OntologyParser()
189        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
190        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
191
192        :param term_ids: list of str ontology terms to find ancestors for
193        :param include_self: boolean flag to include the term itself as an ancestor
194        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
195        respective distances from the term_id
196        """
197        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}
198
199    def get_term_parents(self, term_id: str) -> List[str]:
200        """
201        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
202        a supported ontology.
203
204        Example
205        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
206        >>> ontology_parser = OntologyParser()
207        >>> ontology_parser.get_term_parents("CL:0000101")
208        ['CL:0000526']
209
210        :param term_id: str ontology term to find parents for
211        :return: List[str] of parent terms
212        """
213        if term_id in VALID_NON_ONTOLOGY_TERMS:
214            return []
215        ontology_name = self._parse_ontology_name(term_id)
216        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
217        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
218        return parents
219
220    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
221        """
222        Get the distance between two ontology terms. The distance is defined as the number of edges between the
223        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
224        Raises ValueError if term IDs are not valid members of a supported ontology.
225
226        :param term_id_1: str ontology term to find distance for
227        :param term_id_2: str ontology term to find distance for
228        :return: int distance between the two terms, measured in number of edges between their shortest path.
229        """
230        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
231        if not lcas:
232            return -1
233        return int(
234            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
235            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
236        )
237
238    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
239        """
240        Get the lowest common ancestors between two ontology terms that is from the given ontology.
241        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
242        Raises ValueError if term IDs are not valid members of a supported ontology.
243
244        :param term_id_1: str ontology term to find LCA for
245        :param term_id_2: str ontology term to find LCA for
246        :return: str term ID of the lowest common ancestor term
247        """
248        # include path to term itself
249        ontology = self._parse_ontology_name(term_id_1)
250        if ontology != self._parse_ontology_name(term_id_2):
251            return []
252        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
253        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
254        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
255        min_sum_distances = float("inf")
256        for ancestors in common_ancestors:
257            sum_distances = ancestors_1[ancestors] + ancestors_2[ancestors]
258            if sum_distances < min_sum_distances:
259                min_sum_distances = sum_distances
260        return [
261            ancestor
262            for ancestor in common_ancestors
263            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
264        ]
265
266    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
267        """
268        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
269        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
270        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
271        Raises ValueError if term ID is not valid member of a supported ontology.
272
273        Example
274        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
275        >>> ontology_parser = OntologyParser()
276        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
277        ['CL:0000000']
278
279        :param term_id: str ontology term to find high-level terms for
280        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
281        :return: List[str] of high-level terms that the term is a descendant of
282        """
283        if term_id in VALID_NON_ONTOLOGY_TERMS:
284            return []
285        ancestors = self.get_term_ancestors(term_id, include_self=True)
286        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]
287
288    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
289        """
290        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
291        format
292
293        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
294
295        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
296        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
297
298        :param term_ids: list of str ontology terms to map high level terms for
299        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
300        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
301        Each key maps to empty list if there are no ancestors among the provided input.
302        """
303        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}
304
305    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
306        """
307        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
308        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
309        if term ID is not valid member of a supported ontology.
310
311        Example
312        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
313        >>> ontology_parser = OntologyParser()
314        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
315        'CL:0000000'
316
317        :param term_id: str ontology term to find highest level term for
318        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
319        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
320        high-level terms
321        """
322        high_level_terms = self.get_high_level_terms(term_id, high_level_terms)
323        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
324        if not high_level_terms:
325            return None
326        return max(high_level_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])
327
328    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
329        """
330        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
331        format
332
333        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
334
335        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
336        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
337        provided input. Raises ValueError if term ID is not valid member of a supported ontology.
338
339        :param term_ids: list of str ontology terms to map high level terms for
340        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
341        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
342        Each key maps to empty list if there are no ancestors among the provided input.
343        """
344        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}
345
346    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
347        """
348        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
349        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
350
351        Example
352        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
353        >>> ontology_parser = OntologyParser()
354        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
355        ['CL:0002363']
356
357        :param term_id: str ontology term to find descendants for
358        :param include_self: boolean flag to include the term itself as a descendant
359        :return: List[str] of descendant terms
360        """
361        if term_id in VALID_NON_ONTOLOGY_TERMS:
362            return []
363        ontology_name = self._parse_ontology_name(term_id)
364        descendants = [term_id] if include_self else []
365        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
366            ancestors = candidate_metadata["ancestors"].keys()
367            if term_id in ancestors:
368                descendants.append(candidate_descendant)
369        return descendants
370
371    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
372        """
373        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
374         included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
375
376        Example
377        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
378        >>> ontology_parser = OntologyParser()
379        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
380        {
381            'CL:0000003': ['CL:0000003', ...],
382            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
383        }
384
385        :param term_ids: list of str ontology terms to find descendants for
386        :param include_self: boolean flag to include the term itself as an descendant
387        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
388        empty list if there are no descendants.
389        """
390        descendants_dict: Dict[str, List[str]] = dict()
391        ontology_names = set()
392        for term_id in term_ids:
393            if term_id in VALID_NON_ONTOLOGY_TERMS:
394                descendants_dict[term_id] = []
395                continue
396            ontology_name = self._parse_ontology_name(term_id)
397            descendants_dict[term_id] = [term_id] if include_self else []
398            ontology_names.add(ontology_name)
399
400        for ontology in ontology_names:
401            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
402                for ancestor_id in descendants_dict:
403                    ancestors = candidate_metadata["ancestors"].keys()
404                    if ancestor_id in ancestors:
405                        descendants_dict[ancestor_id].append(candidate_descendant)
406
407        return descendants_dict
408
409    def get_term_children(self, term_id: str) -> List[str]:
410        """
411        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
412        supported ontology.
413
414        Example
415        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
416        >>> ontology_parser = OntologyParser()
417        >>> ontology_parser.get_term_children("CL:0000526")
418        ['CL:0000101', 'CL:4042034']
419
420        :param term_id: str ontology term to find children for
421        :return: List[str] of children terms
422        """
423        if term_id in VALID_NON_ONTOLOGY_TERMS:
424            return []
425        ontology_name = self._parse_ontology_name(term_id)
426        children = []
427        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
428            for ancestor, distance in candidate_metadata["ancestors"].items():
429                if ancestor == term_id and distance == 1:
430                    children.append(candidate_child)
431        return children
432
433    def get_term_graph(self, term_id: str) -> OntologyNode:
434        """
435        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
436        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
437
438        Example
439        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
440        >>> ontology_parser = OntologyParser()
441        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
442        >>> root_node.term_id
443        'CL:0000000'
444        >>> root_node.to_dict() # doctest: +SKIP
445        {
446            "term_id": "CL:0000000",
447            "name": "cell A",
448            "children": [
449                {
450                    "term_id": "CL:0000001",
451                    "name": "cell B",
452                    "children": [...],
453                },
454                {
455                    "term_id": "CL:0000002",
456                    "name": "cell C",
457                    "children": [...],
458                },
459                ...
460            ]
461        }
462        >>> root_node.term_counter # doctest: +SKIP
463        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
464
465        :param term_id: str ontology term to build subtree for
466        :return: OntologyNode representation of graph with term_id as root.
467        """
468        term_label = self.get_term_label(term_id)
469        root = OntologyNode(term_id, term_label)
470        for child_term_id in self.get_term_children(term_id):
471            root.add_child(self.get_term_graph(child_term_id))
472        return root
473
474    def is_term_deprecated(self, term_id: str) -> bool:
475        """
476        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
477        ontology.
478
479        Example
480        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
481        >>> ontology_parser = OntologyParser()
482        >>> ontology_parser.is_term_deprecated("CL:0000003")
483        True
484
485        :param term_id: str ontology term to check for deprecation
486        :return: boolean flag indicating whether the term is deprecated
487        """
488        if term_id in VALID_NON_ONTOLOGY_TERMS:
489            return False
490        ontology_name = self._parse_ontology_name(term_id)
491        is_deprecated: bool = self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated")
492        return is_deprecated
493
494    def get_term_replacement(self, term_id: str) -> Union[str, None]:
495        """
496        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
497        Raises ValueError if term ID is not valid member of a supported ontology.
498
499        Example
500        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
501        >>> ontology_parser = OntologyParser()
502        >>> ontology_parser.get_term_replacement("CL:0000003")
503        'CL:0000000'
504
505        :param term_id: str ontology term to check a replacement term for
506        :return: replacement str term ID if it exists, None otherwise
507        """
508        if term_id in VALID_NON_ONTOLOGY_TERMS:
509            return None
510        ontology_name = self._parse_ontology_name(term_id)
511        replaced_by: str = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
512        return replaced_by if replaced_by else None
513
514    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
515        """
516        Fetch metadata for a given ontology term. Returns a dict with format
517
518        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
519
520        Comments maps to List[str] of ontology curator comments
521        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
522        Consider maps to List[str] of alternate ontology terms to consider using instead of this term
523
524        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
525        of a supported ontology.
526
527        :param term_id: str ontology term to fetch metadata for
528        :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
529        """
530        if term_id in VALID_NON_ONTOLOGY_TERMS:
531            return {"comments": None, "term_tracker": None, "consider": None}
532        ontology_name = self._parse_ontology_name(term_id)
533        return {
534            key: self.cxg_schema.ontology(ontology_name)[term_id].get(key, None)
535            for key in {"comments", "term_tracker", "consider"}
536        }
537
538    def get_term_label(self, term_id: str) -> str:
539        """
540        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
541        supported ontology.
542
543        Example
544        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
545        >>> ontology_parser = OntologyParser()
546        >>> ontology_parser.get_term_label("CL:0000005")
547        'neural crest derived fibroblast'
548
549        :param term_id: str ontology term to fetch label for
550        :return: str human-readable label for the term
551        """
552        if term_id in VALID_NON_ONTOLOGY_TERMS:
553            return term_id
554        ontology_name = self._parse_ontology_name(term_id)
555        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
556        return label
557
558    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
559        """
560        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
561        member of a supported ontology.
562
563        Example
564        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
565        >>> ontology_parser = OntologyParser()
566        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
567        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
568
569        :param term_ids: list of str ontology terms to fetch label for
570        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
571        """
572        return {term_id: self.get_term_label(term_id) for term_id in term_ids}
573
574    def get_term_description(self, term_id: str) -> Optional[str]:
575        """
576        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
577        supported ontology.
578
579        Example
580        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
581        >>> ontology_parser = OntologyParser()
582        >>> ontology_parser.get_term_description("CL:0000005")
583        'Any fibroblast that is derived from the neural crest.'
584
585        :param term_id: str ontology term to fetch description for
586        :return: str description for the term
587        """
588        if term_id in VALID_NON_ONTOLOGY_TERMS:
589            return term_id
590        ontology_name = self._parse_ontology_name(term_id)
591        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
592        return description
593
594    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
595        """
596        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
597        a supported ontology.
598
599        Example
600        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
601        >>> ontology_parser = OntologyParser()
602        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
603        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
604
605        :param term_ids: list of str ontology terms to fetch descriptions for
606        :return: Dict[str, str] mapping term IDs to their respective descriptions
607        """
608        return {term_id: self.get_term_description(term_id) for term_id in term_ids}
609
610    def get_term_synonyms(self, term_id: str) -> List[str]:
611        """
612        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
613        Raises ValueError if term ID is not valid member of a supported ontology.
614
615        Example
616        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
617        >>> ontology_parser = OntologyParser()
618        >>> ontology_parser.get_term_synonyms("CL:0000019")
619        ['sperm cell', 'spermatozoid', 'spermatozoon']
620
621        :param term_id: str ontology term to fetch synonyms for
622        :return: List[str] synonyms for the term
623        """
624        if term_id in VALID_NON_ONTOLOGY_TERMS:
625            return []
626        ontology_name = self._parse_ontology_name(term_id)
627        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
628        return synonyms
629
630    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
631        """
632        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
633        a supported ontology.
634
635        Example
636        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
637        >>> ontology_parser = OntologyParser()
638        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
639        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
640
641        :param term_ids: list of str ontology terms to fetch synonyms for
642        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
643        """
644        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}
645
646    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
647        """
648        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
649        ontology_name is not a supported ontology.
650
651        Returns None if term ID is not valid member of a supported ontology.
652
653        Example
654        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
655        >>> ontology_parser = OntologyParser()
656        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
657        'CL:0000005'
658
659        :param term_label: str human-readable label to fetch term ID for
660        :param ontology_name: str name of ontology to search for term label in
661        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
662        """
663        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
664        return ontology_term_label_to_id_map.get(term_label)
665
666    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
667        """
668        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
669
670        If no applicable match is found, returns None.
671
672        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
673
674        Example
675        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
676        >>> ontology_parser = OntologyParser()
677        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
678        'UBERON:0000468'
679
680        :param term_id: str ontology term to find equivalent term for
681        :param cross_ontology: str name of ontology to search for equivalent term in
682        :return: Optional[str] equivalent term ID from the cross_ontology
683        """
684        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
685            raise ValueError(
686                f"{cross_ontology} is not in the set of supported cross ontology mappings "
687                f"{self.cxg_schema.cross_ontology_mappings}."
688            )
689        ontology_name = self._parse_ontology_name(term_id)
690        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
691        bridge_term_id: Optional[str] = None
692        if cross_ontology_terms:
693            bridge_term_id = cross_ontology_terms.get(cross_ontology)
694        return bridge_term_id
695
696    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
697        """
698        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
699        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
700        of the term for the closest match.
701
702        If no applicable match is found, returns an empty list.
703
704        If multiple ancestors of the same distance have matches, returns all possible closest matches.
705
706        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
707
708        Example
709        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
710        >>> ontology_parser = OntologyParser()
711        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
712        ['UBERON:0000476', 'UBERON:0000920']
713
714        :param term_id: str ontology term to find closest term for
715        :param cross_ontology: str name of ontology to search for closest term in
716        :return: List[str] list of closest term IDs from the cross_ontology
717        """
718        closest_bridge_terms: List[str] = []
719        terms_to_match = [term_id]
720        while terms_to_match and not closest_bridge_terms:
721            for term in terms_to_match:
722                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
723                    closest_bridge_terms.append(closest_bridge_term)
724            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
725        return closest_bridge_terms

An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.

OntologyParser(schema_version: Optional[str] = None)
18    def __init__(self, schema_version: Optional[str] = None):
19        """
20        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
21        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
22        parse the corresponding ontology metadata.
23
24        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
25        is loaded.
26        """
27        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
28        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
29            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
30        }

Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema version. If not cached, it will make a network call to GitHub Release Assets to load in memory and parse the corresponding ontology metadata.

Parameters
  • schema_version: str version of the schema to load ontology metadata for. If not provided, the latest is loaded.

CXGSchema object to fetch ontology metadata from

term_label_to_id_map: Dict[str, Dict[str, str]]
def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
32    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
33        """
34        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
35
36        Example
37        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
38        >>> ontology_parser = OntologyParser()
39        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
40        {'Label A': 'CL:0000000', ... }
41
42        :param ontology_name: str name of ontology to get map of term labels to term IDs
43        """
44        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
45        if not supported_ontology_name:
46            raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.")
47
48        if self.term_label_to_id_map[supported_ontology_name]:
49            return self.term_label_to_id_map[supported_ontology_name].copy()
50
51        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
52            self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id
53
54        return self.term_label_to_id_map[supported_ontology_name].copy()

Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
{'Label A': 'CL:0000000', ... }
Parameters
  • ontology_name: str name of ontology to get map of term labels to term IDs
def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
 93    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
 94        """
 95        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
 96        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
 97        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology
 98
 99        :param term_id: str ontology term to check
100        :param ontology: str name of ontology to check against
101        :return: boolean flag indicating whether the term is supported
102        """
103        try:
104            ontology_name = self._parse_ontology_name(term_id)
105            if ontology and ontology_name != ontology:
106                return False
107            if term_id in self.cxg_schema.ontology(ontology_name):
108                return True
109        except ValueError:
110            return False
111        return False

Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine if the term is defined in that particular ontology. Otherwise, checks if the term is valid in any supported ontology.

Parameters
  • term_id: str ontology term to check
  • ontology: str name of ontology to check against
Returns

boolean flag indicating whether the term is supported

def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
113    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
114        """
115        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
116        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
117
118        Example
119        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
120        >>> ontology_parser = OntologyParser()
121        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
122        ['CL:0000000', 'CL:0000057', ...
123
124        :param term_id: str ontology term to find ancestors for
125        :param include_self: boolean flag to include the term itself as an ancestor
126        :return: flattened List[str] of ancestor terms
127        """
128        if term_id in VALID_NON_ONTOLOGY_TERMS:
129            return []
130        ontology_name = self._parse_ontology_name(term_id)
131        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
132        return ancestors + [term_id] if include_self else ancestors

Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
['CL:0000000', 'CL:0000057', ...
Parameters
  • term_id: str ontology term to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

flattened List[str] of ancestor terms

def map_term_ancestors( self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
134    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
135        """
136        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
137        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
138
139        Example
140        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
141        >>> ontology_parser = OntologyParser()
142        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
143        {
144            'CL:0000003': ['CL:0000003'],
145            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
146        }
147
148        :param term_ids: list of str ontology terms to find ancestors for
149        :param include_self: boolean flag to include the term itself as an ancestor
150        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
151        empty
152        list if there are no ancestors.
153        """
154        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}

Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
    'CL:0000003': ['CL:0000003'],
    'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
}
Parameters
  • term_ids: list of str ontology terms to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to empty list if there are no ancestors.

def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
156    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
157        """
158        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
159        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
160        supported ontology.
161
162        Example
163        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
164        >>> ontology_parser = OntologyParser()
165        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
166        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
167
168        :param term_id: str ontology term to find ancestors for
169        :param include_self: boolean flag to include the term itself as an ancestor
170        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
171        """
172        if term_id in VALID_NON_ONTOLOGY_TERMS:
173            return {}
174        ontology_name = self._parse_ontology_name(term_id)
175        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
176        return ancestors | {term_id: 0} if include_self else ancestors

Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
{'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
Parameters
  • term_id: str ontology term to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

Dict[str, int] map of ancestor terms and their respective distances from the term_id

def map_term_ancestors_with_distances( self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, Dict[str, int]]:
178    def map_term_ancestors_with_distances(
179        self, term_ids: Iterable[str], include_self: bool = False
180    ) -> Dict[str, Dict[str, int]]:
181        """
182        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
183        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
184        supported ontology.
185
186        Example
187        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
188        >>> ontology_parser = OntologyParser()
189        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
190        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
191
192        :param term_ids: list of str ontology terms to find ancestors for
193        :param include_self: boolean flag to include the term itself as an ancestor
194        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
195        respective distances from the term_id
196        """
197        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
{'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
Parameters
  • term_ids: list of str ontology terms to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their respective distances from the term_id

def get_term_parents(self, term_id: str) -> List[str]:
199    def get_term_parents(self, term_id: str) -> List[str]:
200        """
201        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
202        a supported ontology.
203
204        Example
205        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
206        >>> ontology_parser = OntologyParser()
207        >>> ontology_parser.get_term_parents("CL:0000101")
208        ['CL:0000526']
209
210        :param term_id: str ontology term to find parents for
211        :return: List[str] of parent terms
212        """
213        if term_id in VALID_NON_ONTOLOGY_TERMS:
214            return []
215        ontology_name = self._parse_ontology_name(term_id)
216        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
217        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
218        return parents

Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_parents("CL:0000101")
['CL:0000526']
Parameters
  • term_id: str ontology term to find parents for
Returns

List[str] of parent terms

def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
220    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
221        """
222        Get the distance between two ontology terms. The distance is defined as the number of edges between the
223        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
224        Raises ValueError if term IDs are not valid members of a supported ontology.
225
226        :param term_id_1: str ontology term to find distance for
227        :param term_id_2: str ontology term to find distance for
228        :return: int distance between the two terms, measured in number of edges between their shortest path.
229        """
230        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
231        if not lcas:
232            return -1
233        return int(
234            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
235            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
236        )

Get the distance between two ontology terms. The distance is defined as the number of edges between the two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint. Raises ValueError if term IDs are not valid members of a supported ontology.

Parameters
  • term_id_1: str ontology term to find distance for
  • term_id_2: str ontology term to find distance for
Returns

int distance between the two terms, measured as the number of edges on the shortest path between them.

def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
238    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
239        """
240        Get the lowest common ancestors between two ontology terms that is from the given ontology.
241        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
242        Raises ValueError if term IDs are not valid members of a supported ontology.
243
244        :param term_id_1: str ontology term to find LCA for
245        :param term_id_2: str ontology term to find LCA for
246        :return: str term ID of the lowest common ancestor term
247        """
248        # include path to term itself
249        ontology = self._parse_ontology_name(term_id_1)
250        if ontology != self._parse_ontology_name(term_id_2):
251            return []
252        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
253        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
254        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
255        min_sum_distances = float("inf")
256        for ancestors in common_ancestors:
257            sum_distances = ancestors_1[ancestors] + ancestors_2[ancestors]
258            if sum_distances < min_sum_distances:
259                min_sum_distances = sum_distances
260        return [
261            ancestor
262            for ancestor in common_ancestors
263            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
264        ]

Get the lowest common ancestors of two ontology terms within their shared ontology. Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors. Raises ValueError if term IDs are not valid members of a supported ontology.

Parameters
  • term_id_1: str ontology term to find LCA for
  • term_id_2: str ontology term to find LCA for
Returns

List[str] of term IDs of the lowest common ancestor terms; empty if the two terms belong to different ontologies.

def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
266    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
267        """
268        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
269        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
270        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
271        Raises ValueError if term ID is not valid member of a supported ontology.
272
273        Example
274        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
275        >>> ontology_parser = OntologyParser()
276        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
277        ['CL:0000000']
278
279        :param term_id: str ontology term to find high-level terms for
280        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
281        :return: List[str] of high-level terms that the term is a descendant of
282        """
283        if term_id in VALID_NON_ONTOLOGY_TERMS:
284            return []
285        ancestors = self.get_term_ancestors(term_id, include_self=True)
286        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]

Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1 high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
['CL:0000000']
Parameters
  • term_id: str ontology term to find high-level terms for
  • high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns

List[str] of high-level terms that the term is a descendant of

def map_high_level_terms( self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
288    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
289        """
290        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
291        format
292
293        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
294
295        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
296        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
297
298        :param term_ids: list of str ontology terms to map high level terms for
299        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
300        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
301        Each key maps to empty list if there are no ancestors among the provided input.
302        """
303        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}

Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format

{"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

Parameters
  • term_ids: list of str ontology terms to map high level terms for
  • high_level_terms: list of str ontology terms to be mapped to descendant term_ids
Returns

Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list. Each key maps to empty list if there are no ancestors among the provided input.

def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Optional[str]:
305    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
306        """
307        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
308        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
309        if term ID is not valid member of a supported ontology.
310
311        Example
312        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
313        >>> ontology_parser = OntologyParser()
314        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
315        'CL:0000000'
316
317        :param term_id: str ontology term to find highest level term for
318        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
319        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
320        high-level terms
321        """
322        high_level_terms = self.get_high_level_terms(term_id, high_level_terms)
323        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
324        if not high_level_terms:
325            return None
326        return max(high_level_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])

Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
'CL:0000000'
Parameters
  • term_id: str ontology term to find highest level term for
  • high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns

str highest level term that the term is a descendant of, or None if it is not a descendant of any high-level terms

def map_highest_level_term( self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Optional[str]]:
328    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
329        """
330        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
331        format
332
333        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
334
335        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
336        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
337        provided input. Raises ValueError if term ID is not valid member of a supported ontology.
338
339        :param term_ids: list of str ontology terms to map high level terms for
340        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
341        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
342        Each key maps to empty list if there are no ancestors among the provided input.
343        """
344        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}

Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format

{"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided. Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the provided input. Raises ValueError if term ID is not valid member of a supported ontology.

Parameters
  • term_ids: list of str ontology terms to map high level terms for
  • high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
Returns

Dictionary mapping each str term ID to the single str highest level term from the input list that it descends from. A key maps to None if the term descends from none of the provided high level terms.

def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
346    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
347        """
348        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
349        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
350
351        Example
352        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
353        >>> ontology_parser = OntologyParser()
354        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
355        ['CL:0002363']
356
357        :param term_id: str ontology term to find descendants for
358        :param include_self: boolean flag to include the term itself as a descendant
359        :return: List[str] of descendant terms
360        """
361        if term_id in VALID_NON_ONTOLOGY_TERMS:
362            return []
363        ontology_name = self._parse_ontology_name(term_id)
364        descendants = [term_id] if include_self else []
365        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
366            ancestors = candidate_metadata["ancestors"].keys()
367            if term_id in ancestors:
368                descendants.append(candidate_descendant)
369        return descendants

Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
['CL:0002363']
Parameters
  • term_id: str ontology term to find descendants for
  • include_self: boolean flag to include the term itself as a descendant
Returns

List[str] of descendant terms

def map_term_descendants( self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
371    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
372        """
373        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
374         included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
375
376        Example
377        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
378        >>> ontology_parser = OntologyParser()
379        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
380        {
381            'CL:0000003': ['CL:0000003', ...],
382            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
383        }
384
385        :param term_ids: list of str ontology terms to find descendants for
386        :param include_self: boolean flag to include the term itself as an descendant
387        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
388        empty list if there are no descendants.
389        """
390        descendants_dict: Dict[str, List[str]] = dict()
391        ontology_names = set()
392        for term_id in term_ids:
393            if term_id in VALID_NON_ONTOLOGY_TERMS:
394                descendants_dict[term_id] = []
395                continue
396            ontology_name = self._parse_ontology_name(term_id)
397            descendants_dict[term_id] = [term_id] if include_self else []
398            ontology_names.add(ontology_name)
399
400        for ontology in ontology_names:
401            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
402                for ancestor_id in descendants_dict:
403                    ancestors = candidate_metadata["ancestors"].keys()
404                    if ancestor_id in ancestors:
405                        descendants_dict[ancestor_id].append(candidate_descendant)
406
407        return descendants_dict

Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
    'CL:0000003': ['CL:0000003', ...],
    'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
}
Parameters
  • term_ids: list of str ontology terms to find descendants for
  • include_self: boolean flag to include the term itself as a descendant
Returns

Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to empty list if there are no descendants.

def get_term_children(self, term_id: str) -> List[str]:
409    def get_term_children(self, term_id: str) -> List[str]:
410        """
411        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
412        supported ontology.
413
414        Example
415        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
416        >>> ontology_parser = OntologyParser()
417        >>> ontology_parser.get_term_children("CL:0000526")
418        ['CL:0000101', 'CL:4042034']
419
420        :param term_id: str ontology term to find children for
421        :return: List[str] of children terms
422        """
423        if term_id in VALID_NON_ONTOLOGY_TERMS:
424            return []
425        ontology_name = self._parse_ontology_name(term_id)
426        children = []
427        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
428            for ancestor, distance in candidate_metadata["ancestors"].items():
429                if ancestor == term_id and distance == 1:
430                    children.append(candidate_child)
431        return children

Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_children("CL:0000526")
['CL:0000101', 'CL:4042034']
Parameters
  • term_id: str ontology term to find children for
Returns

List[str] of children terms

def get_term_graph(self, term_id: str) -> cellxgene_ontology_guide.entities.OntologyNode:
433    def get_term_graph(self, term_id: str) -> OntologyNode:
434        """
435        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
436        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
437
438        Example
439        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
440        >>> ontology_parser = OntologyParser()
441        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
442        >>> root_node.term_id
443        'CL:0000000'
444        >>> root_node.to_dict() # doctest: +SKIP
445        {
446            "term_id": "CL:0000000",
447            "name": "cell A",
448            "children": [
449                {
450                    "term_id": "CL:0000001",
451                    "name": "cell B",
452                    "children": [...],
453                },
454                {
455                    "term_id": "CL:0000002",
456                    "name": "cell C",
457                    "children": [...],
458                },
459                ...
460            ]
461        }
462        >>> root_node.term_counter # doctest: +SKIP
463        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
464
465        :param term_id: str ontology term to build subtree for
466        :return: OntologyNode representation of graph with term_id as root.
467        """
468        term_label = self.get_term_label(term_id)
469        root = OntologyNode(term_id, term_label)
470        for child_term_id in self.get_term_children(term_id):
471            root.add_child(self.get_term_graph(child_term_id))
472        return root

Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> root_node = ontology_parser.get_term_graph("CL:0000000")
>>> root_node.term_id
'CL:0000000'
>>> root_node.to_dict() # doctest: +SKIP
{
    "term_id": "CL:0000000",
    "name": "cell A",
    "children": [
        {
            "term_id": "CL:0000001",
            "name": "cell B",
            "children": [...],
        },
        {
            "term_id": "CL:0000002",
            "name": "cell C",
            "children": [...],
        },
        ...
    ]
}
>>> root_node.term_counter # doctest: +SKIP
Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
Parameters
  • term_id: str ontology term to build subtree for
Returns

OntologyNode representation of graph with term_id as root.

def is_term_deprecated(self, term_id: str) -> bool:
474    def is_term_deprecated(self, term_id: str) -> bool:
475        """
476        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
477        ontology.
478
479        Example
480        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
481        >>> ontology_parser = OntologyParser()
482        >>> ontology_parser.is_term_deprecated("CL:0000003")
483        True
484
485        :param term_id: str ontology term to check for deprecation
486        :return: boolean flag indicating whether the term is deprecated
487        """
488        if term_id in VALID_NON_ONTOLOGY_TERMS:
489            return False
490        ontology_name = self._parse_ontology_name(term_id)
491        is_deprecated: bool = self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated")
492        return is_deprecated

Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.is_term_deprecated("CL:0000003")
True
Parameters
  • term_id: str ontology term to check for deprecation
Returns

boolean flag indicating whether the term is deprecated

def get_term_replacement(self, term_id: str) -> Optional[str]:
494    def get_term_replacement(self, term_id: str) -> Union[str, None]:
495        """
496        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
497        Raises ValueError if term ID is not valid member of a supported ontology.
498
499        Example
500        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
501        >>> ontology_parser = OntologyParser()
502        >>> ontology_parser.get_term_replacement("CL:0000003")
503        'CL:0000000'
504
505        :param term_id: str ontology term to check a replacement term for
506        :return: replacement str term ID if it exists, None otherwise
507        """
508        if term_id in VALID_NON_ONTOLOGY_TERMS:
509            return None
510        ontology_name = self._parse_ontology_name(term_id)
511        replaced_by: str = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
512        return replaced_by if replaced_by else None

Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_replacement("CL:0000003")
'CL:0000000'
Parameters
  • term_id: str ontology term to check a replacement term for
Returns

replacement str term ID if it exists, None otherwise

def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
514    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
515        """
516        Fetch metadata for a given ontology term. Returns a dict with format
517
518        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
519
520        Comments maps to List[str] of ontology curator comments
521        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
522        Consider maps to List[str] of alternate ontology terms to consider using instead of this term
523
524        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
525        of a supported ontology.
526
527        :param term_id: str ontology term to fetch metadata for
528        :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
529        """
530        if term_id in VALID_NON_ONTOLOGY_TERMS:
531            return {"comments": None, "term_tracker": None, "consider": None}
532        ontology_name = self._parse_ontology_name(term_id)
533        return {
534            key: self.cxg_schema.ontology(ontology_name)[term_id].get(key, None)
535            for key in {"comments", "term_tracker", "consider"}
536        }

Fetch metadata for a given ontology term. Returns a dict with format

{"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

"comments" maps to a List[str] of ontology curator comments. "term_tracker" maps to a str URL where there is discussion around this term's curation (or deprecation). "consider" maps to a List[str] of alternate ontology terms to consider using instead of this term.

All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member of a supported ontology.

Parameters
  • term_id: str ontology term to fetch metadata for
Returns

Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.

def get_term_label(self, term_id: str) -> str:
538    def get_term_label(self, term_id: str) -> str:
539        """
540        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
541        supported ontology.
542
543        Example
544        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
545        >>> ontology_parser = OntologyParser()
546        >>> ontology_parser.get_term_label("CL:0000005")
547        'neural crest derived fibroblast'
548
549        :param term_id: str ontology term to fetch label for
550        :return: str human-readable label for the term
551        """
552        if term_id in VALID_NON_ONTOLOGY_TERMS:
553            return term_id
554        ontology_name = self._parse_ontology_name(term_id)
555        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
556        return label

Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label("CL:0000005")
'neural crest derived fibroblast'
Parameters
  • term_id: str ontology term to fetch label for
Returns

str human-readable label for the term

def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
558    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
559        """
560        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
561        member of a supported ontology.
562
563        Example
564        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
565        >>> ontology_parser = OntologyParser()
566        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
567        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
568
569        :param term_ids: list of str ontology terms to fetch label for
570        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
571        """
572        return {term_id: self.get_term_label(term_id) for term_id in term_ids}

Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
{'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
Parameters
  • term_ids: list of str ontology terms to fetch label for
Returns

Dict[str, str] mapping term IDs to their respective human-readable labels

def get_term_description(self, term_id: str) -> Optional[str]:
574    def get_term_description(self, term_id: str) -> Optional[str]:
575        """
576        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
577        supported ontology.
578
579        Example
580        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
581        >>> ontology_parser = OntologyParser()
582        >>> ontology_parser.get_term_description("CL:0000005")
583        'Any fibroblast that is derived from the neural crest.'
584
585        :param term_id: str ontology term to fetch description for
586        :return: str description for the term
587        """
588        if term_id in VALID_NON_ONTOLOGY_TERMS:
589            return term_id
590        ontology_name = self._parse_ontology_name(term_id)
591        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
592        return description

Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_description("CL:0000005")
'Any fibroblast that is derived from the neural crest.'
Parameters
  • term_id: str ontology term to fetch description for
Returns

str description for the term, or None if the term has no description

def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
594    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
595        """
596        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
597        a supported ontology.
598
599        Example
600        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
601        >>> ontology_parser = OntologyParser()
602        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
603        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
604
605        :param term_ids: list of str ontology terms to fetch descriptions for
606        :return: Dict[str, str] mapping term IDs to their respective descriptions
607        """
608        return {term_id: self.get_term_description(term_id) for term_id in term_ids}

Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
{'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
Parameters
  • term_ids: list of str ontology terms to fetch descriptions for
Returns

Dict[str, Optional[str]] mapping term IDs to their respective descriptions (None when a term has no description)

def get_term_synonyms(self, term_id: str) -> List[str]:
610    def get_term_synonyms(self, term_id: str) -> List[str]:
611        """
612        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
613        Raises ValueError if term ID is not valid member of a supported ontology.
614
615        Example
616        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
617        >>> ontology_parser = OntologyParser()
618        >>> ontology_parser.get_term_synonyms("CL:0000019")
619        ['sperm cell', 'spermatozoid', 'spermatozoon']
620
621        :param term_id: str ontology term to fetch synonyms for
622        :return: List[str] synonyms for the term
623        """
624        if term_id in VALID_NON_ONTOLOGY_TERMS:
625            return []
626        ontology_name = self._parse_ontology_name(term_id)
627        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
628        return synonyms

Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_synonyms("CL:0000019")
['sperm cell', 'spermatozoid', 'spermatozoon']
Parameters
  • term_id: str ontology term to fetch synonyms for
Returns

List[str] synonyms for the term

def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
630    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
631        """
632        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
633        a supported ontology.
634
635        Example
636        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
637        >>> ontology_parser = OntologyParser()
638        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
639        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
640
641        :param term_ids: list of str ontology terms to fetch synonyms for
642        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
643        """
644        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}

Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
{'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
Parameters
  • term_ids: list of str ontology terms to fetch synonyms for
Returns

Dict[str, List[str]] mapping term IDs to their respective synonym lists

def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
646    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
647        """
648        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
649        ontology_name is not a supported ontology.
650
651        Returns None if term ID is not valid member of a supported ontology.
652
653        Example
654        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
655        >>> ontology_parser = OntologyParser()
656        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
657        'CL:0000005'
658
659        :param term_label: str human-readable label to fetch term ID for
660        :param ontology_name: str name of ontology to search for term label in
661        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
662        """
663        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
664        return ontology_term_label_to_id_map.get(term_label)

Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if ontology_name is not a supported ontology.

Returns None if the term label is not found in the given ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
'CL:0000005'
Parameters
  • term_label: str human-readable label to fetch term ID for
  • ontology_name: str name of ontology to search for term label in
Returns

Optional[str] term ID with that label, or None if the label is not found in the ontology

def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
666    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
667        """
668        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
669
670        If no applicable match is found, returns None.
671
672        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
673
674        Example
675        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
676        >>> ontology_parser = OntologyParser()
677        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
678        'UBERON:0000468'
679
680        :param term_id: str ontology term to find equivalent term for
681        :param cross_ontology: str name of ontology to search for equivalent term in
682        :return: Optional[str] equivalent term ID from the cross_ontology
683        """
684        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
685            raise ValueError(
686                f"{cross_ontology} is not in the set of supported cross ontology mappings "
687                f"{self.cxg_schema.cross_ontology_mappings}."
688            )
689        ontology_name = self._parse_ontology_name(term_id)
690        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
691        bridge_term_id: Optional[str] = None
692        if cross_ontology_terms:
693            bridge_term_id = cross_ontology_terms.get(cross_ontology)
694        return bridge_term_id

For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.

If no applicable match is found, returns None.

Raises ValueError if the term ID or cross_ontology is not a valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
'UBERON:0000468'
Parameters
  • term_id: str ontology term to find equivalent term for
  • cross_ontology: str name of ontology to search for equivalent term in
Returns

Optional[str] equivalent term ID from the cross_ontology

def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
696    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
697        """
698        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
699        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
700        of the term for the closest match.
701
702        If no applicable match is found, returns an empty list.
703
704        If multiple ancestors of the same distance have matches, returns all possible closest matches.
705
706        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
707
708        Example
709        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
710        >>> ontology_parser = OntologyParser()
711        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
712        ['UBERON:0000476', 'UBERON:0000920']
713
714        :param term_id: str ontology term to find closest term for
715        :param cross_ontology: str name of ontology to search for closest term in
716        :return: List[str] list of closest term IDs from the cross_ontology
717        """
718        closest_bridge_terms: List[str] = []
719        terms_to_match = [term_id]
720        while terms_to_match and not closest_bridge_terms:
721            for term in terms_to_match:
722                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
723                    closest_bridge_terms.append(closest_bridge_term)
724            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
725        return closest_bridge_terms

For a given term ID, fetch the closest equivalent term ID(s) from a given ontology. If the term itself has an exact match, returns a single-element list containing that match. If no exact match is found, traverses the ancestors of the term for the closest match.

If no applicable match is found, returns an empty list.

If multiple ancestors of the same distance have matches, returns all possible closest matches.

Raises ValueError if the term ID or cross_ontology is not a valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
['UBERON:0000476', 'UBERON:0000920']
Parameters
  • term_id: str ontology term to find closest term for
  • cross_ontology: str name of ontology to search for closest term in
Returns

List[str] list of closest term IDs from the cross_ontology