cellxgene_ontology_guide.ontology_parser

  1import re
  2from typing import Any, Dict, Iterable, List, Optional, Union
  3
  4from cellxgene_ontology_guide._constants import VALID_NON_ONTOLOGY_TERMS
  5from cellxgene_ontology_guide.entities import OntologyNode
  6from cellxgene_ontology_guide.supported_versions import CXGSchema
  7
  8
  9class OntologyParser:
 10    """
 11    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
 12    """
 13
 14    cxg_schema: CXGSchema
 15    """ CXGSchema object to fetch ontology metadata from """
 16
 17    def __init__(self, schema_version: Optional[str] = None):
 18        """
 19        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
 20        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
 21        parse the corresponding ontology metadata.
 22
 23        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
 24        is loaded.
 25        """
 26        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
 27        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
 28            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
 29        }
 30
 31    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
 32        """
 33        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
 34
 35        Example
 36        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
 37        >>> ontology_parser = OntologyParser()
 38        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
 39        {'Label A': 'CL:0000000', ... }
 40
 41        :param ontology_name: str name of ontology to get map of term labels to term IDs
 42        """
 43        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
 44        if not supported_ontology_name:
 45            raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.")
 46
 47        if self.term_label_to_id_map[supported_ontology_name]:
 48            return self.term_label_to_id_map[supported_ontology_name].copy()
 49
 50        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
 51            self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id
 52
 53        return self.term_label_to_id_map[supported_ontology_name].copy()
 54
 55    def _parse_ontology_name(self, term_id: str) -> str:
 56        """
 57        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
 58        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.
 59
 60        :param term_id: str ontology term to parse
 61        :return: str name of ontology that term belongs to
 62        """
 63        # use names groups
 64        patterns = [r"([A-Za-z]+):[0-9]+", r"([A-Za-z]+)_[A-Za-z0-9]+"]
 65        pattern = "|".join(patterns)
 66        match = re.match(pattern, term_id)
 67        if not match:
 68            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")
 69
 70        ontology_term_prefix = match.group(1) or match.group(2)
 71        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
 72        if not ontology_name:
 73            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")
 74
 75        id_separator = self.cxg_schema.supported_ontologies.get(ontology_name).get("id_separator", ":")
 76        if id_separator not in term_id:
 77            raise ValueError(f"{term_id} does not conform to expected format for {ontology_term_prefix} terms.")
 78        return ontology_name
 79
 80    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
 81        """
 82        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.
 83
 84        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
 85        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
 86        it is imported in.
 87        Otherwise, returns None.
 88
 89        :param ontology_term_prefix: str ontology term prefix to check
 90        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
 91        a supported ontology in the CxG schema.
 92        """
 93        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
 94            return ontology_term_prefix
 95        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
 96        return supported_ontology_name
 97
 98    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
 99        """
100        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
101        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
102        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology
103
104        :param term_id: str ontology term to check
105        :param ontology: str name of ontology to check against
106        :return: boolean flag indicating whether the term is supported
107        """
108        try:
109            ontology_name = self._parse_ontology_name(term_id)
110            if ontology and ontology_name != ontology:
111                return False
112            if term_id in self.cxg_schema.ontology(ontology_name):
113                return True
114        except ValueError:
115            return False
116        return False
117
118    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
119        """
120        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
121        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
122
123        Example
124        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
125        >>> ontology_parser = OntologyParser()
126        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
127        ['CL:0000000', 'CL:0000057', ...
128
129        :param term_id: str ontology term to find ancestors for
130        :param include_self: boolean flag to include the term itself as an ancestor
131        :return: flattened List[str] of ancestor terms
132        """
133        if term_id in VALID_NON_ONTOLOGY_TERMS:
134            return []
135        ontology_name = self._parse_ontology_name(term_id)
136        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
137        return ancestors + [term_id] if include_self else ancestors
138
139    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
140        """
141        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
142        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
143
144        Example
145        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
146        >>> ontology_parser = OntologyParser()
147        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
148        {
149            'CL:0000003': ['CL:0000003'],
150            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
151        }
152
153        :param term_ids: list of str ontology terms to find ancestors for
154        :param include_self: boolean flag to include the term itself as an ancestor
155        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
156        empty
157        list if there are no ancestors.
158        """
159        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}
160
161    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
162        """
163        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
164        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
165        supported ontology.
166
167        Example
168        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
169        >>> ontology_parser = OntologyParser()
170        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
171        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
172
173        :param term_id: str ontology term to find ancestors for
174        :param include_self: boolean flag to include the term itself as an ancestor
175        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
176        """
177        if term_id in VALID_NON_ONTOLOGY_TERMS:
178            return {}
179        ontology_name = self._parse_ontology_name(term_id)
180        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
181        return ancestors | {term_id: 0} if include_self else ancestors
182
183    def map_term_ancestors_with_distances(
184        self, term_ids: Iterable[str], include_self: bool = False
185    ) -> Dict[str, Dict[str, int]]:
186        """
187        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
188        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
189        supported ontology.
190
191        Example
192        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
193        >>> ontology_parser = OntologyParser()
194        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
195        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
196
197        :param term_ids: list of str ontology terms to find ancestors for
198        :param include_self: boolean flag to include the term itself as an ancestor
199        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
200        respective distances from the term_id
201        """
202        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}
203
204    def get_term_parents(self, term_id: str) -> List[str]:
205        """
206        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
207        a supported ontology.
208
209        Example
210        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
211        >>> ontology_parser = OntologyParser()
212        >>> ontology_parser.get_term_parents("CL:0000101")
213        ['CL:0000526']
214
215        :param term_id: str ontology term to find parents for
216        :return: List[str] of parent terms
217        """
218        if term_id in VALID_NON_ONTOLOGY_TERMS:
219            return []
220        ontology_name = self._parse_ontology_name(term_id)
221        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
222        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
223        return parents
224
225    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
226        """
227        Get the distance between two ontology terms. The distance is defined as the number of edges between the
228        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
229        Raises ValueError if term IDs are not valid members of a supported ontology.
230
231        :param term_id_1: str ontology term to find distance for
232        :param term_id_2: str ontology term to find distance for
233        :return: int distance between the two terms, measured in number of edges between their shortest path.
234        """
235        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
236        if not lcas:
237            return -1
238        return int(
239            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
240            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
241        )
242
243    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
244        """
245        Get the lowest common ancestors between two ontology terms that is from the given ontology.
246        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
247        Raises ValueError if term IDs are not valid members of a supported ontology.
248
249        :param term_id_1: str ontology term to find LCA for
250        :param term_id_2: str ontology term to find LCA for
251        :return: str term ID of the lowest common ancestor term
252        """
253        # include path to term itself
254        ontology = self._parse_ontology_name(term_id_1)
255        if ontology != self._parse_ontology_name(term_id_2):
256            return []
257        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
258        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
259        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
260        min_sum_distances = float("inf")
261        for ancestors in common_ancestors:
262            sum_distances = ancestors_1[ancestors] + ancestors_2[ancestors]
263            if sum_distances < min_sum_distances:
264                min_sum_distances = sum_distances
265        return [
266            ancestor
267            for ancestor in common_ancestors
268            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
269        ]
270
271    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
272        """
273        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
274        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
275        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
276        Raises ValueError if term ID is not valid member of a supported ontology.
277
278        Example
279        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
280        >>> ontology_parser = OntologyParser()
281        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
282        ['CL:0000000']
283
284        :param term_id: str ontology term to find high-level terms for
285        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
286        :return: List[str] of high-level terms that the term is a descendant of
287        """
288        if term_id in VALID_NON_ONTOLOGY_TERMS:
289            return []
290        ancestors = self.get_term_ancestors(term_id, include_self=True)
291        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]
292
293    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
294        """
295        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
296        format
297
298        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
299
300        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
301        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
302
303        :param term_ids: list of str ontology terms to map high level terms for
304        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
305        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
306        Each key maps to empty list if there are no ancestors among the provided input.
307        """
308        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}
309
310    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
311        """
312        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
313        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
314        if term ID is not valid member of a supported ontology.
315
316        Example
317        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
318        >>> ontology_parser = OntologyParser()
319        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
320        'CL:0000000'
321
322        :param term_id: str ontology term to find highest level term for
323        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
324        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
325        high-level terms
326        """
327        high_level_terms = self.get_high_level_terms(term_id, high_level_terms)
328        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
329        if not high_level_terms:
330            return None
331        return max(high_level_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])
332
333    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
334        """
335        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
336        format
337
338        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
339
340        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
341        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
342        provided input. Raises ValueError if term ID is not valid member of a supported ontology.
343
344        :param term_ids: list of str ontology terms to map high level terms for
345        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
346        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
347        Each key maps to empty list if there are no ancestors among the provided input.
348        """
349        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}
350
351    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
352        """
353        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
354        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
355
356        Example
357        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
358        >>> ontology_parser = OntologyParser()
359        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
360        ['CL:0002363']
361
362        :param term_id: str ontology term to find descendants for
363        :param include_self: boolean flag to include the term itself as a descendant
364        :return: List[str] of descendant terms
365        """
366        if term_id in VALID_NON_ONTOLOGY_TERMS:
367            return []
368        ontology_name = self._parse_ontology_name(term_id)
369        descendants = [term_id] if include_self else []
370        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
371            ancestors = candidate_metadata["ancestors"].keys()
372            if term_id in ancestors:
373                descendants.append(candidate_descendant)
374        return descendants
375
376    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
377        """
378        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
379         included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
380
381        Example
382        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
383        >>> ontology_parser = OntologyParser()
384        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
385        {
386            'CL:0000003': ['CL:0000003', ...],
387            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
388        }
389
390        :param term_ids: list of str ontology terms to find descendants for
391        :param include_self: boolean flag to include the term itself as an descendant
392        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
393        empty list if there are no descendants.
394        """
395        descendants_dict: Dict[str, List[str]] = dict()
396        ontology_names = set()
397        for term_id in term_ids:
398            if term_id in VALID_NON_ONTOLOGY_TERMS:
399                descendants_dict[term_id] = []
400                continue
401            ontology_name = self._parse_ontology_name(term_id)
402            descendants_dict[term_id] = [term_id] if include_self else []
403            ontology_names.add(ontology_name)
404
405        for ontology in ontology_names:
406            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
407                for ancestor_id in descendants_dict:
408                    ancestors = candidate_metadata["ancestors"].keys()
409                    if ancestor_id in ancestors:
410                        descendants_dict[ancestor_id].append(candidate_descendant)
411
412        return descendants_dict
413
414    def get_term_children(self, term_id: str) -> List[str]:
415        """
416        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
417        supported ontology.
418
419        Example
420        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
421        >>> ontology_parser = OntologyParser()
422        >>> ontology_parser.get_term_children("CL:0000526")
423        ['CL:0000101', 'CL:4042034']
424
425        :param term_id: str ontology term to find children for
426        :return: List[str] of children terms
427        """
428        if term_id in VALID_NON_ONTOLOGY_TERMS:
429            return []
430        ontology_name = self._parse_ontology_name(term_id)
431        children = []
432        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
433            for ancestor, distance in candidate_metadata["ancestors"].items():
434                if ancestor == term_id and distance == 1:
435                    children.append(candidate_child)
436        return children
437
438    def get_term_graph(self, term_id: str) -> OntologyNode:
439        """
440        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
441        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
442
443        Example
444        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
445        >>> ontology_parser = OntologyParser()
446        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
447        >>> root_node.term_id
448        'CL:0000000'
449        >>> root_node.to_dict() # doctest: +SKIP
450        {
451            "term_id": "CL:0000000",
452            "name": "cell A",
453            "children": [
454                {
455                    "term_id": "CL:0000001",
456                    "name": "cell B",
457                    "children": [...],
458                },
459                {
460                    "term_id": "CL:0000002",
461                    "name": "cell C",
462                    "children": [...],
463                },
464                ...
465            ]
466        }
467        >>> root_node.term_counter # doctest: +SKIP
468        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
469
470        :param term_id: str ontology term to build subtree for
471        :return: OntologyNode representation of graph with term_id as root.
472        """
473        term_label = self.get_term_label(term_id)
474        root = OntologyNode(term_id, term_label)
475        for child_term_id in self.get_term_children(term_id):
476            root.add_child(self.get_term_graph(child_term_id))
477        return root
478
479    def is_term_deprecated(self, term_id: str) -> bool:
480        """
481        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
482        ontology.
483
484        Example
485        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
486        >>> ontology_parser = OntologyParser()
487        >>> ontology_parser.is_term_deprecated("CL:0000003")
488        True
489
490        :param term_id: str ontology term to check for deprecation
491        :return: boolean flag indicating whether the term is deprecated
492        """
493        if term_id in VALID_NON_ONTOLOGY_TERMS:
494            return False
495        ontology_name = self._parse_ontology_name(term_id)
496        is_deprecated: bool = self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated")
497        return is_deprecated
498
499    def get_term_replacement(self, term_id: str) -> Union[str, None]:
500        """
501        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
502        Raises ValueError if term ID is not valid member of a supported ontology.
503
504        Example
505        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
506        >>> ontology_parser = OntologyParser()
507        >>> ontology_parser.get_term_replacement("CL:0000003")
508        'CL:0000000'
509
510        :param term_id: str ontology term to check a replacement term for
511        :return: replacement str term ID if it exists, None otherwise
512        """
513        if term_id in VALID_NON_ONTOLOGY_TERMS:
514            return None
515        ontology_name = self._parse_ontology_name(term_id)
516        replaced_by: str = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
517        return replaced_by if replaced_by else None
518
519    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
520        """
521        Fetch metadata for a given ontology term. Returns a dict with format
522
523        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
524
525        Comments maps to List[str] of ontology curator comments
526        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
527        Consider maps to List[str] of alternate ontology terms to consider using instead of this term
528
529        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
530        of a supported ontology.
531
532        :param term_id: str ontology term to fetch metadata for
533        :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
534        """
535        if term_id in VALID_NON_ONTOLOGY_TERMS:
536            return {"comments": None, "term_tracker": None, "consider": None}
537        ontology_name = self._parse_ontology_name(term_id)
538        return {
539            key: self.cxg_schema.ontology(ontology_name)[term_id].get(key, None)
540            for key in {"comments", "term_tracker", "consider"}
541        }
542
543    def get_term_label(self, term_id: str) -> str:
544        """
545        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
546        supported ontology.
547
548        Example
549        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
550        >>> ontology_parser = OntologyParser()
551        >>> ontology_parser.get_term_label("CL:0000005")
552        'neural crest derived fibroblast'
553
554        :param term_id: str ontology term to fetch label for
555        :return: str human-readable label for the term
556        """
557        if term_id in VALID_NON_ONTOLOGY_TERMS:
558            return term_id
559        ontology_name = self._parse_ontology_name(term_id)
560        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
561        return label
562
563    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
564        """
565        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
566        member of a supported ontology.
567
568        Example
569        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
570        >>> ontology_parser = OntologyParser()
571        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
572        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
573
574        :param term_ids: list of str ontology terms to fetch label for
575        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
576        """
577        return {term_id: self.get_term_label(term_id) for term_id in term_ids}
578
579    def get_term_description(self, term_id: str) -> Optional[str]:
580        """
581        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
582        supported ontology.
583
584        Example
585        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
586        >>> ontology_parser = OntologyParser()
587        >>> ontology_parser.get_term_description("CL:0000005")
588        'Any fibroblast that is derived from the neural crest.'
589
590        :param term_id: str ontology term to fetch description for
591        :return: str description for the term
592        """
593        if term_id in VALID_NON_ONTOLOGY_TERMS:
594            return term_id
595        ontology_name = self._parse_ontology_name(term_id)
596        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
597        return description
598
599    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
600        """
601        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
602        a supported ontology.
603
604        Example
605        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
606        >>> ontology_parser = OntologyParser()
607        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
608        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
609
610        :param term_ids: list of str ontology terms to fetch descriptions for
611        :return: Dict[str, str] mapping term IDs to their respective descriptions
612        """
613        return {term_id: self.get_term_description(term_id) for term_id in term_ids}
614
615    def get_term_synonyms(self, term_id: str) -> List[str]:
616        """
617        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
618        Raises ValueError if term ID is not valid member of a supported ontology.
619
620        Example
621        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
622        >>> ontology_parser = OntologyParser()
623        >>> ontology_parser.get_term_synonyms("CL:0000019")
624        ['sperm cell', 'spermatozoid', 'spermatozoon']
625
626        :param term_id: str ontology term to fetch synonyms for
627        :return: List[str] synonyms for the term
628        """
629        if term_id in VALID_NON_ONTOLOGY_TERMS:
630            return []
631        ontology_name = self._parse_ontology_name(term_id)
632        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
633        return synonyms
634
635    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
636        """
637        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
638        a supported ontology.
639
640        Example
641        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
642        >>> ontology_parser = OntologyParser()
643        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
644        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
645
646        :param term_ids: list of str ontology terms to fetch synonyms for
647        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
648        """
649        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}
650
651    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
652        """
653        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
654        ontology_name is not a supported ontology.
655
656        Returns None if term ID is not valid member of a supported ontology.
657
658        Example
659        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
660        >>> ontology_parser = OntologyParser()
661        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
662        'CL:0000005'
663
664        :param term_label: str human-readable label to fetch term ID for
665        :param ontology_name: str name of ontology to search for term label in
666        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
667        """
668        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
669        return ontology_term_label_to_id_map.get(term_label)
670
671    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
672        """
673        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
674
675        If no applicable match is found, returns None.
676
677        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
678
679        Example
680        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
681        >>> ontology_parser = OntologyParser()
682        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
683        'UBERON:0000468'
684
685        :param term_id: str ontology term to find equivalent term for
686        :param cross_ontology: str name of ontology to search for equivalent term in
687        :return: Optional[str] equivalent term ID from the cross_ontology
688        """
689        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
690            raise ValueError(
691                f"{cross_ontology} is not in the set of supported cross ontology mappings "
692                f"{self.cxg_schema.cross_ontology_mappings}."
693            )
694        ontology_name = self._parse_ontology_name(term_id)
695        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
696        bridge_term_id: Optional[str] = None
697        if cross_ontology_terms:
698            bridge_term_id = cross_ontology_terms.get(cross_ontology)
699        return bridge_term_id
700
701    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
702        """
703        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
704        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
705        of the term for the closest match.
706
707        If no applicable match is found, returns an empty list.
708
709        If multiple ancestors of the same distance have matches, returns all possible closest matches.
710
711        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
712
713        Example
714        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
715        >>> ontology_parser = OntologyParser()
716        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
717        ['UBERON:0000476', 'UBERON:0000920']
718
719        :param term_id: str ontology term to find closest term for
720        :param cross_ontology: str name of ontology to search for closest term in
721        :return: List[str] list of closest term IDs from the cross_ontology
722        """
723        closest_bridge_terms: List[str] = []
724        terms_to_match = [term_id]
725        while terms_to_match and not closest_bridge_terms:
726            for term in terms_to_match:
727                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
728                    closest_bridge_terms.append(closest_bridge_term)
729            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
730        return closest_bridge_terms
class OntologyParser:
 10class OntologyParser:
 11    """
 12    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
 13    """
 14
 15    cxg_schema: CXGSchema
 16    """ CXGSchema object to fetch ontology metadata from """
 17
 18    def __init__(self, schema_version: Optional[str] = None):
 19        """
 20        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
 21        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
 22        parse the corresponding ontology metadata.
 23
 24        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
 25        is loaded.
 26        """
 27        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
 28        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
 29            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
 30        }
 31
 32    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
 33        """
 34        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
 35
 36        Example
 37        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
 38        >>> ontology_parser = OntologyParser()
 39        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
 40        {'Label A': 'CL:0000000', ... }
 41
 42        :param ontology_name: str name of ontology to get map of term labels to term IDs
 43        """
 44        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
 45        if not supported_ontology_name:
 46            raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.")
 47
 48        if self.term_label_to_id_map[supported_ontology_name]:
 49            return self.term_label_to_id_map[supported_ontology_name].copy()
 50
 51        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
 52            self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id
 53
 54        return self.term_label_to_id_map[supported_ontology_name].copy()
 55
 56    def _parse_ontology_name(self, term_id: str) -> str:
 57        """
 58        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
 59        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.
 60
 61        :param term_id: str ontology term to parse
 62        :return: str name of ontology that term belongs to
 63        """
 64        # use names groups
 65        patterns = [r"([A-Za-z]+):[0-9]+", r"([A-Za-z]+)_[A-Za-z0-9]+"]
 66        pattern = "|".join(patterns)
 67        match = re.match(pattern, term_id)
 68        if not match:
 69            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")
 70
 71        ontology_term_prefix = match.group(1) or match.group(2)
 72        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
 73        if not ontology_name:
 74            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")
 75
 76        id_separator = self.cxg_schema.supported_ontologies.get(ontology_name).get("id_separator", ":")
 77        if id_separator not in term_id:
 78            raise ValueError(f"{term_id} does not conform to expected format for {ontology_term_prefix} terms.")
 79        return ontology_name
 80
 81    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
 82        """
 83        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.
 84
 85        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
 86        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
 87        it is imported in.
 88        Otherwise, returns None.
 89
 90        :param ontology_term_prefix: str ontology term prefix to check
 91        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
 92        a supported ontology in the CxG schema.
 93        """
 94        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
 95            return ontology_term_prefix
 96        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
 97        return supported_ontology_name
 98
 99    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
100        """
101        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
102        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
103        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology
104
105        :param term_id: str ontology term to check
106        :param ontology: str name of ontology to check against
107        :return: boolean flag indicating whether the term is supported
108        """
109        try:
110            ontology_name = self._parse_ontology_name(term_id)
111            if ontology and ontology_name != ontology:
112                return False
113            if term_id in self.cxg_schema.ontology(ontology_name):
114                return True
115        except ValueError:
116            return False
117        return False
118
119    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
120        """
121        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
122        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
123
124        Example
125        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
126        >>> ontology_parser = OntologyParser()
127        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
128        ['CL:0000000', 'CL:0000057', ...
129
130        :param term_id: str ontology term to find ancestors for
131        :param include_self: boolean flag to include the term itself as an ancestor
132        :return: flattened List[str] of ancestor terms
133        """
134        if term_id in VALID_NON_ONTOLOGY_TERMS:
135            return []
136        ontology_name = self._parse_ontology_name(term_id)
137        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
138        return ancestors + [term_id] if include_self else ancestors
139
140    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
141        """
142        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
143        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
144
145        Example
146        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
147        >>> ontology_parser = OntologyParser()
148        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
149        {
150            'CL:0000003': ['CL:0000003'],
151            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
152        }
153
154        :param term_ids: list of str ontology terms to find ancestors for
155        :param include_self: boolean flag to include the term itself as an ancestor
156        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
157        empty
158        list if there are no ancestors.
159        """
160        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}
161
162    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
163        """
164        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
165        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
166        supported ontology.
167
168        Example
169        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
170        >>> ontology_parser = OntologyParser()
171        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
172        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
173
174        :param term_id: str ontology term to find ancestors for
175        :param include_self: boolean flag to include the term itself as an ancestor
176        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
177        """
178        if term_id in VALID_NON_ONTOLOGY_TERMS:
179            return {}
180        ontology_name = self._parse_ontology_name(term_id)
181        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
182        return ancestors | {term_id: 0} if include_self else ancestors
183
184    def map_term_ancestors_with_distances(
185        self, term_ids: Iterable[str], include_self: bool = False
186    ) -> Dict[str, Dict[str, int]]:
187        """
188        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
189        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
190        supported ontology.
191
192        Example
193        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
194        >>> ontology_parser = OntologyParser()
195        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
196        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
197
198        :param term_ids: list of str ontology terms to find ancestors for
199        :param include_self: boolean flag to include the term itself as an ancestor
200        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
201        respective distances from the term_id
202        """
203        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}
204
205    def get_term_parents(self, term_id: str) -> List[str]:
206        """
207        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
208        a supported ontology.
209
210        Example
211        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
212        >>> ontology_parser = OntologyParser()
213        >>> ontology_parser.get_term_parents("CL:0000101")
214        ['CL:0000526']
215
216        :param term_id: str ontology term to find parents for
217        :return: List[str] of parent terms
218        """
219        if term_id in VALID_NON_ONTOLOGY_TERMS:
220            return []
221        ontology_name = self._parse_ontology_name(term_id)
222        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
223        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
224        return parents
225
226    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
227        """
228        Get the distance between two ontology terms. The distance is defined as the number of edges between the
229        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
230        Raises ValueError if term IDs are not valid members of a supported ontology.
231
232        :param term_id_1: str ontology term to find distance for
233        :param term_id_2: str ontology term to find distance for
234        :return: int distance between the two terms, measured in number of edges between their shortest path.
235        """
236        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
237        if not lcas:
238            return -1
239        return int(
240            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
241            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
242        )
243
244    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
245        """
246        Get the lowest common ancestors between two ontology terms that is from the given ontology.
247        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
248        Raises ValueError if term IDs are not valid members of a supported ontology.
249
250        :param term_id_1: str ontology term to find LCA for
251        :param term_id_2: str ontology term to find LCA for
252        :return: str term ID of the lowest common ancestor term
253        """
254        # include path to term itself
255        ontology = self._parse_ontology_name(term_id_1)
256        if ontology != self._parse_ontology_name(term_id_2):
257            return []
258        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
259        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
260        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
261        min_sum_distances = float("inf")
262        for ancestors in common_ancestors:
263            sum_distances = ancestors_1[ancestors] + ancestors_2[ancestors]
264            if sum_distances < min_sum_distances:
265                min_sum_distances = sum_distances
266        return [
267            ancestor
268            for ancestor in common_ancestors
269            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
270        ]
271
272    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
273        """
274        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
275        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
276        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
277        Raises ValueError if term ID is not valid member of a supported ontology.
278
279        Example
280        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
281        >>> ontology_parser = OntologyParser()
282        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
283        ['CL:0000000']
284
285        :param term_id: str ontology term to find high-level terms for
286        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
287        :return: List[str] of high-level terms that the term is a descendant of
288        """
289        if term_id in VALID_NON_ONTOLOGY_TERMS:
290            return []
291        ancestors = self.get_term_ancestors(term_id, include_self=True)
292        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]
293
294    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
295        """
296        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
297        format
298
299        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
300
301        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
302        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
303
304        :param term_ids: list of str ontology terms to map high level terms for
305        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
306        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
307        Each key maps to empty list if there are no ancestors among the provided input.
308        """
309        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}
310
311    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
312        """
313        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
314        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
315        if term ID is not valid member of a supported ontology.
316
317        Example
318        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
319        >>> ontology_parser = OntologyParser()
320        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
321        'CL:0000000'
322
323        :param term_id: str ontology term to find highest level term for
324        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
325        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
326        high-level terms
327        """
328        high_level_terms = self.get_high_level_terms(term_id, high_level_terms)
329        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
330        if not high_level_terms:
331            return None
332        return max(high_level_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])
333
334    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
335        """
336        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
337        format
338
339        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
340
341        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
342        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
343        provided input. Raises ValueError if term ID is not valid member of a supported ontology.
344
345        :param term_ids: list of str ontology terms to map high level terms for
346        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
347        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
348        Each key maps to empty list if there are no ancestors among the provided input.
349        """
350        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}
351
352    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
353        """
354        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
355        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
356
357        Example
358        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
359        >>> ontology_parser = OntologyParser()
360        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
361        ['CL:0002363']
362
363        :param term_id: str ontology term to find descendants for
364        :param include_self: boolean flag to include the term itself as a descendant
365        :return: List[str] of descendant terms
366        """
367        if term_id in VALID_NON_ONTOLOGY_TERMS:
368            return []
369        ontology_name = self._parse_ontology_name(term_id)
370        descendants = [term_id] if include_self else []
371        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
372            ancestors = candidate_metadata["ancestors"].keys()
373            if term_id in ancestors:
374                descendants.append(candidate_descendant)
375        return descendants
376
377    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
378        """
379        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
380         included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
381
382        Example
383        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
384        >>> ontology_parser = OntologyParser()
385        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
386        {
387            'CL:0000003': ['CL:0000003', ...],
388            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
389        }
390
391        :param term_ids: list of str ontology terms to find descendants for
392        :param include_self: boolean flag to include the term itself as an descendant
393        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
394        empty list if there are no descendants.
395        """
396        descendants_dict: Dict[str, List[str]] = dict()
397        ontology_names = set()
398        for term_id in term_ids:
399            if term_id in VALID_NON_ONTOLOGY_TERMS:
400                descendants_dict[term_id] = []
401                continue
402            ontology_name = self._parse_ontology_name(term_id)
403            descendants_dict[term_id] = [term_id] if include_self else []
404            ontology_names.add(ontology_name)
405
406        for ontology in ontology_names:
407            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
408                for ancestor_id in descendants_dict:
409                    ancestors = candidate_metadata["ancestors"].keys()
410                    if ancestor_id in ancestors:
411                        descendants_dict[ancestor_id].append(candidate_descendant)
412
413        return descendants_dict
414
415    def get_term_children(self, term_id: str) -> List[str]:
416        """
417        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
418        supported ontology.
419
420        Example
421        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
422        >>> ontology_parser = OntologyParser()
423        >>> ontology_parser.get_term_children("CL:0000526")
424        ['CL:0000101', 'CL:4042034']
425
426        :param term_id: str ontology term to find children for
427        :return: List[str] of children terms
428        """
429        if term_id in VALID_NON_ONTOLOGY_TERMS:
430            return []
431        ontology_name = self._parse_ontology_name(term_id)
432        children = []
433        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
434            for ancestor, distance in candidate_metadata["ancestors"].items():
435                if ancestor == term_id and distance == 1:
436                    children.append(candidate_child)
437        return children
438
439    def get_term_graph(self, term_id: str) -> OntologyNode:
440        """
441        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
442        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
443
444        Example
445        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
446        >>> ontology_parser = OntologyParser()
447        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
448        >>> root_node.term_id
449        'CL:0000000'
450        >>> root_node.to_dict() # doctest: +SKIP
451        {
452            "term_id": "CL:0000000",
453            "name": "cell A",
454            "children": [
455                {
456                    "term_id": "CL:0000001",
457                    "name": "cell B",
458                    "children": [...],
459                },
460                {
461                    "term_id": "CL:0000002",
462                    "name": "cell C",
463                    "children": [...],
464                },
465                ...
466            ]
467        }
468        >>> root_node.term_counter # doctest: +SKIP
469        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
470
471        :param term_id: str ontology term to build subtree for
472        :return: OntologyNode representation of graph with term_id as root.
473        """
474        term_label = self.get_term_label(term_id)
475        root = OntologyNode(term_id, term_label)
476        for child_term_id in self.get_term_children(term_id):
477            root.add_child(self.get_term_graph(child_term_id))
478        return root
479
480    def is_term_deprecated(self, term_id: str) -> bool:
481        """
482        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
483        ontology.
484
485        Example
486        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
487        >>> ontology_parser = OntologyParser()
488        >>> ontology_parser.is_term_deprecated("CL:0000003")
489        True
490
491        :param term_id: str ontology term to check for deprecation
492        :return: boolean flag indicating whether the term is deprecated
493        """
494        if term_id in VALID_NON_ONTOLOGY_TERMS:
495            return False
496        ontology_name = self._parse_ontology_name(term_id)
497        is_deprecated: bool = self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated")
498        return is_deprecated
499
500    def get_term_replacement(self, term_id: str) -> Union[str, None]:
501        """
502        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
503        Raises ValueError if term ID is not valid member of a supported ontology.
504
505        Example
506        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
507        >>> ontology_parser = OntologyParser()
508        >>> ontology_parser.get_term_replacement("CL:0000003")
509        'CL:0000000'
510
511        :param term_id: str ontology term to check a replacement term for
512        :return: replacement str term ID if it exists, None otherwise
513        """
514        if term_id in VALID_NON_ONTOLOGY_TERMS:
515            return None
516        ontology_name = self._parse_ontology_name(term_id)
517        replaced_by: str = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
518        return replaced_by if replaced_by else None
519
520    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
521        """
522        Fetch metadata for a given ontology term. Returns a dict with format
523
524        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
525
526        Comments maps to List[str] of ontology curator comments
527        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
528        Consider maps to List[str] of alternate ontology terms to consider using instead of this term
529
530        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
531        of a supported ontology.
532
533        :param term_id: str ontology term to fetch metadata for
534        :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
535        """
536        if term_id in VALID_NON_ONTOLOGY_TERMS:
537            return {"comments": None, "term_tracker": None, "consider": None}
538        ontology_name = self._parse_ontology_name(term_id)
539        return {
540            key: self.cxg_schema.ontology(ontology_name)[term_id].get(key, None)
541            for key in {"comments", "term_tracker", "consider"}
542        }
543
544    def get_term_label(self, term_id: str) -> str:
545        """
546        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
547        supported ontology.
548
549        Example
550        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
551        >>> ontology_parser = OntologyParser()
552        >>> ontology_parser.get_term_label("CL:0000005")
553        'neural crest derived fibroblast'
554
555        :param term_id: str ontology term to fetch label for
556        :return: str human-readable label for the term
557        """
558        if term_id in VALID_NON_ONTOLOGY_TERMS:
559            return term_id
560        ontology_name = self._parse_ontology_name(term_id)
561        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
562        return label
563
564    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
565        """
566        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
567        member of a supported ontology.
568
569        Example
570        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
571        >>> ontology_parser = OntologyParser()
572        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
573        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
574
575        :param term_ids: list of str ontology terms to fetch label for
576        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
577        """
578        return {term_id: self.get_term_label(term_id) for term_id in term_ids}
579
580    def get_term_description(self, term_id: str) -> Optional[str]:
581        """
582        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
583        supported ontology.
584
585        Example
586        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
587        >>> ontology_parser = OntologyParser()
588        >>> ontology_parser.get_term_description("CL:0000005")
589        'Any fibroblast that is derived from the neural crest.'
590
591        :param term_id: str ontology term to fetch description for
592        :return: str description for the term
593        """
594        if term_id in VALID_NON_ONTOLOGY_TERMS:
595            return term_id
596        ontology_name = self._parse_ontology_name(term_id)
597        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
598        return description
599
600    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
601        """
602        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
603        a supported ontology.
604
605        Example
606        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
607        >>> ontology_parser = OntologyParser()
608        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
609        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
610
611        :param term_ids: list of str ontology terms to fetch descriptions for
612        :return: Dict[str, str] mapping term IDs to their respective descriptions
613        """
614        return {term_id: self.get_term_description(term_id) for term_id in term_ids}
615
616    def get_term_synonyms(self, term_id: str) -> List[str]:
617        """
618        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
619        Raises ValueError if term ID is not valid member of a supported ontology.
620
621        Example
622        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
623        >>> ontology_parser = OntologyParser()
624        >>> ontology_parser.get_term_synonyms("CL:0000019")
625        ['sperm cell', 'spermatozoid', 'spermatozoon']
626
627        :param term_id: str ontology term to fetch synonyms for
628        :return: List[str] synonyms for the term
629        """
630        if term_id in VALID_NON_ONTOLOGY_TERMS:
631            return []
632        ontology_name = self._parse_ontology_name(term_id)
633        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
634        return synonyms
635
636    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
637        """
638        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
639        a supported ontology.
640
641        Example
642        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
643        >>> ontology_parser = OntologyParser()
644        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
645        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
646
647        :param term_ids: list of str ontology terms to fetch synonyms for
648        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
649        """
650        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}
651
652    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
653        """
654        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
655        ontology_name is not a supported ontology.
656
657        Returns None if term ID is not valid member of a supported ontology.
658
659        Example
660        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
661        >>> ontology_parser = OntologyParser()
662        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
663        'CL:0000005'
664
665        :param term_label: str human-readable label to fetch term ID for
666        :param ontology_name: str name of ontology to search for term label in
667        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
668        """
669        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
670        return ontology_term_label_to_id_map.get(term_label)
671
672    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
673        """
674        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
675
676        If no applicable match is found, returns None.
677
678        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
679
680        Example
681        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
682        >>> ontology_parser = OntologyParser()
683        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
684        'UBERON:0000468'
685
686        :param term_id: str ontology term to find equivalent term for
687        :param cross_ontology: str name of ontology to search for equivalent term in
688        :return: Optional[str] equivalent term ID from the cross_ontology
689        """
690        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
691            raise ValueError(
692                f"{cross_ontology} is not in the set of supported cross ontology mappings "
693                f"{self.cxg_schema.cross_ontology_mappings}."
694            )
695        ontology_name = self._parse_ontology_name(term_id)
696        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
697        bridge_term_id: Optional[str] = None
698        if cross_ontology_terms:
699            bridge_term_id = cross_ontology_terms.get(cross_ontology)
700        return bridge_term_id
701
702    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
703        """
704        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
705        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
706        of the term for the closest match.
707
708        If no applicable match is found, returns an empty list.
709
710        If multiple ancestors of the same distance have matches, returns all possible closest matches.
711
712        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
713
714        Example
715        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
716        >>> ontology_parser = OntologyParser()
717        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
718        ['UBERON:0000476', 'UBERON:0000920']
719
720        :param term_id: str ontology term to find closest term for
721        :param cross_ontology: str name of ontology to search for closest term in
722        :return: List[str] list of closest term IDs from the cross_ontology
723        """
724        closest_bridge_terms: List[str] = []
725        terms_to_match = [term_id]
726        while terms_to_match and not closest_bridge_terms:
727            for term in terms_to_match:
728                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
729                    closest_bridge_terms.append(closest_bridge_term)
730            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
731        return closest_bridge_terms

An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.

OntologyParser(schema_version: Optional[str] = None)
18    def __init__(self, schema_version: Optional[str] = None):
19        """
20        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
21        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
22        parse the corresponding ontology metadata.
23
24        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
25        is loaded.
26        """
27        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
28        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
29            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
30        }

Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema version. If not cached, it will make a network call to GitHub Release Assets to load in memory and parse the corresponding ontology metadata.

Parameters
  • schema_version: str version of the schema to load ontology metadata for. If not provided, the latest is loaded.

CXGSchema object to fetch ontology metadata from

term_label_to_id_map: Dict[str, Dict[str, str]]
def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
32    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
33        """
34        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
35
36        Example
37        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
38        >>> ontology_parser = OntologyParser()
39        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
40        {'Label A': 'CL:0000000', ... }
41
42        :param ontology_name: str name of ontology to get map of term labels to term IDs
43        """
44        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
45        if not supported_ontology_name:
46            raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.")
47
48        if self.term_label_to_id_map[supported_ontology_name]:
49            return self.term_label_to_id_map[supported_ontology_name].copy()
50
51        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
52            self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id
53
54        return self.term_label_to_id_map[supported_ontology_name].copy()

Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
{'Label A': 'CL:0000000', ... }
Parameters
  • ontology_name: str name of ontology to get map of term labels to term IDs
def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
 99    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
100        """
101        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
102        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
103        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology
104
105        :param term_id: str ontology term to check
106        :param ontology: str name of ontology to check against
107        :return: boolean flag indicating whether the term is supported
108        """
109        try:
110            ontology_name = self._parse_ontology_name(term_id)
111            if ontology and ontology_name != ontology:
112                return False
113            if term_id in self.cxg_schema.ontology(ontology_name):
114                return True
115        except ValueError:
116            return False
117        return False

Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology

Parameters
  • term_id: str ontology term to check
  • ontology: str name of ontology to check against
Returns

boolean flag indicating whether the term is supported

def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
119    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
120        """
121        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
122        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
123
124        Example
125        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
126        >>> ontology_parser = OntologyParser()
127        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
128        ['CL:0000000', 'CL:0000057', ...
129
130        :param term_id: str ontology term to find ancestors for
131        :param include_self: boolean flag to include the term itself as an ancestor
132        :return: flattened List[str] of ancestor terms
133        """
134        if term_id in VALID_NON_ONTOLOGY_TERMS:
135            return []
136        ontology_name = self._parse_ontology_name(term_id)
137        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
138        return ancestors + [term_id] if include_self else ancestors

Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
['CL:0000000', 'CL:0000057', ...
Parameters
  • term_id: str ontology term to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

flattened List[str] of ancestor terms

def map_term_ancestors( self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
140    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
141        """
142        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
143        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
144
145        Example
146        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
147        >>> ontology_parser = OntologyParser()
148        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
149        {
150            'CL:0000003': ['CL:0000003'],
151            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
152        }
153
154        :param term_ids: list of str ontology terms to find ancestors for
155        :param include_self: boolean flag to include the term itself as an ancestor
156        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
157        empty
158        list if there are no ancestors.
159        """
160        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}

Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
    'CL:0000003': ['CL:0000003'],
    'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
}
Parameters
  • term_ids: list of str ontology terms to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to empty list if there are no ancestors.

def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
162    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
163        """
164        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
165        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
166        supported ontology.
167
168        Example
169        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
170        >>> ontology_parser = OntologyParser()
171        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
172        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
173
174        :param term_id: str ontology term to find ancestors for
175        :param include_self: boolean flag to include the term itself as an ancestor
176        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
177        """
178        if term_id in VALID_NON_ONTOLOGY_TERMS:
179            return {}
180        ontology_name = self._parse_ontology_name(term_id)
181        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
182        return ancestors | {term_id: 0} if include_self else ancestors

Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
{'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
Parameters
  • term_id: str ontology term to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

Dict[str, int] map of ancestor terms and their respective distances from the term_id

def map_term_ancestors_with_distances( self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, Dict[str, int]]:
184    def map_term_ancestors_with_distances(
185        self, term_ids: Iterable[str], include_self: bool = False
186    ) -> Dict[str, Dict[str, int]]:
187        """
188        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
189        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
190        supported ontology.
191
192        Example
193        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
194        >>> ontology_parser = OntologyParser()
195        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
196        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
197
198        :param term_ids: list of str ontology terms to find ancestors for
199        :param include_self: boolean flag to include the term itself as an ancestor
200        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
201        respective distances from the term_id
202        """
203        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
{'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
Parameters
  • term_ids: list of str ontology terms to find ancestors for
  • include_self: boolean flag to include the term itself as an ancestor
Returns

Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their respective distances from the term_id

def get_term_parents(self, term_id: str) -> List[str]:
205    def get_term_parents(self, term_id: str) -> List[str]:
206        """
207        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
208        a supported ontology.
209
210        Example
211        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
212        >>> ontology_parser = OntologyParser()
213        >>> ontology_parser.get_term_parents("CL:0000101")
214        ['CL:0000526']
215
216        :param term_id: str ontology term to find parents for
217        :return: List[str] of parent terms
218        """
219        if term_id in VALID_NON_ONTOLOGY_TERMS:
220            return []
221        ontology_name = self._parse_ontology_name(term_id)
222        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
223        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
224        return parents

Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_parents("CL:0000101")
['CL:0000526']
Parameters
  • term_id: str ontology term to find parents for
Returns

List[str] of parent terms

def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
226    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
227        """
228        Get the distance between two ontology terms. The distance is defined as the number of edges between the
229        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
230        Raises ValueError if term IDs are not valid members of a supported ontology.
231
232        :param term_id_1: str ontology term to find distance for
233        :param term_id_2: str ontology term to find distance for
234        :return: int distance between the two terms, measured in number of edges between their shortest path.
235        """
236        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
237        if not lcas:
238            return -1
239        return int(
240            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
241            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
242        )

Get the distance between two ontology terms. The distance is defined as the number of edges between the two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint. Raises ValueError if term IDs are not valid members of a supported ontology.

Parameters
  • term_id_1: str ontology term to find distance for
  • term_id_2: str ontology term to find distance for
Returns

int distance between the two terms, measured in number of edges between their shortest path.

def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
244    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
245        """
246        Get the lowest common ancestors between two ontology terms that is from the given ontology.
247        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
248        Raises ValueError if term IDs are not valid members of a supported ontology.
249
250        :param term_id_1: str ontology term to find LCA for
251        :param term_id_2: str ontology term to find LCA for
252        :return: str term ID of the lowest common ancestor term
253        """
254        # include path to term itself
255        ontology = self._parse_ontology_name(term_id_1)
256        if ontology != self._parse_ontology_name(term_id_2):
257            return []
258        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
259        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
260        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
261        min_sum_distances = float("inf")
262        for ancestors in common_ancestors:
263            sum_distances = ancestors_1[ancestors] + ancestors_2[ancestors]
264            if sum_distances < min_sum_distances:
265                min_sum_distances = sum_distances
266        return [
267            ancestor
268            for ancestor in common_ancestors
269            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
270        ]

Get the lowest common ancestors of two ontology terms. Terms must be from the same ontology; an empty list is returned otherwise. Ontologies are DAGs, so there may be multiple lowest common ancestors. Raises ValueError if term IDs are not valid members of a supported ontology.

Parameters
  • term_id_1: str ontology term to find LCA for
  • term_id_2: str ontology term to find LCA for
Returns

List[str] of term IDs of the lowest common ancestor terms

def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
272    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
273        """
274        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
275        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
276        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
277        Raises ValueError if term ID is not valid member of a supported ontology.
278
279        Example
280        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
281        >>> ontology_parser = OntologyParser()
282        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
283        ['CL:0000000']
284
285        :param term_id: str ontology term to find high-level terms for
286        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
287        :return: List[str] of high-level terms that the term is a descendant of
288        """
289        if term_id in VALID_NON_ONTOLOGY_TERMS:
290            return []
291        ancestors = self.get_term_ancestors(term_id, include_self=True)
292        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]

Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1 high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
['CL:0000000']
Parameters
  • term_id: str ontology term to find high-level terms for
  • high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns

List[str] of high-level terms that the term is a descendant of

def map_high_level_terms( self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
294    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
295        """
296        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
297        format
298
299        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
300
301        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
302        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
303
304        :param term_ids: list of str ontology terms to map high level terms for
305        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
306        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
307        Each key maps to empty list if there are no ancestors among the provided input.
308        """
309        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}

Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format

{"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

Parameters
  • term_ids: list of str ontology terms to map high level terms for
  • high_level_terms: list of str ontology terms to be mapped to descendant term_ids
Returns

Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list. Each key maps to empty list if there are no ancestors among the provided input.

def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Optional[str]:
311    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
312        """
313        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
314        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
315        if term ID is not valid member of a supported ontology.
316
317        Example
318        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
319        >>> ontology_parser = OntologyParser()
320        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
321        'CL:0000000'
322
323        :param term_id: str ontology term to find highest level term for
324        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
325        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
326        high-level terms
327        """
328        high_level_terms = self.get_high_level_terms(term_id, high_level_terms)
329        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
330        if not high_level_terms:
331            return None
332        return max(high_level_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])

Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
'CL:0000000'
Parameters
  • term_id: str ontology term to find highest level term for
  • high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns

str highest level term that the term is a descendant of, or None if it is not a descendant of any high-level terms

def map_highest_level_term( self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Optional[str]]:
334    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
335        """
336        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
337        format
338
339        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
340
341        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
342        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
343        provided input. Raises ValueError if term ID is not valid member of a supported ontology.
344
345        :param term_ids: list of str ontology terms to map high level terms for
346        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
347        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
348        Each key maps to empty list if there are no ancestors among the provided input.
349        """
350        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}

Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format

{"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided. Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the provided input. Raises ValueError if term ID is not valid member of a supported ontology.

Parameters
  • term_ids: list of str ontology terms to map high level terms for
  • high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
Returns

Dictionary mapping str term IDs to their respective highest level term from the input list. Each key maps to None if there are no ancestors among the provided input.

def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
352    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
353        """
354        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
355        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
356
357        Example
358        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
359        >>> ontology_parser = OntologyParser()
360        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
361        ['CL:0002363']
362
363        :param term_id: str ontology term to find descendants for
364        :param include_self: boolean flag to include the term itself as a descendant
365        :return: List[str] of descendant terms
366        """
367        if term_id in VALID_NON_ONTOLOGY_TERMS:
368            return []
369        ontology_name = self._parse_ontology_name(term_id)
370        descendants = [term_id] if include_self else []
371        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
372            ancestors = candidate_metadata["ancestors"].keys()
373            if term_id in ancestors:
374                descendants.append(candidate_descendant)
375        return descendants

Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
['CL:0002363']
Parameters
  • term_id: str ontology term to find descendants for
  • include_self: boolean flag to include the term itself as a descendant
Returns

List[str] of descendant terms

def map_term_descendants( self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
377    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
378        """
379        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
380         included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
381
382        Example
383        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
384        >>> ontology_parser = OntologyParser()
385        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
386        {
387            'CL:0000003': ['CL:0000003', ...],
388            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
389        }
390
391        :param term_ids: list of str ontology terms to find descendants for
392        :param include_self: boolean flag to include the term itself as an descendant
393        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
394        empty list if there are no descendants.
395        """
396        descendants_dict: Dict[str, List[str]] = dict()
397        ontology_names = set()
398        for term_id in term_ids:
399            if term_id in VALID_NON_ONTOLOGY_TERMS:
400                descendants_dict[term_id] = []
401                continue
402            ontology_name = self._parse_ontology_name(term_id)
403            descendants_dict[term_id] = [term_id] if include_self else []
404            ontology_names.add(ontology_name)
405
406        for ontology in ontology_names:
407            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
408                for ancestor_id in descendants_dict:
409                    ancestors = candidate_metadata["ancestors"].keys()
410                    if ancestor_id in ancestors:
411                        descendants_dict[ancestor_id].append(candidate_descendant)
412
413        return descendants_dict

Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
    'CL:0000003': ['CL:0000003', ...],
    'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
}
Parameters
  • term_ids: list of str ontology terms to find descendants for
  • include_self: boolean flag to include the term itself as a descendant
Returns

Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to empty list if there are no descendants.

def get_term_children(self, term_id: str) -> List[str]:
415    def get_term_children(self, term_id: str) -> List[str]:
416        """
417        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
418        supported ontology.
419
420        Example
421        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
422        >>> ontology_parser = OntologyParser()
423        >>> ontology_parser.get_term_children("CL:0000526")
424        ['CL:0000101', 'CL:4042034']
425
426        :param term_id: str ontology term to find children for
427        :return: List[str] of children terms
428        """
429        if term_id in VALID_NON_ONTOLOGY_TERMS:
430            return []
431        ontology_name = self._parse_ontology_name(term_id)
432        children = []
433        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
434            for ancestor, distance in candidate_metadata["ancestors"].items():
435                if ancestor == term_id and distance == 1:
436                    children.append(candidate_child)
437        return children

Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_children("CL:0000526")
['CL:0000101', 'CL:4042034']
Parameters
  • term_id: str ontology term to find children for
Returns

List[str] of children terms

def get_term_graph(self, term_id: str) -> cellxgene_ontology_guide.entities.OntologyNode:
439    def get_term_graph(self, term_id: str) -> OntologyNode:
440        """
441        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
442        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
443
444        Example
445        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
446        >>> ontology_parser = OntologyParser()
447        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
448        >>> root_node.term_id
449        'CL:0000000'
450        >>> root_node.to_dict() # doctest: +SKIP
451        {
452            "term_id": "CL:0000000",
453            "name": "cell A",
454            "children": [
455                {
456                    "term_id": "CL:0000001",
457                    "name": "cell B",
458                    "children": [...],
459                },
460                {
461                    "term_id": "CL:0000002",
462                    "name": "cell C",
463                    "children": [...],
464                },
465                ...
466            ]
467        }
468        >>> root_node.term_counter # doctest: +SKIP
469        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
470
471        :param term_id: str ontology term to build subtree for
472        :return: OntologyNode representation of graph with term_id as root.
473        """
474        term_label = self.get_term_label(term_id)
475        root = OntologyNode(term_id, term_label)
476        for child_term_id in self.get_term_children(term_id):
477            root.add_child(self.get_term_graph(child_term_id))
478        return root

Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> root_node = ontology_parser.get_term_graph("CL:0000000")
>>> root_node.term_id
'CL:0000000'
>>> root_node.to_dict() # doctest: +SKIP
{
    "term_id": "CL:0000000",
    "name": "cell A",
    "children": [
        {
            "term_id": "CL:0000001",
            "name": "cell B",
            "children": [...],
        },
        {
            "term_id": "CL:0000002",
            "name": "cell C",
            "children": [...],
        },
        ...
    ]
}
>>> root_node.term_counter # doctest: +SKIP
Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
Parameters
  • term_id: str ontology term to build subtree for
Returns

OntologyNode representation of graph with term_id as root.

def is_term_deprecated(self, term_id: str) -> bool:
480    def is_term_deprecated(self, term_id: str) -> bool:
481        """
482        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
483        ontology.
484
485        Example
486        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
487        >>> ontology_parser = OntologyParser()
488        >>> ontology_parser.is_term_deprecated("CL:0000003")
489        True
490
491        :param term_id: str ontology term to check for deprecation
492        :return: boolean flag indicating whether the term is deprecated
493        """
494        if term_id in VALID_NON_ONTOLOGY_TERMS:
495            return False
496        ontology_name = self._parse_ontology_name(term_id)
497        is_deprecated: bool = self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated")
498        return is_deprecated

Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.is_term_deprecated("CL:0000003")
True
Parameters
  • term_id: str ontology term to check for deprecation
Returns

boolean flag indicating whether the term is deprecated

def get_term_replacement(self, term_id: str) -> Optional[str]:
500    def get_term_replacement(self, term_id: str) -> Union[str, None]:
501        """
502        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
503        Raises ValueError if term ID is not valid member of a supported ontology.
504
505        Example
506        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
507        >>> ontology_parser = OntologyParser()
508        >>> ontology_parser.get_term_replacement("CL:0000003")
509        'CL:0000000'
510
511        :param term_id: str ontology term to check a replacement term for
512        :return: replacement str term ID if it exists, None otherwise
513        """
514        if term_id in VALID_NON_ONTOLOGY_TERMS:
515            return None
516        ontology_name = self._parse_ontology_name(term_id)
517        replaced_by: str = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
518        return replaced_by if replaced_by else None

Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_replacement("CL:0000003")
'CL:0000000'
Parameters
  • term_id: str ontology term to check a replacement term for
Returns

replacement str term ID if it exists, None otherwise

def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
520    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
521        """
522        Fetch metadata for a given ontology term. Returns a dict with format
523
524        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
525
526        Comments maps to List[str] of ontology curator comments
527        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
528        Consider maps to List[str] of alternate ontology terms to consider using instead of this term
529
530        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
531        of a supported ontology.
532
533        :param term_id: str ontology term to fetch metadata for
534        :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
535        """
536        if term_id in VALID_NON_ONTOLOGY_TERMS:
537            return {"comments": None, "term_tracker": None, "consider": None}
538        ontology_name = self._parse_ontology_name(term_id)
539        return {
540            key: self.cxg_schema.ontology(ontology_name)[term_id].get(key, None)
541            for key in {"comments", "term_tracker", "consider"}
542        }

Fetch metadata for a given ontology term. Returns a dict with format

{"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

Comments maps to List[str] of ontology curator comments. Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation). Consider maps to List[str] of alternate ontology terms to consider using instead of this term.

All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member of a supported ontology.

Parameters
  • term_id: str ontology term to fetch metadata for
Returns

Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.

def get_term_label(self, term_id: str) -> str:
544    def get_term_label(self, term_id: str) -> str:
545        """
546        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
547        supported ontology.
548
549        Example
550        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
551        >>> ontology_parser = OntologyParser()
552        >>> ontology_parser.get_term_label("CL:0000005")
553        'neural crest derived fibroblast'
554
555        :param term_id: str ontology term to fetch label for
556        :return: str human-readable label for the term
557        """
558        if term_id in VALID_NON_ONTOLOGY_TERMS:
559            return term_id
560        ontology_name = self._parse_ontology_name(term_id)
561        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
562        return label

Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label("CL:0000005")
'neural crest derived fibroblast'
Parameters
  • term_id: str ontology term to fetch label for
Returns

str human-readable label for the term

def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
564    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
565        """
566        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
567        member of a supported ontology.
568
569        Example
570        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
571        >>> ontology_parser = OntologyParser()
572        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
573        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
574
575        :param term_ids: list of str ontology terms to fetch label for
576        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
577        """
578        return {term_id: self.get_term_label(term_id) for term_id in term_ids}

Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
{'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
Parameters
  • term_ids: list of str ontology terms to fetch label for
Returns

Dict[str, str] mapping term IDs to their respective human-readable labels

def get_term_description(self, term_id: str) -> Optional[str]:
580    def get_term_description(self, term_id: str) -> Optional[str]:
581        """
582        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
583        supported ontology.
584
585        Example
586        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
587        >>> ontology_parser = OntologyParser()
588        >>> ontology_parser.get_term_description("CL:0000005")
589        'Any fibroblast that is derived from the neural crest.'
590
591        :param term_id: str ontology term to fetch description for
592        :return: str description for the term
593        """
594        if term_id in VALID_NON_ONTOLOGY_TERMS:
595            return term_id
596        ontology_name = self._parse_ontology_name(term_id)
597        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
598        return description

Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_description("CL:0000005")
'Any fibroblast that is derived from the neural crest.'
Parameters
  • term_id: str ontology term to fetch description for
Returns

str description for the term

def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
600    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
601        """
602        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
603        a supported ontology.
604
605        Example
606        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
607        >>> ontology_parser = OntologyParser()
608        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
609        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
610
611        :param term_ids: list of str ontology terms to fetch descriptions for
612        :return: Dict[str, str] mapping term IDs to their respective descriptions
613        """
614        return {term_id: self.get_term_description(term_id) for term_id in term_ids}

Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
{'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
Parameters
  • term_ids: list of str ontology terms to fetch descriptions for
Returns

Dict[str, Optional[str]] mapping term IDs to their respective descriptions (None for a term with no description)

def get_term_synonyms(self, term_id: str) -> List[str]:
616    def get_term_synonyms(self, term_id: str) -> List[str]:
617        """
618        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
619        Raises ValueError if term ID is not valid member of a supported ontology.
620
621        Example
622        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
623        >>> ontology_parser = OntologyParser()
624        >>> ontology_parser.get_term_synonyms("CL:0000019")
625        ['sperm cell', 'spermatozoid', 'spermatozoon']
626
627        :param term_id: str ontology term to fetch synonyms for
628        :return: List[str] synonyms for the term
629        """
630        if term_id in VALID_NON_ONTOLOGY_TERMS:
631            return []
632        ontology_name = self._parse_ontology_name(term_id)
633        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
634        return synonyms

Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_synonyms("CL:0000019")
['sperm cell', 'spermatozoid', 'spermatozoon']
Parameters
  • term_id: str ontology term to fetch synonyms for
Returns

List[str] synonyms for the term

def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
636    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
637        """
638        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
639        a supported ontology.
640
641        Example
642        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
643        >>> ontology_parser = OntologyParser()
644        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
645        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
646
647        :param term_ids: list of str ontology terms to fetch synonyms for
648        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
649        """
650        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}

Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
{'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
Parameters
  • term_ids: list of str ontology terms to fetch synonyms for
Returns

Dict[str, List[str]] mapping term IDs to their respective synonym lists

def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
652    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
653        """
654        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
655        ontology_name is not a supported ontology.
656
657        Returns None if term ID is not valid member of a supported ontology.
658
659        Example
660        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
661        >>> ontology_parser = OntologyParser()
662        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
663        'CL:0000005'
664
665        :param term_label: str human-readable label to fetch term ID for
666        :param ontology_name: str name of ontology to search for term label in
667        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
668        """
669        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
670        return ontology_term_label_to_id_map.get(term_label)

Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if ontology_name is not a supported ontology.

Returns None if the term label is not found in the given ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
'CL:0000005'
Parameters
  • term_label: str human-readable label to fetch term ID for
  • ontology_name: str name of ontology to search for term label in
Returns

Optional[str] term ID with that label, or None if the label is not found in the ontology

def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
672    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
673        """
674        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
675
676        If no applicable match is found, returns None.
677
678        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
679
680        Example
681        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
682        >>> ontology_parser = OntologyParser()
683        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
684        'UBERON:0000468'
685
686        :param term_id: str ontology term to find equivalent term for
687        :param cross_ontology: str name of ontology to search for equivalent term in
688        :return: Optional[str] equivalent term ID from the cross_ontology
689        """
690        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
691            raise ValueError(
692                f"{cross_ontology} is not in the set of supported cross ontology mappings "
693                f"{self.cxg_schema.cross_ontology_mappings}."
694            )
695        ontology_name = self._parse_ontology_name(term_id)
696        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
697        bridge_term_id: Optional[str] = None
698        if cross_ontology_terms:
699            bridge_term_id = cross_ontology_terms.get(cross_ontology)
700        return bridge_term_id

For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.

If no applicable match is found, returns None.

Raises ValueError if the term ID or cross_ontology is not a valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
'UBERON:0000468'
Parameters
  • term_id: str ontology term to find equivalent term for
  • cross_ontology: str name of ontology to search for equivalent term in
Returns

Optional[str] equivalent term ID from the cross_ontology

def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
702    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
703        """
704        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
705        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
706        of the term for the closest match.
707
708        If no applicable match is found, returns an empty list.
709
710        If multiple ancestors of the same distance have matches, returns all possible closest matches.
711
712        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
713
714        Example
715        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
716        >>> ontology_parser = OntologyParser()
717        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
718        ['UBERON:0000476', 'UBERON:0000920']
719
720        :param term_id: str ontology term to find closest term for
721        :param cross_ontology: str name of ontology to search for closest term in
722        :return: List[str] list of closest term IDs from the cross_ontology
723        """
724        closest_bridge_terms: List[str] = []
725        terms_to_match = [term_id]
726        while terms_to_match and not closest_bridge_terms:
727            for term in terms_to_match:
728                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
729                    closest_bridge_terms.append(closest_bridge_term)
730            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
731        return closest_bridge_terms

For a given term ID, fetch the equivalent term ID from a given ontology. If an exact match is found, returns a single-element list containing it. If no exact match is found, traverses the ancestors of the term for the closest match.

If no applicable match is found, returns an empty list.

If multiple ancestors of the same distance have matches, returns all possible closest matches.

Raises ValueError if the term ID or cross_ontology is not a valid member of a supported ontology.

Example

>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
['UBERON:0000476', 'UBERON:0000920']
Parameters
  • term_id: str ontology term to find closest term for
  • cross_ontology: str name of ontology to search for closest term in
Returns

List[str] list of closest term IDs from the cross_ontology