cellxgene_ontology_guide.ontology_parser
import re
from typing import Any, Dict, Iterable, List, Optional, Union

from cellxgene_ontology_guide._constants import VALID_NON_ONTOLOGY_TERMS
from cellxgene_ontology_guide.entities import OntologyNode
from cellxgene_ontology_guide.supported_versions import CXGSchema


class OntologyParser:
    """
    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
    """

    cxg_schema: CXGSchema
    """ CXGSchema object to fetch ontology metadata from """

    def __init__(self, schema_version: Optional[str] = None):
        """
        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
        parse the corresponding ontology metadata.

        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
        is loaded.
        """
        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
        # One (initially empty) label -> term ID cache per supported ontology; filled lazily by
        # get_term_label_to_id_map.
        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
        }

    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
        """
        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
        A copy of the cached map is returned, so callers may mutate the result freely.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
        {'Label A': 'CL:0000000', ... }

        :param ontology_name: str name of ontology to get map of term labels to term IDs
        :return: Dict[str, str] mapping each term label in the ontology to its term ID
        :raises ValueError: if ontology_name is not a supported ontology
        """
        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
        if not supported_ontology_name:
            # Report the name the caller passed in; the resolved name is always None on this branch.
            raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")

        label_to_id = self.term_label_to_id_map[supported_ontology_name]
        if not label_to_id:
            # Cache miss: populate the shared cache entry in place.
            for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
                label_to_id[term_metadata["label"]] = term_id

        return label_to_id.copy()

    def _parse_ontology_name(self, term_id: str) -> str:
        """
        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.

        :param term_id: str ontology term to parse
        :return: str name of ontology that term belongs to
        :raises ValueError: if the term ID is malformed, belongs to an unsupported ontology, or does not use the
        ID separator configured for its ontology
        """
        # Two alternatives ("PREFIX:ID" and "PREFIX_ID"), each with its own capture group, so exactly one of
        # group(1) / group(2) is populated on a match.
        patterns = [r"([A-Za-z]+):[A-Za-z0-9]+", r"([A-Za-z]+)_[A-Za-z0-9]+"]
        pattern = "|".join(patterns)
        match = re.match(pattern, term_id)
        if not match:
            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")

        ontology_term_prefix = match.group(1) or match.group(2)
        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
        if not ontology_name:
            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")

        # Ontologies may declare a non-default separator between prefix and ID; ":" is assumed otherwise.
        id_separator = self.cxg_schema.supported_ontologies.get(ontology_name).get("id_separator", ":")
        if id_separator not in term_id:
            raise ValueError(f"{term_id} does not conform to expected format for {ontology_term_prefix} terms.")
        return ontology_name

    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
        """
        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.

        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
        If ontology_term_prefix matches a supported ontology name case-insensitively, returns that ontology name.
        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
        it is imported in.
        Otherwise, returns None.

        :param ontology_term_prefix: str ontology term prefix to check
        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
        a supported ontology in the CxG schema.
        """
        supported_ontologies = self.cxg_schema.supported_ontologies
        if ontology_term_prefix in supported_ontologies:
            return ontology_term_prefix
        # Case-insensitive lookup (e.g. "uniprot" prefix matches "UniProt" key)
        lower_prefix = ontology_term_prefix.lower()
        for key in supported_ontologies:
            if key.lower() == lower_prefix:
                return str(key)
        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
        return supported_ontology_name

    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
        """
        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology

        :param term_id: str ontology term to check
        :param ontology: str name of ontology to check against
        :return: boolean flag indicating whether the term is supported
        """
        try:
            ontology_name = self._parse_ontology_name(term_id)
            if ontology and ontology_name != ontology:
                return False
            return term_id in self.cxg_schema.ontology(ontology_name)
        except ValueError:
            # Malformed or unsupported term IDs are simply "not valid" rather than an error here.
            return False

    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
        an ancestor (appended after its ancestors). Raises ValueError if the term ID is not a valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
        ['CL:0000000', 'CL:0000057', ...

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: flattened List[str] of ancestor terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
        if include_self:
            ancestors.append(term_id)
        return ancestors

    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
        included as an ancestor. Raises ValueError if the term ID is not a valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003'],
            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
        }

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
        empty
        list if there are no ancestors.
        """
        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}

    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
        """
        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
        the term itself will be included as an ancestor (with distance 0). Raises ValueError if the term ID is not a
        valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {}
        ontology_name = self._parse_ontology_name(term_id)
        # Copy so that neither the caller nor the include_self entry below can mutate the loaded ontology data.
        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
        if include_self:
            ancestors[term_id] = 0
        return ancestors

    def map_term_ancestors_with_distances(
        self, term_ids: Iterable[str], include_self: bool = False
    ) -> Dict[str, Dict[str, int]]:
        """
        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not a valid member
        of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
        respective distances from the term_id
        """
        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

    def get_term_parents(self, term_id: str) -> List[str]:
        """
        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not a valid member
        of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_parents("CL:0000101")
        ['CL:0000526']

        :param term_id: str ontology term to find parents for
        :return: List[str] of parent terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
        # Parents are exactly the ancestors one edge away.
        return [ancestor for ancestor, distance in ancestors.items() if distance == 1]

    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
        """
        Get the distance between two ontology terms. The distance is defined as the number of edges between the
        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find distance for
        :param term_id_2: str ontology term to find distance for
        :return: int distance between the two terms, measured in number of edges between their shortest path.
        """
        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
        if not lcas:
            return -1
        # Every lowest common ancestor has the same summed distance, so any one of them will do.
        lca = lcas[0]
        distances_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
        distances_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
        return int(distances_1[lca] + distances_2[lca])

    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
        """
        Get the lowest common ancestors between two ontology terms. Terms must be from the same ontology; an empty
        list is returned otherwise. Ontologies are DAGs, so there may be multiple lowest common ancestors.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find LCA for
        :param term_id_2: str ontology term to find LCA for
        :return: List[str] of term IDs of the lowest common ancestor terms (those with the smallest summed distance
        to both input terms), in no guaranteed order. Empty if the terms share no ancestor.
        """
        ontology = self._parse_ontology_name(term_id_1)
        if ontology != self._parse_ontology_name(term_id_2):
            return []
        # include path to term itself, so that a term can be the LCA of itself and one of its descendants
        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
        if not common_ancestors:
            return []
        summed_distances = {ancestor: ancestors_1[ancestor] + ancestors_2[ancestor] for ancestor in common_ancestors}
        min_sum_distances = min(summed_distances.values())
        return [ancestor for ancestor, distance in summed_distances.items() if distance == min_sum_distances]

    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
        """
        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
        Raises ValueError if term ID is not a valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
        ['CL:0000000']

        :param term_id: str ontology term to find high-level terms for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: List[str] of high-level terms that the term is a descendant of
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        # A set gives constant-time membership checks; output order still follows high_level_terms.
        ancestors = set(self.get_term_ancestors(term_id, include_self=True))
        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]

    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
        as a descendant. Raises ValueError if term ID is not a valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
        Each key maps to empty list if there are no ancestors among the provided input.
        """
        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}

    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
        """
        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide and is furthest
        from the term. Raises ValueError if term ID is not a valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
        'CL:0000000'

        :param term_id: str ontology term to find highest level term for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
        high-level terms
        """
        matched_terms = self.get_high_level_terms(term_id, high_level_terms)
        if not matched_terms:
            return None
        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
        # "Highest" means the matched ancestor with the greatest distance from term_id.
        return max(matched_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])

    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
        provided input. Raises ValueError if term ID is not a valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective highest level str term from the input list.
        Each key maps to None if there are no ancestors among the provided input.
        """
        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}

    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
        a descendant. Raises ValueError if term ID is not a valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
        ['CL:0002363']

        :param term_id: str ontology term to find descendants for
        :param include_self: boolean flag to include the term itself as a descendant
        :return: List[str] of descendant terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        descendants = [term_id] if include_self else []
        # The ontology only stores ancestors per term, so descendants are found by scanning every term.
        descendants.extend(
            candidate_descendant
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if term_id in candidate_metadata["ancestors"]
        )
        return descendants

    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
        included as a descendant. Raises ValueError if term ID is not a valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003', ...],
            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
        }

        :param term_ids: list of str ontology terms to find descendants for
        :param include_self: boolean flag to include the term itself as a descendant
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
        empty list if there are no descendants.
        """
        descendants_dict: Dict[str, List[str]] = dict()
        ontology_names = set()
        for term_id in term_ids:
            if term_id in VALID_NON_ONTOLOGY_TERMS:
                descendants_dict[term_id] = []
                continue
            ontology_name = self._parse_ontology_name(term_id)
            descendants_dict[term_id] = [term_id] if include_self else []
            ontology_names.add(ontology_name)

        # Scan each involved ontology once, crediting every candidate to all requested terms found among its
        # ancestors, rather than re-scanning the ontology per requested term.
        for ontology in ontology_names:
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
                ancestors = candidate_metadata["ancestors"]
                for ancestor_id, descendants in descendants_dict.items():
                    if ancestor_id in ancestors:
                        descendants.append(candidate_descendant)

        return descendants_dict

    def get_term_children(self, term_id: str) -> List[str]:
        """
        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not a valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_children("CL:0000526")
        ['CL:0000101', 'CL:4042034']

        :param term_id: str ontology term to find children for
        :return: List[str] of children terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # A child is any term that lists term_id as an ancestor at distance exactly 1.
        return [
            candidate_child
            for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if candidate_metadata["ancestors"].get(term_id) == 1
        ]

    def get_term_graph(self, term_id: str) -> OntologyNode:
        """
        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
        same ontology as the root term ID. Raises ValueError if term ID is not a valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
        >>> root_node.term_id
        'CL:0000000'
        >>> root_node.to_dict() # doctest: +SKIP
        {
            "term_id": "CL:0000000",
            "name": "cell A",
            "children": [
                {
                    "term_id": "CL:0000001",
                    "name": "cell B",
                    "children": [...],
                },
                {
                    "term_id": "CL:0000002",
                    "name": "cell C",
                    "children": [...],
                },
                ...
            ]
        }
        >>> root_node.term_counter # doctest: +SKIP
        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...

        :param term_id: str ontology term to build subtree for
        :return: OntologyNode representation of graph with term_id as root.
        """
        term_label = self.get_term_label(term_id)
        root = OntologyNode(term_id, term_label)
        # Recursively build and attach the subgraph rooted at each direct child.
        for child_term_id in self.get_term_children(term_id):
            root.add_child(self.get_term_graph(child_term_id))
        return root

    def is_term_deprecated(self, term_id: str) -> bool:
        """
        Check if an ontology term is deprecated. Raises ValueError if term ID is not a valid member of a supported
        ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.is_term_deprecated("CL:0000003")
        True

        :param term_id: str ontology term to check for deprecation
        :return: boolean flag indicating whether the term is deprecated
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return False
        ontology_name = self._parse_ontology_name(term_id)
        # Coerce so that a term without a "deprecated" entry yields False rather than None.
        return bool(self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated", False))

    def get_term_replacement(self, term_id: str) -> Union[str, None]:
        """
        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
        Raises ValueError if term ID is not a valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_replacement("CL:0000003")
        'CL:0000000'

        :param term_id: str ontology term to check a replacement term for
        :return: replacement str term ID if it exists, None otherwise
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return None
        ontology_name = self._parse_ontology_name(term_id)
        replaced_by: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
        # Normalize any falsy value (missing key, empty string) to None.
        return replaced_by or None

    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
        """
        Fetch metadata for a given ontology term. Returns a dict with format

        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

        "comments" maps to List[str] of ontology curator comments
        "term_tracker" maps to a str url where there is discussion around this term's curation (or deprecation).
        "consider" maps to List[str] of alternate ontology terms to consider using instead of this term

        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not a valid member
        of a supported ontology.

        :param term_id: str ontology term to fetch metadata for
        :return: Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
        """
        # A tuple (not a set) keeps the key order of the returned dict deterministic.
        metadata_keys = ("comments", "term_tracker", "consider")
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {key: None for key in metadata_keys}
        ontology_name = self._parse_ontology_name(term_id)
        term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
        return {key: term_metadata.get(key, None) for key in metadata_keys}

    def get_term_label(self, term_id: str) -> str:
        """
        Fetch the human-readable label for a given ontology term. Terms in VALID_NON_ONTOLOGY_TERMS are returned
        unchanged as their own label. Raises ValueError if term ID is not a valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label("CL:0000005")
        'neural crest derived fibroblast'

        :param term_id: str ontology term to fetch label for
        :return: str human-readable label for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return term_id
        ontology_name = self._parse_ontology_name(term_id)
        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
        return label

    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
        """
        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not a valid
        member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}

        :param term_ids: list of str ontology terms to fetch label for
        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
        """
        return {term_id: self.get_term_label(term_id) for term_id in term_ids}

    def get_term_description(self, term_id: str) -> Optional[str]:
        """
        Fetch the description for a given ontology term. Terms in VALID_NON_ONTOLOGY_TERMS are returned unchanged
        as their own description. Raises ValueError if term ID is not a valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_description("CL:0000005")
        'Any fibroblast that is derived from the neural crest.'

        :param term_id: str ontology term to fetch description for
        :return: str description for the term, or None if the term has no description
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return term_id
        ontology_name = self._parse_ontology_name(term_id)
        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
        return description

    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
        """
        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not a valid member
        of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}

        :param term_ids: list of str ontology terms to fetch descriptions for
        :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions (None if absent)
        """
        return {term_id: self.get_term_description(term_id) for term_id in term_ids}

    def get_term_synonyms(self, term_id: str) -> List[str]:
        """
        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
        Raises ValueError if term ID is not a valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_synonyms("CL:0000019")
        ['sperm cell', 'spermatozoid', 'spermatozoon']

        :param term_id: str ontology term to fetch synonyms for
        :return: List[str] synonyms for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # list(...) hands back a fresh list so callers cannot mutate the loaded ontology data.
        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
        return synonyms

    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
        """
        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not a valid member
        of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}

        :param term_ids: list of str ontology terms to fetch synonyms for
        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
        """
        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}

    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
        """
        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
        ontology_name is not a supported ontology.

        Returns None if no term in the ontology has the given label.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
        'CL:0000005'

        :param term_label: str human-readable label to fetch term ID for
        :param ontology_name: str name of ontology to search for term label in
        :return: Optional[str] term ID with that label, or None if the label is not found in the ontology
        """
        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
        return ontology_term_label_to_id_map.get(term_label)

    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.

        If no applicable match is found, returns None.

        Raises ValueError if term ID or cross_ontology are not valid members of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
        'UBERON:0000468'

        :param term_id: str ontology term to find equivalent term for
        :param cross_ontology: str name of ontology to search for equivalent term in
        :return: Optional[str] equivalent term ID from the cross_ontology
        """
        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
            raise ValueError(
                f"{cross_ontology} is not in the set of supported cross ontology mappings "
                f"{self.cxg_schema.cross_ontology_mappings}."
            )
        ontology_name = self._parse_ontology_name(term_id)
        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
        bridge_term_id: Optional[str] = None
        if cross_ontology_terms:
            bridge_term_id = cross_ontology_terms.get(cross_ontology)
        return bridge_term_id

    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
        of the term for the closest match.

        If no applicable match is found, returns an empty list.

        If multiple ancestors of the same distance have matches, returns all possible closest matches. Each match
        is listed once, in the order it was first encountered.

        Raises ValueError if term ID or cross_ontology are not valid members of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
        ['UBERON:0000476', 'UBERON:0000920']

        :param term_id: str ontology term to find closest term for
        :param cross_ontology: str name of ontology to search for closest term in
        :return: List[str] list of closest term IDs from the cross_ontology
        """
        closest_bridge_terms: List[str] = []
        terms_to_match: List[str] = [term_id]
        # Walk up the ontology one level of parents at a time, stopping at the first level with any match.
        while terms_to_match:
            for term in terms_to_match:
                bridge_term = self.get_bridge_term_id(term, cross_ontology)
                # Several terms at one level may bridge to the same cross-ontology term; report it once.
                if bridge_term and bridge_term not in closest_bridge_terms:
                    closest_bridge_terms.append(bridge_term)
            if closest_bridge_terms:
                break
            # De-duplicate (preserving order) so ancestors shared by several terms are visited only once.
            terms_to_match = list(
                dict.fromkeys(parent for child in terms_to_match for parent in self.get_term_parents(child))
            )
        return closest_bridge_terms
class OntologyParser:
    """
    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
    """

    cxg_schema: CXGSchema
    """ CXGSchema object to fetch ontology metadata from """

    def __init__(self, schema_version: Optional[str] = None):
        """
        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
        parse the corresponding ontology metadata.

        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
        is loaded.
        """
        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
        # Per-ontology cache of {term label: term ID}; starts empty and is filled on first use by
        # get_term_label_to_id_map.
        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
        }

    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
        """
        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
        Raises ValueError if ontology_name is not a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
        {'Label A': 'CL:0000000', ... }

        :param ontology_name: str name of ontology to get map of term labels to term IDs
        :return: Dict[str, str] copy of the cached label-to-ID map, safe for the caller to mutate
        """
        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
        if not supported_ontology_name:
            # Report the name the caller passed in; the resolved name is always None on this path.
            raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")

        label_to_id = self.term_label_to_id_map[supported_ontology_name]
        if not label_to_id:
            for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
                label_to_id[term_metadata["label"]] = term_id

        # Hand out a copy so callers cannot corrupt the cache.
        return label_to_id.copy()

    def _parse_ontology_name(self, term_id: str) -> str:
        """
        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.

        :param term_id: str ontology term to parse
        :return: str name of ontology that term belongs to
        """
        # Each alternative captures the ontology prefix in its own group: group 1 for "PREFIX:ID" and
        # group 2 for "PREFIX_ID". Only the alternative that matched has a non-None group.
        patterns = [r"([A-Za-z]+):[A-Za-z0-9]+", r"([A-Za-z]+)_[A-Za-z0-9]+"]
        pattern = "|".join(patterns)
        match = re.match(pattern, term_id)
        if not match:
            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")

        ontology_term_prefix = match.group(1) or match.group(2)
        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
        if not ontology_name:
            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")

        # The regex accepts either separator; reject IDs that use the wrong one for their ontology.
        id_separator = self.cxg_schema.supported_ontologies.get(ontology_name).get("id_separator", ":")
        if id_separator not in term_id:
            raise ValueError(f"{term_id} does not conform to expected format for {ontology_term_prefix} terms.")
        return ontology_name

    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
        """
        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.

        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
        If ontology_term_prefix matches a supported ontology name case-insensitively, returns that supported name.
        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source
        ontology it is imported in.
        Otherwise, returns None.

        :param ontology_term_prefix: str ontology term prefix to check
        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
        a supported ontology in the CxG schema.
        """
        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
            return ontology_term_prefix
        # Case-insensitive lookup (e.g. "uniprot" prefix matches "UniProt" key)
        lower_prefix = ontology_term_prefix.lower()
        for key in self.cxg_schema.supported_ontologies:
            if key.lower() == lower_prefix:
                return str(key)
        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
        return supported_ontology_name

    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
        """
        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported
        ontology.

        :param term_id: str ontology term to check
        :param ontology: str name of ontology to check against
        :return: boolean flag indicating whether the term is supported
        """
        try:
            ontology_name = self._parse_ontology_name(term_id)
            if ontology and ontology_name != ontology:
                return False
            return term_id in self.cxg_schema.ontology(ontology_name)
        except ValueError:
            # Malformed ID or unsupported ontology: not valid, rather than an error.
            return False

    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
        ['CL:0000000', 'CL:0000057', ...

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: flattened List[str] of ancestor terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
        return (ancestors + [term_id]) if include_self else ancestors

    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003'],
            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
        }

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
        empty list if there are no ancestors.
        """
        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}

    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
        """
        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {}
        ontology_name = self._parse_ontology_name(term_id)
        # Copy so that callers cannot mutate the loaded ontology metadata.
        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
        return (ancestors | {term_id: 0}) if include_self else ancestors

    def map_term_ancestors_with_distances(
        self, term_ids: Iterable[str], include_self: bool = False
    ) -> Dict[str, Dict[str, int]]:
        """
        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
        respective distances from the term_id
        """
        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

    def get_term_parents(self, term_id: str) -> List[str]:
        """
        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_parents("CL:0000101")
        ['CL:0000526']

        :param term_id: str ontology term to find parents for
        :return: List[str] of parent terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
        # Parents are exactly the ancestors at distance 1.
        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
        return parents

    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
        """
        Get the distance between two ontology terms. The distance is defined as the number of edges between the
        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find distance for
        :param term_id_2: str ontology term to find distance for
        :return: int distance between the two terms, measured in number of edges between their shortest path.
        """
        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
        if not lcas:
            return -1
        # Every lowest common ancestor has the same summed distance, so any one of them gives the answer.
        lca = lcas[0]
        return int(
            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lca]
            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lca]
        )

    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
        """
        Get the lowest common ancestors between two ontology terms. Terms must be from the same ontology; an empty
        list is returned if they are not. Ontologies are DAGs, so there may be multiple lowest common ancestors.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find LCA for
        :param term_id_2: str ontology term to find LCA for
        :return: List[str] term IDs of the lowest common ancestor terms, i.e. the common ancestors with the
        smallest summed distance to both input terms
        """
        ontology = self._parse_ontology_name(term_id_1)
        if ontology != self._parse_ontology_name(term_id_2):
            return []
        # include_self=True so that a term which is an ancestor of the other can itself be the LCA
        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
        common_ancestors = set(ancestors_1.keys()) & set(ancestors_2.keys())
        if not common_ancestors:
            return []
        min_sum_distances = min(ancestors_1[ancestor] + ancestors_2[ancestor] for ancestor in common_ancestors)
        return [
            ancestor
            for ancestor in common_ancestors
            if ancestors_1[ancestor] + ancestors_2[ancestor] == min_sum_distances
        ]

    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
        """
        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
        ['CL:0000000']

        :param term_id: str ontology term to find high-level terms for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: List[str] of high-level terms that the term is a descendant of
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ancestors = self.get_term_ancestors(term_id, include_self=True)
        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]

    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
        Each key maps to empty list if there are no ancestors among the provided input.
        """
        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}

    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
        """
        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
        if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
        'CL:0000000'

        :param term_id: str ontology term to find highest level term for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
        high-level terms
        """
        matched_terms = self.get_high_level_terms(term_id, high_level_terms)
        if not matched_terms:
            return None
        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
        # "Highest" is the matched ancestor with the greatest distance from the term.
        return max(matched_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])

    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
        provided input. Raises ValueError if term ID is not valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective highest level term from the input list.
        Each key maps to None if there are no ancestors among the provided input.
        """
        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}

    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
        ['CL:0002363']

        :param term_id: str ontology term to find descendants for
        :param include_self: boolean flag to include the term itself as a descendant
        :return: List[str] of descendant terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        descendants = [term_id] if include_self else []
        # Metadata only stores ancestors, so a descendant is any term that lists term_id among its ancestors.
        descendants.extend(
            candidate_descendant
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if term_id in candidate_metadata["ancestors"]
        )
        return descendants

    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
        included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003', ...],
            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
        }

        :param term_ids: list of str ontology terms to find descendants for
        :param include_self: boolean flag to include the term itself as an descendant
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
        empty list if there are no descendants.
        """
        descendants_dict: Dict[str, List[str]] = dict()
        ontology_names = set()
        for term_id in term_ids:
            if term_id in VALID_NON_ONTOLOGY_TERMS:
                descendants_dict[term_id] = []
                continue
            ontology_name = self._parse_ontology_name(term_id)
            descendants_dict[term_id] = [term_id] if include_self else []
            ontology_names.add(ontology_name)

        # Scan each involved ontology once, testing every requested term against each candidate's ancestors.
        for ontology in ontology_names:
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
                ancestors = candidate_metadata["ancestors"]
                for ancestor_id, descendants in descendants_dict.items():
                    if ancestor_id in ancestors:
                        descendants.append(candidate_descendant)

        return descendants_dict

    def get_term_children(self, term_id: str) -> List[str]:
        """
        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_children("CL:0000526")
        ['CL:0000101', 'CL:4042034']

        :param term_id: str ontology term to find children for
        :return: List[str] of children terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # A child is any term that lists term_id as an ancestor at distance exactly 1.
        return [
            candidate_child
            for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if candidate_metadata["ancestors"].get(term_id) == 1
        ]

    def get_term_graph(self, term_id: str) -> OntologyNode:
        """
        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
        >>> root_node.term_id
        'CL:0000000'
        >>> root_node.to_dict() # doctest: +SKIP
        {
            "term_id": "CL:0000000",
            "name": "cell A",
            "children": [
                {
                    "term_id": "CL:0000001",
                    "name": "cell B",
                    "children": [...],
                },
                {
                    "term_id": "CL:0000002",
                    "name": "cell C",
                    "children": [...],
                },
                ...
            ]
        }
        >>> root_node.term_counter # doctest: +SKIP
        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...

        :param term_id: str ontology term to build subtree for
        :return: OntologyNode representation of graph with term_id as root.
        """
        term_label = self.get_term_label(term_id)
        root = OntologyNode(term_id, term_label)
        # Recursively build one subgraph per direct child.
        for child_term_id in self.get_term_children(term_id):
            root.add_child(self.get_term_graph(child_term_id))
        return root

    def is_term_deprecated(self, term_id: str) -> bool:
        """
        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
        ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.is_term_deprecated("CL:0000003")
        True

        :param term_id: str ontology term to check for deprecation
        :return: boolean flag indicating whether the term is deprecated
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return False
        ontology_name = self._parse_ontology_name(term_id)
        # A missing "deprecated" key means not deprecated; coerce so the declared bool return type always holds.
        is_deprecated: bool = bool(self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated", False))
        return is_deprecated

    def get_term_replacement(self, term_id: str) -> Union[str, None]:
        """
        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_replacement("CL:0000003")
        'CL:0000000'

        :param term_id: str ontology term to check a replacement term for
        :return: replacement str term ID if it exists, None otherwise
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return None
        ontology_name = self._parse_ontology_name(term_id)
        replaced_by: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
        # Normalize any falsy value (missing key, empty string) to None.
        return replaced_by if replaced_by else None

    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
        """
        Fetch metadata for a given ontology term. Returns a dict with format

        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

        Comments maps to List[str] of ontology curator comments
        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
        Consider maps to List[str] of alternate ontology terms to consider using instead of this term

        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
        of a supported ontology.

        :param term_id: str ontology term to fetch metadata for
        :return: Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {"comments": None, "term_tracker": None, "consider": None}
        ontology_name = self._parse_ontology_name(term_id)
        term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
        # A tuple (not a set) keeps the key order of the result deterministic.
        return {key: term_metadata.get(key, None) for key in ("comments", "term_tracker", "consider")}

    def get_term_label(self, term_id: str) -> str:
        """
        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label("CL:0000005")
        'neural crest derived fibroblast'

        :param term_id: str ontology term to fetch label for
        :return: str human-readable label for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            # Non-ontology terms are returned as their own label.
            return term_id
        ontology_name = self._parse_ontology_name(term_id)
        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
        return label

    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
        """
        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
        member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}

        :param term_ids: list of str ontology terms to fetch label for
        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
        """
        return {term_id: self.get_term_label(term_id) for term_id in term_ids}

    def get_term_description(self, term_id: str) -> Optional[str]:
        """
        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_description("CL:0000005")
        'Any fibroblast that is derived from the neural crest.'

        :param term_id: str ontology term to fetch description for
        :return: str description for the term, or None if the term has no description. Valid non-ontology terms
        are returned unchanged.
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return term_id
        ontology_name = self._parse_ontology_name(term_id)
        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
        return description

    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
        """
        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}

        :param term_ids: list of str ontology terms to fetch descriptions for
        :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions
        """
        return {term_id: self.get_term_description(term_id) for term_id in term_ids}

    def get_term_synonyms(self, term_id: str) -> List[str]:
        """
        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_synonyms("CL:0000019")
        ['sperm cell', 'spermatozoid', 'spermatozoon']

        :param term_id: str ontology term to fetch synonyms for
        :return: List[str] synonyms for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # list(...) returns a fresh list so callers cannot mutate the loaded ontology metadata.
        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
        return synonyms

    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
        """
        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}

        :param term_ids: list of str ontology terms to fetch synonyms for
        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
        """
        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}

    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
        """
        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
        ontology_name is not a supported ontology.

        Returns None if no term in the ontology has the given label.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
        'CL:0000005'

        :param term_label: str human-readable label to fetch term ID for
        :param ontology_name: str name of ontology to search for term label in
        :return: Optional[str] term ID with that label, or None if the label is not found in the ontology
        """
        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
        return ontology_term_label_to_id_map.get(term_label)

    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.

        If no applicable match is found, returns None.

        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
        'UBERON:0000468'

        :param term_id: str ontology term to find equivalent term for
        :param cross_ontology: str name of ontology to search for equivalent term in
        :return: Optional[str] equivalent term ID from the cross_ontology
        """
        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
            raise ValueError(
                f"{cross_ontology} is not in the set of supported cross ontology mappings "
                f"{self.cxg_schema.cross_ontology_mappings}."
            )
        ontology_name = self._parse_ontology_name(term_id)
        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
        if not cross_ontology_terms:
            return None
        bridge_term_id: Optional[str] = cross_ontology_terms.get(cross_ontology)
        return bridge_term_id

    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
        of the term for the closest match.

        If no applicable match is found, returns an empty list.

        If multiple ancestors of the same distance have matches, returns all possible closest matches.

        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
        ['UBERON:0000476', 'UBERON:0000920']

        :param term_id: str ontology term to find closest term for
        :param cross_ontology: str name of ontology to search for closest term in
        :return: List[str] list of closest term IDs from the cross_ontology
        """
        closest_bridge_terms: List[str] = []
        terms_to_match = [term_id]
        # Walk up one generation at a time and stop at the first generation that yields any match.
        while terms_to_match and not closest_bridge_terms:
            for term in terms_to_match:
                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
                    closest_bridge_terms.append(closest_bridge_term)
            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
        return closest_bridge_terms
An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
def __init__(self, schema_version: Optional[str] = None):
    """
    Create an OntologyParser bound to the ontology metadata of one CellxGene schema version. If that metadata
    is not cached, a network call to GitHub Release Assets is made to load and parse it in memory.

    :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
    is loaded.
    """
    if schema_version:
        self.cxg_schema = CXGSchema(version=schema_version)
    else:
        self.cxg_schema = CXGSchema()

    # One label -> term ID cache per supported ontology; each starts empty and is filled on first use.
    label_maps: Dict[str, Dict[str, str]] = {}
    for ontology_name in self.cxg_schema.supported_ontologies:
        label_maps[ontology_name] = {}
    self.term_label_to_id_map: Dict[str, Dict[str, str]] = label_maps
Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema version. If not cached, it will make a network call to GitHub Release Assets to load in memory and parse the corresponding ontology metadata.
Parameters
- schema_version: str version of the schema to load ontology metadata for. If not provided, the latest is loaded.
def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
    """
    Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
    {'Label A': 'CL:0000000', ... }

    :param ontology_name: str name of ontology to get map of term labels to term IDs
    :return: Dict[str, str] copy of the cached map of term labels to term IDs
    :raises ValueError: if ontology_name is not a supported ontology
    """
    supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
    if not supported_ontology_name:
        # Report the caller's input: supported_ontology_name is falsy on this path and would render as "None".
        raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")

    cached_map = self.term_label_to_id_map[supported_ontology_name]
    if not cached_map:
        # First request for this ontology (or it has no terms): build the label -> ID map in place.
        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
            cached_map[term_metadata["label"]] = term_id

    # Return a copy so callers cannot mutate the cached map.
    return cached_map.copy()
Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
{'Label A': 'CL:0000000', ... }
Parameters
- ontology_name: str name of ontology to get map of term labels to term IDs
def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
    """
    Report whether an ontology term ID is defined in a supported ontology. Deprecated terms that are still
    defined in the ontology count as valid. When an ontology is given, the term must belong to that ontology;
    otherwise any supported ontology is accepted.

    :param term_id: str ontology term to check
    :param ontology: str name of ontology to check against
    :return: boolean flag indicating whether the term is supported
    """
    try:
        parsed_ontology = self._parse_ontology_name(term_id)
        if ontology and parsed_ontology != ontology:
            return False
        return term_id in self.cxg_schema.ontology(parsed_ontology)
    except ValueError:
        # Malformed or unsupported term IDs are invalid, not errors.
        return False
Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology.
Parameters
- term_id: str ontology term to check
- ontology: str name of ontology to check against
Returns
boolean flag indicating whether the term is supported
def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
    """
    List every ancestor of a term. With include_self=True the term itself is added to the list as well.
    Raises ValueError if the term ID is not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
    ['CL:0000000', 'CL:0000057', ...

    :param term_id: str ontology term to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: flattened List[str] of ancestor terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []
    ontology_name = self._parse_ontology_name(term_id)
    term_record = self.cxg_schema.ontology(ontology_name)[term_id]
    lineage = list(term_record["ancestors"].keys())
    if include_self:
        # The term itself goes after all of its ancestors.
        lineage.append(term_id)
    return lineage
Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
['CL:0000000', 'CL:0000057', ...]
Parameters
- term_id: str ontology term to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
flattened List[str] of ancestor terms
def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
    """
    Look up the ancestors of every term in term_ids. With include_self=True each term is also listed among its
    own ancestors. Raises ValueError if the term ID is not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
    {
        'CL:0000003': ['CL:0000003'],
        'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
    }

    :param term_ids: list of str ontology terms to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
    empty list if there are no ancestors.
    """
    ancestors_by_term: Dict[str, List[str]] = {}
    for term_id in term_ids:
        ancestors_by_term[term_id] = self.get_term_ancestors(term_id, include_self)
    return ancestors_by_term
Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
'CL:0000003': ['CL:0000003'],
'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
}
Parameters
- term_ids: list of str ontology terms to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to empty list if there are no ancestors.
def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
    """
    Map every ancestor of a term to its distance from the term. With include_self=True the term itself is
    included at distance 0. Raises ValueError if the term ID is not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
    {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}

    :param term_id: str ontology term to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return {}
    ontology_name = self._parse_ontology_name(term_id)
    term_record = self.cxg_schema.ontology(ontology_name)[term_id]
    # Work on a copy so the schema's own ancestor map is never modified.
    distances: Dict[str, int] = term_record["ancestors"].copy()
    if include_self:
        distances[term_id] = 0
    return distances
Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
{'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
Parameters
- term_id: str ontology term to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
Dict[str, int] map of ancestor terms and their respective distances from the term_id
def map_term_ancestors_with_distances(
    self, term_ids: Iterable[str], include_self: bool = False
) -> Dict[str, Dict[str, int]]:
    """
    Look up, for every term in term_ids, its ancestors and their distances from that term. With
    include_self=True each term is also included among its own ancestors. Raises ValueError if the term ID is
    not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
    {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}

    :param term_ids: list of str ontology terms to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
    respective distances from the term_id
    """
    distances_by_term: Dict[str, Dict[str, int]] = {}
    for term_id in term_ids:
        distances_by_term[term_id] = self.get_term_ancestors_with_distances(term_id, include_self)
    return distances_by_term
Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
{'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
Parameters
- term_ids: list of str ontology terms to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their respective distances from the term_id
def get_term_parents(self, term_id: str) -> List[str]:
    """
    List the direct parents of a term, i.e. its ancestors at distance 1. Raises ValueError if the term ID is
    not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_parents("CL:0000101")
    ['CL:0000526']

    :param term_id: str ontology term to find parents for
    :return: List[str] of parent terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []
    ontology_name = self._parse_ontology_name(term_id)
    distances: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
    direct_parents: List[str] = []
    for candidate, hops in distances.items():
        if hops == 1:
            direct_parents.append(candidate)
    return direct_parents
Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_parents("CL:0000101")
['CL:0000526']
Parameters
- term_id: str ontology term to find parents for
Returns
List[str] of parent terms
def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
    """
    Compute the number of edges separating two ontology terms, measured through a lowest common ancestor.
    Terms must be from the same ontology. Returns -1 if terms are disjoint.
    Raises ValueError if term IDs are not valid members of a supported ontology.

    :param term_id_1: str ontology term to find distance for
    :param term_id_2: str ontology term to find distance for
    :return: int distance between the two terms, measured in number of edges between their shortest path.
    """
    common_ancestors = self.get_lowest_common_ancestors(term_id_1, term_id_2)
    if common_ancestors:
        # Every lowest common ancestor yields the same summed distance, so the first one is enough.
        anchor = common_ancestors[0]
        first_leg = self.get_term_ancestors_with_distances(term_id_1, include_self=True)[anchor]
        second_leg = self.get_term_ancestors_with_distances(term_id_2, include_self=True)[anchor]
        return int(first_leg + second_leg)
    return -1
Get the distance between two ontology terms. The distance is defined as the number of edges between the two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint. Raises ValueError if term IDs are not valid members of a supported ontology.
Parameters
- term_id_1: str ontology term to find distance for
- term_id_2: str ontology term to find distance for
Returns
int distance between the two terms, measured in number of edges between their shortest path.
def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
    """
    Get the lowest common ancestors of two ontology terms. A lowest common ancestor is a shared ancestor
    (either term itself included) whose summed distance to both terms is minimal.
    Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
    Raises ValueError if term IDs are not valid members of a supported ontology.

    :param term_id_1: str ontology term to find LCA for
    :param term_id_2: str ontology term to find LCA for
    :return: List[str] of term IDs of the lowest common ancestor terms, ordered as they appear in the ancestor
    map of term_id_1. Empty list if the terms are from different ontologies or share no ancestor.
    """
    ontology = self._parse_ontology_name(term_id_1)
    if ontology != self._parse_ontology_name(term_id_2):
        return []

    # include_self=True so that a term can be its own (or the other term's) common ancestor
    ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
    ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)

    # Iterate ancestors_1 (a dict, insertion-ordered) rather than a set intersection, so the result order
    # is stable across interpreter runs instead of depending on string hash randomization.
    summed_distances = {
        ancestor: distance + ancestors_2[ancestor]
        for ancestor, distance in ancestors_1.items()
        if ancestor in ancestors_2
    }
    if not summed_distances:
        return []

    shortest = min(summed_distances.values())
    return [ancestor for ancestor, total in summed_distances.items() if total == shortest]
Get the lowest common ancestors of two ontology terms. Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors. Raises ValueError if term IDs are not valid members of a supported ontology.
Parameters
- term_id_1: str ontology term to find LCA for
- term_id_2: str ontology term to find LCA for
Returns
List[str] of term IDs of the lowest common ancestor terms; empty list if the terms are from different ontologies
def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
    """
    Select, from the supplied high_level_terms, those that are ancestors of the given term (the term itself
    counts as its own ancestor). Matches are returned in the order they appear in high_level_terms.
    Raises ValueError if term ID is not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
    ['CL:0000000']

    :param term_id: str ontology term to find high-level terms for
    :param high_level_terms: list of str ontology terms to check for ancestry to term_id
    :return: List[str] of high-level terms that the term is a descendant of
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []
    lineage = self.get_term_ancestors(term_id, include_self=True)
    matches: List[str] = []
    for candidate in high_level_terms:
        if candidate in lineage:
            matches.append(candidate)
    return matches
Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1 high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
['CL:0000000']
Parameters
- term_id: str ontology term to find high-level terms for
- high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns
List[str] of high-level terms that the term is a descendant of
def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
    """
    Map each term in term_ids to the subset of high_level_terms it descends from, producing a dictionary with
    format

    {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

    A term counts as a descendant of itself. Raises ValueError if term ID is not valid member of a supported
    ontology.

    :param term_ids: list of str ontology terms to map high level terms for
    :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
    :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
    Each key maps to empty list if there are no ancestors among the provided input.
    """
    matches_by_term: Dict[str, List[str]] = {}
    for term_id in term_ids:
        matches_by_term[term_id] = self.get_high_level_terms(term_id, high_level_terms)
    return matches_by_term
Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format
{"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
Parameters
- term_ids: list of str ontology terms to map high level terms for
- high_level_terms: list of str ontology terms to be mapped to descendant term_ids
Returns
Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list. Each key maps to empty list if there are no ancestors among the provided input.
def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
    """
    Get the highest level ontology term for a given term: among the supplied high_level_terms that are
    ancestors of the term (the term itself included), the one farthest from the term. If several are equally
    far, the one listed first in high_level_terms is returned. Raises ValueError if term ID is not valid member
    of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
    'CL:0000000'

    :param term_id: str ontology term to find highest level term for
    :param high_level_terms: list of str ontology terms to check for ancestry to term_id
    :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
    high-level terms
    """
    matched_terms = self.get_high_level_terms(term_id, high_level_terms)
    if not matched_terms:
        # Nothing matched, so skip the ancestor-distance lookup entirely.
        return None
    distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
    return max(matched_terms, key=lambda matched_term: distances[matched_term])
Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
'CL:0000000'
Parameters
- term_id: str ontology term to find highest level term for
- high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns
str highest level term that the term is a descendant of, or None if it is not a descendant of any high-level terms
def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
    """
    Map each term in term_ids to the highest level term, from high_level_terms, that it descends from,
    producing a dictionary with format

    {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

    A term counts as a descendant of itself. A term maps to None if it does not descend from any of the
    provided high level terms. Raises ValueError if term ID is not valid member of a supported ontology.

    :param term_ids: list of str ontology terms to map high level terms for
    :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
    :return: Dictionary mapping str term IDs to their respective highest level term from the input list, or to
    None if there are no ancestors among the provided input.
    """
    highest_by_term: Dict[str, Union[str, None]] = {}
    for term_id in term_ids:
        highest_by_term[term_id] = self.get_highest_level_term(term_id, high_level_terms)
    return highest_by_term
Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format
{"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided. Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the provided input. Raises ValueError if term ID is not valid member of a supported ontology.
Parameters
- term_ids: list of str ontology terms to map high level terms for
- high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
Returns
Dictionary mapping str term IDs to the highest level term, from the input list, that each is a descendant of. Each key maps to None if there are no ancestors among the provided input.
def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
    """
    List every term, within the same ontology, that has the given term among its ancestors. With
    include_self=True the term itself is listed first. Raises ValueError if term ID is not valid member of a
    supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
    ['CL:0002363']

    :param term_id: str ontology term to find descendants for
    :param include_self: boolean flag to include the term itself as a descendant
    :return: List[str] of descendant terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []
    ontology_name = self._parse_ontology_name(term_id)
    found: List[str] = [term_id] if include_self else []
    # Scan the whole ontology: a term is a descendant when term_id appears in its ancestor map.
    found.extend(
        candidate
        for candidate, metadata in self.cxg_schema.ontology(ontology_name).items()
        if term_id in metadata["ancestors"].keys()
    )
    return found
Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
['CL:0002363']
Parameters
- term_id: str ontology term to find descendants for
- include_self: boolean flag to include the term itself as a descendant
Returns
List[str] of descendant terms
def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
    """
    Look up the descendants of every term in term_ids, scanning each involved ontology only once. With
    include_self=True each term is also listed among its own descendants. Raises ValueError if term ID is not
    valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
    {
        'CL:0000003': ['CL:0000003', ...],
        'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
    }

    :param term_ids: list of str ontology terms to find descendants for
    :param include_self: boolean flag to include the term itself as a descendant
    :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
    empty list if there are no descendants.
    """
    collected_by_term: Dict[str, List[str]] = {}
    touched_ontologies = set()
    for term_id in term_ids:
        if term_id not in VALID_NON_ONTOLOGY_TERMS:
            touched_ontologies.add(self._parse_ontology_name(term_id))
            collected_by_term[term_id] = [term_id] if include_self else []
        else:
            # Non-ontology terms have no descendants and no ontology to scan.
            collected_by_term[term_id] = []

    for ontology in touched_ontologies:
        for candidate, metadata in self.cxg_schema.ontology(ontology).items():
            candidate_ancestors = metadata["ancestors"].keys()
            for requested_id, collected in collected_by_term.items():
                if requested_id in candidate_ancestors:
                    collected.append(candidate)

    return collected_by_term
Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
'CL:0000003': ['CL:0000003', ...],
'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
}
Parameters
- term_ids: list of str ontology terms to find descendants for
- include_self: boolean flag to include the term itself as a descendant
Returns
Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to empty list if there are no descendants.
def get_term_children(self, term_id: str) -> List[str]:
    """
    List the direct children of a term, i.e. the terms in the same ontology that have it as an ancestor at
    distance 1. Raises ValueError if term ID is not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_children("CL:0000526")
    ['CL:0000101', 'CL:4042034']

    :param term_id: str ontology term to find children for
    :return: List[str] of children terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []
    ontology_name = self._parse_ontology_name(term_id)
    return [
        candidate
        for candidate, metadata in self.cxg_schema.ontology(ontology_name).items()
        if any(ancestor == term_id and hops == 1 for ancestor, hops in metadata["ancestors"].items())
    ]
Get the direct children ontology terms for a given term. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_children("CL:0000526")
['CL:0000101', 'CL:4042034']
Parameters
- term_id: str ontology term to find children for
Returns
List[str] of children terms
def get_term_graph(self, term_id: str) -> OntologyNode:
    """
    Build the DAG of OntologyNode relationships rooted at the given term. The graph is assembled recursively from
    ``get_term_children``, so it only contains terms from the same ontology as the root term ID. Raises ValueError
    if term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> root_node = ontology_parser.get_term_graph("CL:0000000")
    >>> root_node.term_id
    'CL:0000000'
    >>> root_node.to_dict() # doctest: +SKIP
    {
        "term_id": "CL:0000000",
        "name": "cell A",
        "children": [
            {
                "term_id": "CL:0000001",
                "name": "cell B",
                "children": [...],
            },
            {
                "term_id": "CL:0000002",
                "name": "cell C",
                "children": [...],
            },
            ...
        ]
    }
    >>> root_node.term_counter # doctest: +SKIP
    Counter({'CL:0002058': 48, 'CL:0002471': 48, ...

    :param term_id: str ontology term to build subtree for
    :return: OntologyNode representation of graph with term_id as root.
    """
    node = OntologyNode(term_id, self.get_term_label(term_id))
    # map() is lazy, so each child's subgraph is built right before it is attached, one child at a time.
    for subgraph in map(self.get_term_graph, self.get_term_children(term_id)):
        node.add_child(subgraph)
    return node
Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the same ontology as the root term ID. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> root_node = ontology_parser.get_term_graph("CL:0000000")
>>> root_node.term_id
'CL:0000000'
>>> root_node.to_dict() # doctest: +SKIP
{
"term_id": "CL:0000000",
"name": "cell A",
"children": [
{
"term_id": "CL:0000001",
"name": "cell B",
"children": [...],
},
{
"term_id": "CL:0000002",
"name": "cell C",
"children": [...],
},
...
]
}
>>> root_node.term_counter # doctest: +SKIP
Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
Parameters
- term_id: str ontology term to build subtree for
Returns
OntologyNode representation of graph with term_id as root.
def is_term_deprecated(self, term_id: str) -> bool:
    """
    Check if an ontology term is deprecated. Raises ValueError if term ID is not a valid member of a supported
    ontology. Terms listed in VALID_NON_ONTOLOGY_TERMS are never considered deprecated.

    NOTE(review): the term's metadata is read by subscript, so an ID with a supported prefix that is absent from
    the ontology presumably surfaces as KeyError rather than ValueError -- confirm.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.is_term_deprecated("CL:0000003")
    True

    :param term_id: str ontology term to check for deprecation
    :return: boolean flag indicating whether the term is deprecated
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return False
    ontology_name = self._parse_ontology_name(term_id)
    # Coerce to bool so a term without a "deprecated" entry yields False instead of leaking None
    # through a signature that promises bool.
    return bool(self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated", False))
Check if an ontology term is deprecated. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.is_term_deprecated("CL:0000003")
True
Parameters
- term_id: str ontology term to check for deprecation
Returns
boolean flag indicating whether the term is deprecated
def get_term_replacement(self, term_id: str) -> Union[str, None]:
    """
    Look up the term that replaces a deprecated ontology term. Returns None when the term records no replacement
    (or an empty one), and for terms listed in VALID_NON_ONTOLOGY_TERMS. Raises ValueError if term ID is not a
    valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_replacement("CL:0000003")
    'CL:0000000'

    :param term_id: str ontology term to check a replacement term for
    :return: replacement str term ID if it exists, None otherwise
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return None
    ontology_name = self._parse_ontology_name(term_id)
    term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
    # `or None` normalises any falsy stored value (missing key, empty string) to None.
    return term_metadata.get("replaced_by") or None
Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_replacement("CL:0000003")
'CL:0000000'
Parameters
- term_id: str ontology term to check a replacement term for
Returns
replacement str term ID if it exists, None otherwise
def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
    """
    Fetch metadata for a given ontology term. Returns a dict with format

    {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

    "comments" maps to List[str] of ontology curator comments.
    "term_tracker" maps to a str url where there is discussion around this term's curation (or deprecation).
    "consider" maps to List[str] of alternate ontology terms to consider using instead of this term.

    All keys map to None if no metadata of that type is present, and for terms listed in
    VALID_NON_ONTOLOGY_TERMS. Keys are always returned in the order shown above. Raises ValueError if term ID is
    not a valid member of a supported ontology.

    NOTE(review): the term's metadata is read by subscript, so an ID with a supported prefix that is absent from
    the ontology presumably surfaces as KeyError rather than ValueError -- confirm.

    :param term_id: str ontology term to fetch metadata for
    :return: Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
    """
    # A tuple (not a set) keeps the key order of the result stable from run to run.
    metadata_keys = ("comments", "term_tracker", "consider")
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return dict.fromkeys(metadata_keys)
    ontology_name = self._parse_ontology_name(term_id)
    # Resolve the term's record once instead of once per key.
    term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
    return {key: term_metadata.get(key) for key in metadata_keys}
Fetch metadata for a given ontology term. Returns a dict with format
{"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
Comments maps to List[str] of ontology curator comments. Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation). Consider maps to List[str] of alternate ontology terms to consider using instead of this term.
All keys map to None if no metadata of that type is present. Raises ValueError if the term ID is not a valid member of a supported ontology.
Parameters
- term_id: str ontology term to fetch metadata for
Returns
Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
def get_term_label(self, term_id: str) -> str:
    """
    Return the human-readable label of an ontology term. Terms listed in VALID_NON_ONTOLOGY_TERMS are returned
    unchanged, i.e. they act as their own label. Raises ValueError if term ID is not a valid member of a supported
    ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_label("CL:0000005")
    'neural crest derived fibroblast'

    :param term_id: str ontology term to fetch label for
    :return: str human-readable label for the term
    """
    if term_id not in VALID_NON_ONTOLOGY_TERMS:
        ontology_name = self._parse_ontology_name(term_id)
        term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
        term_label: str = term_metadata["label"]
        return term_label
    return term_id
Fetch the human-readable label for a given ontology term. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label("CL:0000005")
'neural crest derived fibroblast'
Parameters
- term_id: str ontology term to fetch label for
Returns
str human-readable label for the term
def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
    """
    Resolve the human-readable label of every term in an iterable by delegating to ``get_term_label``. Raises
    ValueError if a term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
    {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}

    :param term_ids: iterable of str ontology terms to fetch label for
    :return: Dict[str, str] mapping term IDs to their respective human-readable labels
    """
    labels_by_term: Dict[str, str] = {}
    for term_id in term_ids:
        labels_by_term[term_id] = self.get_term_label(term_id)
    return labels_by_term
Fetch the human-readable label for a given list of ontology terms. Raises ValueError if a term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
{'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
Parameters
- term_ids: list of str ontology terms to fetch label for
Returns
Dict[str, str] mapping term IDs to their respective human-readable labels
def get_term_description(self, term_id: str) -> Optional[str]:
    """
    Return the description recorded for an ontology term, or None when the term has no description. Terms listed
    in VALID_NON_ONTOLOGY_TERMS are returned unchanged, i.e. the term ID itself is given back as the description.
    Raises ValueError if term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_description("CL:0000005")
    'Any fibroblast that is derived from the neural crest.'

    :param term_id: str ontology term to fetch description for
    :return: str description for the term, or None if no description is present
    """
    if term_id not in VALID_NON_ONTOLOGY_TERMS:
        ontology_name = self._parse_ontology_name(term_id)
        term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
        term_description: Optional[str] = term_metadata.get("description")
        return term_description
    return term_id
Fetch the description for a given ontology term. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_description("CL:0000005")
'Any fibroblast that is derived from the neural crest.'
Parameters
- term_id: str ontology term to fetch description for
Returns
str description for the term, or None if the term has no description
def map_term_descriptions(self, term_ids: Iterable[str]) -> Dict[str, Optional[str]]:
    """
    Fetch the descriptions for a given collection of ontology terms by delegating to ``get_term_description``.
    Raises ValueError if a term ID is not a valid member of a supported ontology.

    Any iterable of term IDs is accepted, matching ``map_term_labels`` and ``map_term_descendants``.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
    {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}

    :param term_ids: iterable of str ontology terms to fetch descriptions for
    :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions (None where a term has no
    description)
    """
    return {term_id: self.get_term_description(term_id) for term_id in term_ids}
Fetch the descriptions for a given list of ontology terms. Raises ValueError if a term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
{'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
Parameters
- term_ids: list of str ontology terms to fetch descriptions for
Returns
Dict[str, Optional[str]] mapping term IDs to their respective descriptions (None where a term has no description)
def get_term_synonyms(self, term_id: str) -> List[str]:
    """
    Return the synonym labels recorded for an ontology term as a new list. The list is empty when the term has no
    synonyms and for terms listed in VALID_NON_ONTOLOGY_TERMS. Raises ValueError if term ID is not a valid member
    of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_synonyms("CL:0000019")
    ['sperm cell', 'spermatozoid', 'spermatozoon']

    :param term_id: str ontology term to fetch synonyms for
    :return: List[str] synonyms for the term
    """
    if term_id not in VALID_NON_ONTOLOGY_TERMS:
        ontology_name = self._parse_ontology_name(term_id)
        term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
        # list(...) hands the caller a fresh list rather than the stored collection.
        term_synonyms: List[str] = list(term_metadata.get("synonyms", []))
        return term_synonyms
    return []
Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_synonyms("CL:0000019")
['sperm cell', 'spermatozoid', 'spermatozoon']
Parameters
- term_id: str ontology term to fetch synonyms for
Returns
List[str] synonyms for the term
def map_term_synonyms(self, term_ids: Iterable[str]) -> Dict[str, List[str]]:
    """
    Fetch the synonym labels for a given collection of ontology terms by delegating to ``get_term_synonyms``.
    Raises ValueError if a term ID is not a valid member of a supported ontology.

    Any iterable of term IDs is accepted, matching ``map_term_labels`` and ``map_term_descendants``.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
    {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}

    :param term_ids: iterable of str ontology terms to fetch synonyms for
    :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
    """
    return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}
Fetch the synonym labels for a given list of ontology terms. Raises ValueError if a term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
{'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
Parameters
- term_ids: list of str ontology terms to fetch synonyms for
Returns
Dict[str, List[str]] mapping term IDs to their respective synonym lists
def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
    """
    Look up the ontology term ID that carries a given human-readable label, searching only within
    ``ontology_name``. The label-to-ID map comes from ``get_term_label_to_id_map``, which raises ValueError if
    ontology_name is not a supported ontology.

    Returns None if the label is not present in that ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
    'CL:0000005'

    :param term_label: str human-readable label to fetch term ID for
    :param ontology_name: str name of ontology to search for term label in
    :return: Optional[str] term ID with that label, or None if the label is not found in the ontology
    """
    return self.get_term_label_to_id_map(ontology_name).get(term_label, None)
Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if ontology_name is not a supported ontology.
Returns None if the label is not found in the given ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
'CL:0000005'
Parameters
- term_label: str human-readable label to fetch term ID for
- ontology_name: str name of ontology to search for term label in
Returns
Optional[str] term ID with that label, or None if the label is not found in the ontology
def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
    """
    Find the term in ``cross_ontology`` that is recorded as the exact equivalent of the given term ID.

    Returns None when the term records no cross-ontology terms, or none for ``cross_ontology``.

    Raises ValueError if cross_ontology is not among the schema's supported cross ontology mappings, or if term ID
    is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
    'UBERON:0000468'

    :param term_id: str ontology term to find equivalent term for
    :param cross_ontology: str name of ontology to search for equivalent term in
    :return: Optional[str] equivalent term ID from the cross_ontology
    """
    # The target ontology is validated before the term ID is parsed.
    if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
        raise ValueError(
            f"{cross_ontology} is not in the set of supported cross ontology mappings "
            f"{self.cxg_schema.cross_ontology_mappings}."
        )
    ontology_name = self._parse_ontology_name(term_id)
    term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
    equivalents = term_metadata.get("cross_ontology_terms")
    if not equivalents:
        return None
    equivalent_id: Optional[str] = equivalents.get(cross_ontology)
    return equivalent_id
For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
If no applicable match is found, returns None.
Raises ValueError if term ID or cross_ontology are not valid members of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
'UBERON:0000468'
Parameters
- term_id: str ontology term to find equivalent term for
- cross_ontology: str name of ontology to search for equivalent term in
Returns
Optional[str] equivalent term ID from the cross_ontology
def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
    """
    For a given term ID, fetch the closest equivalent term IDs from a given ontology. If the term itself has an
    exact match, returns a single-element list containing it. Otherwise the term's ancestors are searched level by
    level (parents, then grandparents, ...) and the search stops at the first level that yields a match.

    If no applicable match is found, returns an empty list.

    If multiple ancestors of the same distance have matches, returns all possible closest matches, each listed
    once in the order they were found.

    Raises ValueError if term ID or cross_ontology are not valid members of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
    ['UBERON:0000476', 'UBERON:0000920']

    :param term_id: str ontology term to find closest term for
    :param cross_ontology: str name of ontology to search for closest term in
    :return: List[str] list of closest term IDs from the cross_ontology
    """
    closest_bridge_terms: List[str] = []
    # Terms already checked. Any term in here had no match (otherwise the search would have stopped), so it
    # never needs to be visited again when reached through a longer path.
    visited = set()
    terms_to_match = [term_id]
    while terms_to_match:
        visited.update(terms_to_match)
        for term in terms_to_match:
            bridge_term = self.get_bridge_term_id(term, cross_ontology)
            # Several terms on one level may bridge to the same target; report it only once.
            if bridge_term and bridge_term not in closest_bridge_terms:
                closest_bridge_terms.append(bridge_term)
        if closest_bridge_terms:
            # Stop before fetching the next level's parents; they are no longer needed.
            break
        # The ontology is a DAG, so siblings commonly share parents. A dict keeps first-seen order while
        # collapsing those repeats, which keeps the frontier from growing exponentially.
        next_level: Dict[str, None] = {}
        for child in terms_to_match:
            for parent in self.get_term_parents(child):
                if parent not in visited:
                    next_level[parent] = None
        terms_to_match = list(next_level)
    return closest_bridge_terms
For a given term ID, fetch the equivalent term ID from a given ontology. If an exact match is found, returns a single-element list containing it. If no exact match is found, traverses the ancestors of the term for the closest match.
If no applicable match is found, returns an empty list.
If multiple ancestors of the same distance have matches, returns all possible closest matches.
Raises ValueError if term ID or cross_ontology are not valid members of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
['UBERON:0000476', 'UBERON:0000920']
Parameters
- term_id: str ontology term to find closest term for
- cross_ontology: str name of ontology to search for closest term in
Returns
List[str] list of closest term IDs from the cross_ontology