cellxgene_ontology_guide.ontology_parser
import re
from typing import Any, Dict, Iterable, List, Optional, Union

from cellxgene_ontology_guide._constants import VALID_NON_ONTOLOGY_TERMS
from cellxgene_ontology_guide.entities import OntologyNode
from cellxgene_ontology_guide.supported_versions import CXGSchema


class OntologyParser:
    """
    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
    """

    cxg_schema: CXGSchema
    """ CXGSchema object to fetch ontology metadata from """

    def __init__(self, schema_version: Optional[str] = None):
        """
        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
        parse the corresponding ontology metadata.

        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
        is loaded.
        """
        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
        # Per-ontology cache of label -> term ID, filled lazily by get_term_label_to_id_map.
        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
        }

    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
        """
        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
        Raises ValueError if ontology_name is neither a supported ontology nor imported in one.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
        {'Label A': 'CL:0000000', ... }

        :param ontology_name: str name of ontology to get map of term labels to term IDs
        :return: Dict[str, str] copy of the cached map of term labels to term IDs
        """
        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
        if not supported_ontology_name:
            # Report the name the caller passed in; the resolved name is always None on this path.
            raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")

        label_to_id = self.term_label_to_id_map[supported_ontology_name]
        if not label_to_id:
            for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
                label_to_id[term_metadata["label"]] = term_id

        # Hand out a copy so callers cannot mutate the cache.
        return label_to_id.copy()

    def _parse_ontology_name(self, term_id: str) -> str:
        """
        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.

        :param term_id: str ontology term to parse
        :return: str name of ontology that term belongs to
        """
        pattern = r"[A-Za-z]+:\d+"
        # NOTE: re.match only anchors at the start of the string, so trailing characters after the digits are
        # not rejected here.
        if not re.match(pattern, term_id):
            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")

        ontology_term_prefix = term_id.split(":")[0]
        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
        if not ontology_name:
            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")

        return ontology_name

    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
        """
        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.

        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
        it is imported in.
        Otherwise, returns None.

        :param ontology_term_prefix: str ontology term prefix to check
        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
        a supported ontology in the CxG schema.
        """
        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
            return ontology_term_prefix
        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
        return supported_ontology_name

    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
        """
        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology

        :param term_id: str ontology term to check
        :param ontology: str name of ontology to check against
        :return: boolean flag indicating whether the term is supported
        """
        try:
            ontology_name = self._parse_ontology_name(term_id)
            if ontology and ontology_name != ontology:
                return False
            return term_id in self.cxg_schema.ontology(ontology_name)
        except ValueError:
            # Malformed ID or unsupported ontology: not valid, but not an error for this predicate.
            return False

    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
        an ancestor (appended as the last element). Raises ValueError if the term ID does not conform to the expected
        format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
        ['CL:0000000', 'CL:0000057', ...

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: flattened List[str] of ancestor terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
        return ancestors + [term_id] if include_self else ancestors

    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
        included as an ancestor (appended as the last element). Raises ValueError if a term ID does not conform to
        the expected format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003'],
            'CL:0000005': ['CL:0000000', ..., 'CL:0000005']
        }

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
        empty list if there are no ancestors.
        """
        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}

    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
        """
        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
        the term itself will be included as an ancestor with distance 0. Raises ValueError if the term ID does not
        conform to the expected format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {}
        ontology_name = self._parse_ontology_name(term_id)
        # Copy so callers cannot mutate the schema's stored ancestor map.
        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
        return ancestors | {term_id: 0} if include_self else ancestors

    def map_term_ancestors_with_distances(
        self, term_ids: Iterable[str], include_self: bool = False
    ) -> Dict[str, Dict[str, int]]:
        """
        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
        True, the term itself will be included as an ancestor. Raises ValueError if a term ID does not conform to the
        expected format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3,
        'CL:0000005': 0}}

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
        respective distances from the term_id
        """
        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

    def get_term_parents(self, term_id: str) -> List[str]:
        """
        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID does not conform to
        the expected format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_parents("CL:0000101")
        ['CL:0000526']

        :param term_id: str ontology term to find parents for
        :return: List[str] of parent terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
        # Parents are exactly the ancestors one edge away.
        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
        return parents

    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
        """
        Get the distance between two ontology terms. The distance is defined as the number of edges between the
        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
        Raises ValueError if term IDs do not conform to the expected format or do not belong to a supported ontology.

        :param term_id_1: str ontology term to find distance for
        :param term_id_2: str ontology term to find distance for
        :return: int distance between the two terms, measured in number of edges between their shortest path.
        """
        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
        if not lcas:
            return -1
        # Every lowest common ancestor has the same summed distance, so any one of them gives the answer.
        lca = lcas[0]
        return int(
            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lca]
            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lca]
        )

    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
        """
        Get the lowest common ancestors between two ontology terms that is from the given ontology.
        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
        Raises ValueError if term IDs do not conform to the expected format or do not belong to a supported ontology.

        :param term_id_1: str ontology term to find LCA for
        :param term_id_2: str ontology term to find LCA for
        :return: List[str] of term IDs of the lowest common ancestor terms; empty if the terms are from different
        ontologies or share no ancestor
        """
        ontology = self._parse_ontology_name(term_id_1)
        if ontology != self._parse_ontology_name(term_id_2):
            return []
        # include path to term itself
        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
        # Summed distance through each shared ancestor; built by iterating a dict (not a set) so that the
        # order of the returned terms is deterministic.
        summed_distances = {
            ancestor: ancestors_1[ancestor] + distance_2
            for ancestor, distance_2 in ancestors_2.items()
            if ancestor in ancestors_1
        }
        if not summed_distances:
            return []
        min_sum_distances = min(summed_distances.values())
        return [ancestor for ancestor, total in summed_distances.items() if total == min_sum_distances]

    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
        """
        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
        Raises ValueError if the term ID does not conform to the expected format or does not belong to a supported
        ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
        ['CL:0000000']

        :param term_id: str ontology term to find high-level terms for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: List[str] of high-level terms that the term is a descendant of
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        # A set gives constant-time membership tests; output order still follows high_level_terms.
        ancestors = set(self.get_term_ancestors(term_id, include_self=True))
        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]

    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
        as a descendant. Raises ValueError if a term ID does not conform to the expected format or does not belong
        to a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
        Each key maps to empty list if there are no ancestors among the provided input.
        """
        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}

    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
        """
        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide, and is furthest
        from the term. Raises ValueError if the term ID does not conform to the expected format or does not belong
        to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
        'CL:0000000'

        :param term_id: str ontology term to find highest level term for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
        high-level terms
        """
        matched_terms = self.get_high_level_terms(term_id, high_level_terms)
        if not matched_terms:
            return None
        # Only build the distance map once we know there is something to rank.
        distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
        return max(matched_terms, key=lambda high_level_term: distances[high_level_term])

    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
        provided input. Raises ValueError if a term ID does not conform to the expected format or does not belong
        to a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective highest level term from the input list, or to
        None if the term is not a descendant of any of the provided high level terms.
        """
        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}

    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
        a descendant (as the first element). Raises ValueError if the term ID does not conform to the expected format
        or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
        ['CL:0002363']

        :param term_id: str ontology term to find descendants for
        :param include_self: boolean flag to include the term itself as a descendant
        :return: List[str] of descendant terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        descendants = [term_id] if include_self else []
        # The schema stores ancestors per term, so descendants are found by scanning the term's ontology for
        # every term that lists term_id among its ancestors.
        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
            if term_id in candidate_metadata["ancestors"]:
                descendants.append(candidate_descendant)
        return descendants

    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
        included as a descendant. Each term is only matched against terms of its own ontology, consistent with
        get_term_descendants. Raises ValueError if a term ID does not conform to the expected format or does not
        belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003', ...],
            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
        }

        :param term_ids: list of str ontology terms to find descendants for
        :param include_self: boolean flag to include the term itself as an descendant
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
        empty list if there are no descendants.
        """
        descendants_dict: Dict[str, List[str]] = dict()
        # Requested terms grouped by the ontology they belong to, so each ontology is scanned once and only
        # against its own terms.
        term_ids_by_ontology: Dict[str, List[str]] = dict()
        for term_id in term_ids:
            if term_id in VALID_NON_ONTOLOGY_TERMS:
                descendants_dict[term_id] = []
                continue
            ontology_name = self._parse_ontology_name(term_id)
            if term_id not in descendants_dict:
                # Guard against repeated input IDs, which would otherwise collect every descendant twice.
                term_ids_by_ontology.setdefault(ontology_name, []).append(term_id)
            descendants_dict[term_id] = [term_id] if include_self else []

        for ontology_name, ontology_term_ids in term_ids_by_ontology.items():
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
                ancestors = candidate_metadata["ancestors"]
                for ancestor_id in ontology_term_ids:
                    if ancestor_id in ancestors:
                        descendants_dict[ancestor_id].append(candidate_descendant)

        return descendants_dict

    def get_term_children(self, term_id: str) -> List[str]:
        """
        Get the direct children ontology terms for a given term. Raises ValueError if the term ID does not conform to
        the expected format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_children("CL:0000526")
        ['CL:0000101']

        :param term_id: str ontology term to find children for
        :return: List[str] of children terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # A child is any term that records term_id as an ancestor at distance 1.
        return [
            candidate_child
            for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if candidate_metadata["ancestors"].get(term_id) == 1
        ]

    def get_term_graph(self, term_id: str) -> OntologyNode:
        """
        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
        same ontology as the root term ID. Raises ValueError if the term ID does not conform to the expected format
        or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
        >>> root_node.term_id
        'CL:0000000'
        >>> root_node.to_dict() # doctest: +SKIP
        {
            "term_id": "CL:0000000",
            "name": "cell A",
            "children": [
                {
                    "term_id": "CL:0000001",
                    "name": "cell B",
                    "children": [...],
                },
                {
                    "term_id": "CL:0000002",
                    "name": "cell C",
                    "children": [...],
                },
                ...
            ]
        }
        >>> root_node.term_counter # doctest: +SKIP
        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...

        :param term_id: str ontology term to build subtree for
        :return: OntologyNode representation of graph with term_id as root.
        """
        term_label = self.get_term_label(term_id)
        root = OntologyNode(term_id, term_label)
        # Recursively attach the sub-graph rooted at each direct child.
        for child_term_id in self.get_term_children(term_id):
            root.add_child(self.get_term_graph(child_term_id))
        return root

    def is_term_deprecated(self, term_id: str) -> bool:
        """
        Check if an ontology term is deprecated. Raises ValueError if the term ID does not conform to the expected
        format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.is_term_deprecated("CL:0000003")
        True

        :param term_id: str ontology term to check for deprecation
        :return: boolean flag indicating whether the term is deprecated
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return False
        ontology_name = self._parse_ontology_name(term_id)
        # Coerce so that a missing "deprecated" key yields False rather than None.
        return bool(self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated", False))

    def get_term_replacement(self, term_id: str) -> Union[str, None]:
        """
        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
        Raises ValueError if the term ID does not conform to the expected format or does not belong to a supported
        ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_replacement("CL:0000003")
        'CL:0000000'

        :param term_id: str ontology term to check a replacement term for
        :return: replacement str term ID if it exists, None otherwise
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return None
        ontology_name = self._parse_ontology_name(term_id)
        replaced_by: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
        # Normalize any falsy value (missing key, empty string) to None.
        return replaced_by or None

    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
        """
        Fetch metadata for a given ontology term. Returns a dict with format

        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

        "comments" maps to List[str] of ontology curator comments
        "term_tracker" maps to a str url where there is discussion around this term's curation (or deprecation).
        "consider" maps to List[str] of alternate ontology terms to consider using instead of this term

        All keys map to None if no metadata of that type is present. Raises ValueError if the term ID does not
        conform to the expected format or does not belong to a supported ontology.

        :param term_id: str ontology term to fetch metadata for
        :return: Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {"comments": None, "term_tracker": None, "consider": None}
        ontology_name = self._parse_ontology_name(term_id)
        term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
        # Iterate a tuple rather than a set so the key order is stable across runs and matches the
        # non-ontology branch above.
        return {key: term_metadata.get(key, None) for key in ("comments", "term_tracker", "consider")}

    def get_term_label(self, term_id: str) -> str:
        """
        Fetch the human-readable label for a given ontology term. Raises ValueError if the term ID does not conform
        to the expected format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label("CL:0000005")
        'neural crest derived fibroblast'

        :param term_id: str ontology term to fetch label for
        :return: str human-readable label for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            # Non-ontology terms are their own label.
            return term_id
        ontology_name = self._parse_ontology_name(term_id)
        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
        return label

    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
        """
        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if a term ID does not
        conform to the expected format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}

        :param term_ids: list of str ontology terms to fetch label for
        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
        """
        return {term_id: self.get_term_label(term_id) for term_id in term_ids}

    def get_term_description(self, term_id: str) -> Optional[str]:
        """
        Fetch the description for a given ontology term. Raises ValueError if the term ID does not conform to the
        expected format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_description("CL:0000005")
        'Any fibroblast that is derived from the neural crest.'

        :param term_id: str ontology term to fetch description for
        :return: str description for the term, or None if the term has no description. Non-ontology terms are
        returned as-is.
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return term_id
        ontology_name = self._parse_ontology_name(term_id)
        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
        return description

    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
        """
        Fetch the descriptions for a given list of ontology terms. Raises ValueError if a term ID does not conform to
        the expected format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}

        :param term_ids: list of str ontology terms to fetch descriptions for
        :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions
        """
        return {term_id: self.get_term_description(term_id) for term_id in term_ids}

    def get_term_synonyms(self, term_id: str) -> List[str]:
        """
        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
        Raises ValueError if the term ID does not conform to the expected format or does not belong to a supported
        ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_synonyms("CL:0000019")
        ['sperm cell', 'spermatozoid', 'spermatozoon']

        :param term_id: str ontology term to fetch synonyms for
        :return: List[str] synonyms for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # list(...) returns a fresh list so callers cannot mutate the schema's stored synonyms.
        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
        return synonyms

    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
        """
        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if a term ID does not conform
        to the expected format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}

        :param term_ids: list of str ontology terms to fetch synonyms for
        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
        """
        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}

    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
        """
        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
        ontology_name is not a supported ontology.

        Returns None if the label is not found in the ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
        'CL:0000005'

        :param term_label: str human-readable label to fetch term ID for
        :param ontology_name: str name of ontology to search for term label in
        :return: Optional[str] term ID with that label, or None if the label is not found in the ontology
        """
        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
        return ontology_term_label_to_id_map.get(term_label)

    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.

        If no applicable match is found, returns None.

        Raises ValueError if cross_ontology is not a supported cross ontology mapping, or if the term ID does not
        conform to the expected format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
        'UBERON:0000468'

        :param term_id: str ontology term to find equivalent term for
        :param cross_ontology: str name of ontology to search for equivalent term in
        :return: Optional[str] equivalent term ID from the cross_ontology
        """
        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
            raise ValueError(
                f"{cross_ontology} is not in the set of supported cross ontology mappings "
                f"{self.cxg_schema.cross_ontology_mappings}."
            )
        ontology_name = self._parse_ontology_name(term_id)
        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
        bridge_term_id: Optional[str] = None
        if cross_ontology_terms:
            bridge_term_id = cross_ontology_terms.get(cross_ontology)
        return bridge_term_id

    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
        of the term for the closest match.

        If no applicable match is found, returns an empty list.

        If multiple ancestors of the same distance have matches, returns all possible closest matches, each listed
        once.

        Raises ValueError if cross_ontology is not a supported cross ontology mapping, or if the term ID does not
        conform to the expected format or does not belong to a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
        ['UBERON:0000476', 'UBERON:0000920']

        :param term_id: str ontology term to find closest term for
        :param cross_ontology: str name of ontology to search for closest term in
        :return: List[str] list of closest term IDs from the cross_ontology
        """
        # Walk up the ontology one generation at a time and stop at the first generation with a match.
        terms_to_match = [term_id]
        while terms_to_match:
            closest_bridge_terms = [
                bridge_term
                for term in terms_to_match
                if (bridge_term := self.get_bridge_term_id(term, cross_ontology))
            ]
            if closest_bridge_terms:
                # Different terms may bridge to the same target; report each target once, in first-seen order.
                return list(dict.fromkeys(closest_bridge_terms))
            # In a DAG several terms can share a parent; de-duplicate so each parent is looked up only once.
            terms_to_match = list(
                dict.fromkeys(parent for child in terms_to_match for parent in self.get_term_parents(child))
            )
        return []
class OntologyParser:
    """
    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
    """

    cxg_schema: CXGSchema
    """ CXGSchema object to fetch ontology metadata from """

    def __init__(self, schema_version: Optional[str] = None):
        """
        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
        parse the corresponding ontology metadata.

        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
        is loaded.
        """
        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
        # Lazily-populated cache of {ontology name: {term label: term ID}}; starts with one empty map per ontology.
        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
        }

    def _get_cached_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
        """
        Return the cached (not copied) label-to-ID map for an ontology, building it on first use.

        :param ontology_name: str name of ontology to get map of term labels to term IDs
        :return: Dict[str, str] the internal cache entry; callers must not mutate it
        """
        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
        if not supported_ontology_name:
            # Report the name the caller passed; the lookup result is always None on this path.
            raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")

        label_to_id = self.term_label_to_id_map[supported_ontology_name]
        if not label_to_id:
            for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
                label_to_id[term_metadata["label"]] = term_id
        return label_to_id

    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
        """
        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
        Raises ValueError if ontology_name is not a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
        {'Label A': 'CL:0000000', ... }

        :param ontology_name: str name of ontology to get map of term labels to term IDs
        :return: Dict[str, str] copy of the label-to-ID map, safe for the caller to mutate
        """
        return self._get_cached_term_label_to_id_map(ontology_name).copy()

    def _parse_ontology_name(self, term_id: str) -> str:
        """
        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.

        :param term_id: str ontology term to parse
        :return: str name of ontology that term belongs to
        """
        pattern = r"[A-Za-z]+:\d+"
        # re.match anchors only at the start, so trailing characters after the digits are tolerated.
        if not re.match(pattern, term_id):
            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")

        ontology_term_prefix = term_id.split(":")[0]
        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
        if not ontology_name:
            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")

        return ontology_name

    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
        """
        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.

        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
        it is imported in.
        Otherwise, returns None.

        :param ontology_term_prefix: str ontology term prefix to check
        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
        a supported ontology in the CxG schema.
        """
        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
            return ontology_term_prefix
        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
        return supported_ontology_name

    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
        """
        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology

        :param term_id: str ontology term to check
        :param ontology: str name of ontology to check against
        :return: boolean flag indicating whether the term is supported
        """
        try:
            ontology_name = self._parse_ontology_name(term_id)
        except ValueError:
            # Malformed ID or unsupported ontology prefix.
            return False
        if ontology and ontology_name != ontology:
            return False
        return term_id in self.cxg_schema.ontology(ontology_name)

    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
        ['CL:0000000', 'CL:0000057', ...

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: flattened List[str] of ancestor terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
        return ancestors + [term_id] if include_self else ancestors

    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003'],
            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
        }

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
        empty list if there are no ancestors.
        """
        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}

    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
        """
        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {}
        ontology_name = self._parse_ontology_name(term_id)
        # Copy so callers cannot mutate the cached ontology metadata.
        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
        return ancestors | {term_id: 0} if include_self else ancestors

    def map_term_ancestors_with_distances(
        self, term_ids: Iterable[str], include_self: bool = False
    ) -> Dict[str, Dict[str, int]]:
        """
        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3,
        'CL:0000005': 0}}

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
        respective distances from the term_id
        """
        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

    def get_term_parents(self, term_id: str) -> List[str]:
        """
        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_parents("CL:0000101")
        ['CL:0000526']

        :param term_id: str ontology term to find parents for
        :return: List[str] of parent terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
        # Direct parents are the ancestors at distance 1.
        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
        return parents

    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
        """
        Get the distance between two ontology terms. The distance is defined as the number of edges between the
        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find distance for
        :param term_id_2: str ontology term to find distance for
        :return: int distance between the two terms, measured in number of edges between their shortest path.
        """
        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
        if not lcas:
            return -1
        # Every lowest common ancestor has the same summed distance, so the first one is representative.
        return int(
            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lcas[0]]
            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lcas[0]]
        )

    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
        """
        Get the lowest common ancestors between two ontology terms.
        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find LCA for
        :param term_id_2: str ontology term to find LCA for
        :return: List[str] term IDs of the lowest common ancestor terms, in the ancestor order of term_id_1. Empty
        list if the terms are from different ontologies or share no ancestor.
        """
        ontology = self._parse_ontology_name(term_id_1)
        if ontology != self._parse_ontology_name(term_id_2):
            return []
        # include path to term itself
        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
        # Summed distance from both terms to each shared ancestor; dict order makes the result deterministic.
        summed_distances = {
            ancestor: distance + ancestors_2[ancestor]
            for ancestor, distance in ancestors_1.items()
            if ancestor in ancestors_2
        }
        if not summed_distances:
            return []
        min_sum_distances = min(summed_distances.values())
        return [ancestor for ancestor, total in summed_distances.items() if total == min_sum_distances]

    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
        """
        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
        ['CL:0000000']

        :param term_id: str ontology term to find high-level terms for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: List[str] of high-level terms that the term is a descendant of
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        # Set membership keeps the filter linear in len(high_level_terms).
        ancestors = set(self.get_term_ancestors(term_id, include_self=True))
        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]

    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
        Each key maps to empty list if there are no ancestors among the provided input.
        """
        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}

    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
        """
        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
        if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
        'CL:0000000'

        :param term_id: str ontology term to find highest level term for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
        high-level terms
        """
        matched_terms = self.get_high_level_terms(term_id, high_level_terms)
        if not matched_terms:
            return None
        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
        # "Highest" means farthest from term_id; ties resolve to the earliest entry in high_level_terms.
        return max(matched_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])

    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
        provided input. Raises ValueError if term ID is not valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective highest level term from the input list, or to
        None if there are no ancestors among the provided input.
        """
        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}

    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
        ['CL:0002363']

        :param term_id: str ontology term to find descendants for
        :param include_self: boolean flag to include the term itself as a descendant
        :return: List[str] of descendant terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        descendants = [term_id] if include_self else []
        # A term is a descendant when term_id appears among its ancestors.
        descendants.extend(
            candidate_descendant
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if term_id in candidate_metadata["ancestors"]
        )
        return descendants

    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
        included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003', ...],
            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
        }

        :param term_ids: list of str ontology terms to find descendants for
        :param include_self: boolean flag to include the term itself as an descendant
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
        empty list if there are no descendants.
        """
        descendants_dict: Dict[str, List[str]] = dict()
        ontology_names = set()
        for term_id in term_ids:
            if term_id in VALID_NON_ONTOLOGY_TERMS:
                descendants_dict[term_id] = []
                continue
            ontology_name = self._parse_ontology_name(term_id)
            descendants_dict[term_id] = [term_id] if include_self else []
            ontology_names.add(ontology_name)

        # Single pass over each involved ontology, rather than one pass per requested term.
        for ontology in ontology_names:
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
                ancestors = candidate_metadata["ancestors"]
                for ancestor_id in descendants_dict:
                    if ancestor_id in ancestors:
                        descendants_dict[ancestor_id].append(candidate_descendant)

        return descendants_dict

    def get_term_children(self, term_id: str) -> List[str]:
        """
        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_children("CL:0000526")
        ['CL:0000101']

        :param term_id: str ontology term to find children for
        :return: List[str] of children terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # A child is any term that lists term_id as an ancestor at distance 1.
        return [
            candidate_child
            for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if candidate_metadata["ancestors"].get(term_id) == 1
        ]

    def get_term_graph(self, term_id: str) -> OntologyNode:
        """
        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
        >>> root_node.term_id
        'CL:0000000'
        >>> root_node.to_dict() # doctest: +SKIP
        {
            "term_id": "CL:0000000",
            "name": "cell A",
            "children": [
                {
                    "term_id": "CL:0000001",
                    "name": "cell B",
                    "children": [...],
                },
                {
                    "term_id": "CL:0000002",
                    "name": "cell C",
                    "children": [...],
                },
                ...
            ]
        }
        >>> root_node.term_counter # doctest: +SKIP
        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...

        :param term_id: str ontology term to build subtree for
        :return: OntologyNode representation of graph with term_id as root.
        """
        term_label = self.get_term_label(term_id)
        root = OntologyNode(term_id, term_label)
        # Recurse per child; a term reachable through several parents gets one node under each of them.
        for child_term_id in self.get_term_children(term_id):
            root.add_child(self.get_term_graph(child_term_id))
        return root

    def is_term_deprecated(self, term_id: str) -> bool:
        """
        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
        ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.is_term_deprecated("CL:0000003")
        True

        :param term_id: str ontology term to check for deprecation
        :return: boolean flag indicating whether the term is deprecated
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return False
        ontology_name = self._parse_ontology_name(term_id)
        # A missing "deprecated" key means not deprecated; coerce so the return is always a bool.
        return bool(self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated"))

    def get_term_replacement(self, term_id: str) -> Union[str, None]:
        """
        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_replacement("CL:0000003")
        'CL:0000000'

        :param term_id: str ontology term to check a replacement term for
        :return: replacement str term ID if it exists, None otherwise
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return None
        ontology_name = self._parse_ontology_name(term_id)
        replaced_by: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
        # Normalize empty values to None.
        return replaced_by if replaced_by else None

    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
        """
        Fetch metadata for a given ontology term. Returns a dict with format

        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

        "comments" maps to List[str] of ontology curator comments
        "term_tracker" maps to a str url where there is discussion around this term's curation (or deprecation).
        "consider" maps to List[str] of alternate ontology terms to consider using instead of this term

        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
        of a supported ontology.

        :param term_id: str ontology term to fetch metadata for
        :return: Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
        """
        metadata_keys = ("comments", "term_tracker", "consider")
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {key: None for key in metadata_keys}
        ontology_name = self._parse_ontology_name(term_id)
        term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
        return {key: term_metadata.get(key, None) for key in metadata_keys}

    def get_term_label(self, term_id: str) -> str:
        """
        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label("CL:0000005")
        'neural crest derived fibroblast'

        :param term_id: str ontology term to fetch label for
        :return: str human-readable label for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            # Non-ontology terms are their own label.
            return term_id
        ontology_name = self._parse_ontology_name(term_id)
        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
        return label

    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
        """
        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
        member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}

        :param term_ids: list of str ontology terms to fetch label for
        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
        """
        return {term_id: self.get_term_label(term_id) for term_id in term_ids}

    def get_term_description(self, term_id: str) -> Optional[str]:
        """
        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_description("CL:0000005")
        'Any fibroblast that is derived from the neural crest.'

        :param term_id: str ontology term to fetch description for
        :return: str description for the term, or None if the term has no description
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            # Valid non-ontology terms are echoed back as their own description.
            return term_id
        ontology_name = self._parse_ontology_name(term_id)
        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
        return description

    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
        """
        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}

        :param term_ids: list of str ontology terms to fetch descriptions for
        :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions
        """
        return {term_id: self.get_term_description(term_id) for term_id in term_ids}

    def get_term_synonyms(self, term_id: str) -> List[str]:
        """
        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_synonyms("CL:0000019")
        ['sperm cell', 'spermatozoid', 'spermatozoon']

        :param term_id: str ontology term to fetch synonyms for
        :return: List[str] synonyms for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # list(...) returns a fresh list so callers cannot mutate the cached metadata.
        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
        return synonyms

    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
        """
        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}

        :param term_ids: list of str ontology terms to fetch synonyms for
        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
        """
        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}

    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
        """
        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
        ontology_name is not a supported ontology.

        Returns None if no term in the ontology has the given label.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
        'CL:0000005'

        :param term_label: str human-readable label to fetch term ID for
        :param ontology_name: str name of ontology to search for term label in
        :return: Optional[str] term ID with that label, or None if the label is not found in the ontology
        """
        # Read straight from the cache; copying the whole map for a single lookup is wasted work.
        return self._get_cached_term_label_to_id_map(ontology_name).get(term_label)

    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.

        If no applicable match is found, returns None.

        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
        'UBERON:0000468'

        :param term_id: str ontology term to find equivalent term for
        :param cross_ontology: str name of ontology to search for equivalent term in
        :return: Optional[str] equivalent term ID from the cross_ontology
        """
        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
            raise ValueError(
                f"{cross_ontology} is not in the set of supported cross ontology mappings "
                f"{self.cxg_schema.cross_ontology_mappings}."
            )
        ontology_name = self._parse_ontology_name(term_id)
        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
        bridge_term_id: Optional[str] = None
        if cross_ontology_terms:
            bridge_term_id = cross_ontology_terms.get(cross_ontology)
        return bridge_term_id

    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
        of the term for the closest match.

        If no applicable match is found, returns an empty list.

        If multiple ancestors of the same distance have matches, returns all possible closest matches. Each match
        is listed once, in the order it was first found.

        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
        ['UBERON:0000476', 'UBERON:0000920']

        :param term_id: str ontology term to find closest term for
        :param cross_ontology: str name of ontology to search for closest term in
        :return: List[str] list of closest term IDs from the cross_ontology
        """
        # Insertion-ordered dict used as an ordered set, so each bridge term is reported once.
        closest_bridge_terms: Dict[str, None] = {}
        terms_to_match = [term_id]
        while terms_to_match:
            for term in terms_to_match:
                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
                    closest_bridge_terms[closest_bridge_term] = None
            if closest_bridge_terms:
                break
            # Climb one level; drop repeated parents so shared ancestors are queried once per level.
            terms_to_match = list(
                dict.fromkeys(parent for child in terms_to_match for parent in self.get_term_parents(child))
            )
        return list(closest_bridge_terms)
An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
def __init__(self, schema_version: Optional[str] = None):
    """
    Build an OntologyParser bound to the ontology metadata of one CellxGene schema version.

    If the metadata is not cached, constructing the underlying CXGSchema makes a network call to GitHub Release
    Assets to load and parse it in memory.

    :param schema_version: str version of the schema to load ontology metadata for. If not provided (or empty),
        CXGSchema is constructed without a version, which loads the latest.
    """
    if schema_version:
        self.cxg_schema = CXGSchema(version=schema_version)
    else:
        self.cxg_schema = CXGSchema()

    # One initially-empty label -> term ID cache per supported ontology; get_term_label_to_id_map fills these
    # the first time each ontology is requested.
    label_caches: Dict[str, Dict[str, str]] = {}
    for ontology_name in self.cxg_schema.supported_ontologies:
        label_caches[ontology_name] = {}
    self.term_label_to_id_map: Dict[str, Dict[str, str]] = label_caches
Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema version. If not cached, it will make a network call to GitHub Release Assets to load in memory and parse the corresponding ontology metadata.
Parameters
- schema_version: str version of the schema to load ontology metadata for. If not provided, the latest is loaded.
def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
    """
    Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
    {'Label A': 'CL:0000000', ... }

    :param ontology_name: str name of ontology to get map of term labels to term IDs
    :return: Dict[str, str] copy of the cached label -> term ID map, so callers cannot mutate the cache
    :raises ValueError: if ontology_name does not resolve to a supported ontology
    """
    supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
    if not supported_ontology_name:
        # Report the name the caller passed in: supported_ontology_name is falsy here, so interpolating it
        # would produce the unhelpful message "None is not a supported ontology".
        raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")

    label_to_id = self.term_label_to_id_map[supported_ontology_name]
    if not label_to_id:
        # Cache miss (or an ontology with no terms): build the map from the ontology metadata.
        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
            label_to_id[term_metadata["label"]] = term_id

    return label_to_id.copy()
Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
{'Label A': 'CL:0000000', ... }
Parameters
- ontology_name: str name of ontology to get map of term labels to term IDs
def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
    """
    Report whether a term ID is well-formed and defined in a supported ontology. A term that is deprecated but
    still defined in the ontology counts as valid. When ``ontology`` is given, the term must additionally belong
    to that particular ontology; otherwise any supported ontology is accepted.

    :param term_id: str ontology term to check
    :param ontology: str name of ontology to check against
    :return: boolean flag indicating whether the term is supported
    """
    try:
        term_ontology = self._parse_ontology_name(term_id)
        # An empty/None ``ontology`` means "no restriction".
        in_requested_ontology = not ontology or term_ontology == ontology
        # Short-circuits, so the ontology metadata is only consulted when the ontology filter passes.
        return in_requested_ontology and term_id in self.cxg_schema.ontology(term_ontology)
    except ValueError:
        # Malformed ID or unsupported ontology: not valid, rather than an error.
        return False
Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine if the term is defined in that particular ontology. Otherwise, checks if the term is valid in any supported ontology.
Parameters
- term_id: str ontology term to check
- ontology: str name of ontology to check against
Returns
boolean flag indicating whether the term is supported
def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
    """
    List every ancestor of an ontology term. With include_self=True the term itself is appended after its
    ancestors. Raises ValueError if the term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
    ['CL:0000000', 'CL:0000057', ...

    :param term_id: str ontology term to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: flattened List[str] of ancestor terms; empty for valid non-ontology terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    ontology_name = self._parse_ontology_name(term_id)
    term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
    # "ancestors" maps ancestor ID -> distance; only the IDs are needed here.
    ancestor_ids = list(term_metadata["ancestors"])
    if include_self:
        ancestor_ids.append(term_id)
    return ancestor_ids
Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
['CL:0000000', 'CL:0000057', ...
Parameters
- term_id: str ontology term to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
flattened List[str] of ancestor terms
def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
    """
    Look up the ancestors of several terms at once by calling get_term_ancestors on each. With include_self=True
    each term is also listed among its own ancestors. Raises ValueError if a term ID is not a valid member of a
    supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
    {
        'CL:0000003': ['CL:0000003'],
        'CL:0000005': [..., 'CL:0000005']
    }

    :param term_ids: list of str ontology terms to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
        empty list if there are no ancestors.
    """
    ancestors_by_term: Dict[str, List[str]] = {}
    for term_id in term_ids:
        ancestors_by_term[term_id] = self.get_term_ancestors(term_id, include_self)
    return ancestors_by_term
Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
'CL:0000003': ['CL:0000003'],
'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
}
Parameters
- term_ids: list of str ontology terms to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to empty list if there are no ancestors.
def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
    """
    Return each ancestor of a term together with its distance from the term. With include_self=True the term
    itself is included at distance 0. Raises ValueError if the term ID is not a valid member of a supported
    ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
    {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}

    :param term_id: str ontology term to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id; empty for
        valid non-ontology terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return {}

    ontology_name = self._parse_ontology_name(term_id)
    term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
    # Work on a copy so the schema's own metadata is never modified.
    distances: Dict[str, int] = term_metadata["ancestors"].copy()
    if include_self:
        distances[term_id] = 0
    return distances
Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
{'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
Parameters
- term_id: str ontology term to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
Dict[str, int] map of ancestor terms and their respective distances from the term_id
def map_term_ancestors_with_distances(
    self, term_ids: Iterable[str], include_self: bool = False
) -> Dict[str, Dict[str, int]]:
    """
    Look up ancestors and their distances for several terms at once by calling
    get_term_ancestors_with_distances on each. With include_self=True each term is also included as its own
    ancestor. Raises ValueError if a term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
    {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3,
    'CL:0000005': 0}}

    :param term_ids: list of str ontology terms to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
        respective distances from the term_id
    """
    distances_by_term: Dict[str, Dict[str, int]] = {}
    for term_id in term_ids:
        distances_by_term[term_id] = self.get_term_ancestors_with_distances(term_id, include_self)
    return distances_by_term
Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
{'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3,
'CL:0000005': 0}}
Parameters
- term_ids: list of str ontology terms to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their respective distances from the term_id
def get_term_parents(self, term_id: str) -> List[str]:
    """
    List the direct parents of an ontology term, i.e. the ancestors recorded at distance 1. Raises ValueError if
    the term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_parents("CL:0000101")
    ['CL:0000526']

    :param term_id: str ontology term to find parents for
    :return: List[str] of parent terms; empty for valid non-ontology terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    ontology_name = self._parse_ontology_name(term_id)
    distance_by_ancestor: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
    direct_parents: List[str] = []
    for ancestor_id, distance in distance_by_ancestor.items():
        if distance == 1:
            direct_parents.append(ancestor_id)
    return direct_parents
Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_parents("CL:0000101")
['CL:0000526']
Parameters
- term_id: str ontology term to find parents for
Returns
List[str] of parent terms
def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
    """
    Measure the distance between two ontology terms as the number of edges on the path that joins them through
    a lowest common ancestor. Terms must be from the same ontology. Returns -1 if the terms are disjoint (no
    lowest common ancestor is found). Raises ValueError if term IDs are not valid members of a supported
    ontology.

    :param term_id_1: str ontology term to find distance for
    :param term_id_2: str ontology term to find distance for
    :return: int distance between the two terms, measured in number of edges between their shortest path.
    """
    common_ancestors = self.get_lowest_common_ancestors(term_id_1, term_id_2)
    if not common_ancestors:
        return -1

    # Every lowest common ancestor has the same summed distance, so the first one is representative.
    meeting_point = common_ancestors[0]
    edges_from_first = self.get_term_ancestors_with_distances(term_id_1, include_self=True)[meeting_point]
    edges_from_second = self.get_term_ancestors_with_distances(term_id_2, include_self=True)[meeting_point]
    return int(edges_from_first + edges_from_second)
Get the distance between two ontology terms. The distance is defined as the number of edges between the two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint. Raises ValueError if term IDs are not valid members of a supported ontology.
Parameters
- term_id_1: str ontology term to find distance for
- term_id_2: str ontology term to find distance for
Returns
int distance between the two terms, measured in number of edges between their shortest path.
def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
    """
    Get the lowest common ancestors of two ontology terms. A lowest common ancestor is a shared ancestor (the
    terms themselves included) whose summed distance to both terms is minimal. Ontologies are DAGs, so there may
    be multiple lowest common ancestors. Terms must be from the same ontology; if they are not, an empty list is
    returned. Raises ValueError if term IDs are not valid members of a supported ontology.

    :param term_id_1: str ontology term to find LCA for
    :param term_id_2: str ontology term to find LCA for
    :return: List[str] term IDs of the lowest common ancestors, sorted so the result is deterministic; empty if
        the terms share no ancestor
    """
    ontology = self._parse_ontology_name(term_id_1)
    if ontology != self._parse_ontology_name(term_id_2):
        return []

    # include path to term itself, so that one term being an ancestor of the other is handled
    ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
    ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)

    summed_distances = {
        ancestor: ancestors_1[ancestor] + ancestors_2[ancestor]
        for ancestor in ancestors_1.keys() & ancestors_2.keys()
    }
    if not summed_distances:
        return []

    min_sum_distances = min(summed_distances.values())
    # Sort: the candidates come from a set intersection, whose iteration order for strings is not stable
    # between interpreter runs.
    return sorted(ancestor for ancestor, total in summed_distances.items() if total == min_sum_distances)
Get the lowest common ancestors of two ontology terms. Terms must be from the same ontology; if they are not, an empty list is returned. Ontologies are DAGs, so there may be multiple lowest common ancestors. Raises ValueError if term IDs are not valid members of a supported ontology.
Parameters
- term_id_1: str ontology term to find LCA for
- term_id_2: str ontology term to find LCA for
Returns
List[str] term IDs of the lowest common ancestor terms
def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
    """
    Pick out, from a caller-supplied list of high-level terms, those that are ancestors of the given term (the
    term itself counts as its own ancestor here). Matches are returned in the order they appear in
    high_level_terms. Raises ValueError if term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
    ['CL:0000000']

    :param term_id: str ontology term to find high-level terms for
    :param high_level_terms: list of str ontology terms to check for ancestry to term_id
    :return: List[str] of high-level terms that the term is a descendant of; empty for valid non-ontology terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    lineage = self.get_term_ancestors(term_id, include_self=True)
    matched_terms: List[str] = []
    # Iterate the input list (not the lineage) so the caller's ordering is preserved.
    for candidate in high_level_terms:
        if candidate in lineage:
            matched_terms.append(candidate)
    return matched_terms
Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1 high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
['CL:0000000']
Parameters
- term_id: str ontology term to find high-level terms for
- high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns
List[str] of high-level terms that the term is a descendant of
def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
    """
    Apply get_high_level_terms to every term in term_ids and collect the results in a dictionary of the form

    {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

    Each term_id is mapped to the List[str] of high-level terms that it is a descendant of; a term counts as a
    descendant of itself. Raises ValueError if a term ID is not a valid member of a supported ontology.

    :param term_ids: list of str ontology terms to map high level terms for
    :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
    :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
        Each key maps to empty list if there are no ancestors among the provided input.
    """
    high_level_by_term: Dict[str, List[str]] = {}
    for term_id in term_ids:
        high_level_by_term[term_id] = self.get_high_level_terms(term_id, high_level_terms)
    return high_level_by_term
Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format
{"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
Parameters
- term_ids: list of str ontology terms to map high level terms for
- high_level_terms: list of str ontology terms to be mapped to descendant term_ids
Returns
Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list. Each key maps to empty list if there are no ancestors among the provided input.
def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
    """
    From a caller-supplied list of high-level terms, return the one that is an ancestor of the given term and
    lies farthest from it (largest recorded distance). If several matches are equally far, the one appearing
    first in high_level_terms is returned. Raises ValueError if term ID is not a valid member of a supported
    ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
    'CL:0000000'

    :param term_id: str ontology term to find highest level term for
    :param high_level_terms: list of str ontology terms to check for ancestry to term_id
    :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
        high-level terms
    """
    matched_terms = self.get_high_level_terms(term_id, high_level_terms)
    distance_by_ancestor = self.get_term_ancestors_with_distances(term_id, include_self=True)
    if not matched_terms:
        return None
    # Every matched term is an ancestor (or the term itself), so it is always a key of distance_by_ancestor.
    return max(matched_terms, key=distance_by_ancestor.__getitem__)
Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
'CL:0000000'
Parameters
- term_id: str ontology term to find highest level term for
- high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns
str highest level term that the term is a descendant of, or None if it is not a descendant of any high-level terms
def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
    """
    Apply get_highest_level_term to every term in term_ids and collect the results in a dictionary of the form

    {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

    Each term_id is mapped to the highest level term that it is a descendant of, from the list provided; a term
    counts as a descendant of itself. Raises ValueError if a term ID is not a valid member of a supported
    ontology.

    :param term_ids: list of str ontology terms to map high level terms for
    :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
    :return: Dictionary mapping str term IDs to their respective highest level term from the input list. Each key
        maps to None if the term does not descend from any of the provided high level terms.
    """
    highest_by_term: Dict[str, Union[str, None]] = {}
    for term_id in term_ids:
        highest_by_term[term_id] = self.get_highest_level_term(term_id, high_level_terms)
    return highest_by_term
Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format
{"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided. Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the provided input. Raises ValueError if term ID is not valid member of a supported ontology.
Parameters
- term_ids: list of str ontology terms to map high level terms for
- high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
Returns
Dictionary mapping str term IDs to their respective highest level term from the input list. Each key maps to None if the term does not descend from any of the provided high level terms.
def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
    """
    List every descendant of an ontology term by scanning the term's ontology for entries that record it as an
    ancestor. With include_self=True the term itself is placed first in the result. Raises ValueError if term ID
    is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
    ['CL:0002363']

    :param term_id: str ontology term to find descendants for
    :param include_self: boolean flag to include the term itself as a descendant
    :return: List[str] of descendant terms; empty for valid non-ontology terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    ontology_name = self._parse_ontology_name(term_id)
    found: List[str] = [term_id] if include_self else []
    found.extend(
        candidate_id
        for candidate_id, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
        if term_id in candidate_metadata["ancestors"]
    )
    return found
Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
['CL:0002363']
Parameters
- term_id: str ontology term to find descendants for
- include_self: boolean flag to include the term itself as a descendant
Returns
List[str] of descendant terms
def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
    """
    Find the descendants of several terms at once. Each ontology involved is scanned a single time, and every
    scanned entry is checked against all requested terms. With include_self=True each term is placed first in
    its own descendant list. Raises ValueError if a term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
    {
        'CL:0000003': ['CL:0000003', ...],
        'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
    }

    :param term_ids: list of str ontology terms to find descendants for
    :param include_self: boolean flag to include the term itself as a descendant
    :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
        empty list if there are no descendants (valid non-ontology terms always map to an empty list).
    """
    descendants_by_term: Dict[str, List[str]] = {}
    ontologies_to_scan = set()
    for term_id in term_ids:
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            descendants_by_term[term_id] = []
            continue
        ontologies_to_scan.add(self._parse_ontology_name(term_id))
        descendants_by_term[term_id] = [term_id] if include_self else []

    for ontology in ontologies_to_scan:
        for candidate_id, candidate_metadata in self.cxg_schema.ontology(ontology).items():
            candidate_ancestors = candidate_metadata["ancestors"]
            for requested_id, collected in descendants_by_term.items():
                if requested_id in candidate_ancestors:
                    collected.append(candidate_id)

    return descendants_by_term
Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
'CL:0000003': ['CL:0000003', ...],
'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
}
Parameters
- term_ids: list of str ontology terms to find descendants for
- include_self: boolean flag to include the term itself as a descendant
Returns
Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to empty list if there are no descendants.
def get_term_children(self, term_id: str) -> List[str]:
    """
    List the direct children of an ontology term, i.e. the terms in the same ontology that record it as an
    ancestor at distance 1. Raises ValueError if term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_children("CL:0000526")
    ['CL:0000101']

    :param term_id: str ontology term to find children for
    :return: List[str] of children terms; empty for valid non-ontology terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    ontology_name = self._parse_ontology_name(term_id)
    return [
        candidate_id
        for candidate_id, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
        # .get yields None when term_id is not an ancestor at all, which never equals 1.
        if candidate_metadata["ancestors"].get(term_id) == 1
    ]
Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_children("CL:0000526")
['CL:0000101']
Parameters
- term_id: str ontology term to find children for
Returns
List[str] of children terms
def get_term_graph(self, term_id: str) -> OntologyNode:
    """
    Build the DAG of OntologyNode relationships rooted at the input term by recursively attaching the graph of
    each direct child. Only includes terms from the same ontology as the root term ID. Raises ValueError if term
    ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> root_node = ontology_parser.get_term_graph("CL:0000000")
    >>> root_node.term_id
    'CL:0000000'
    >>> root_node.to_dict() # doctest: +SKIP
    {
        "term_id": "CL:0000000",
        "name": "cell A",
        "children": [
            {
                "term_id": "CL:0000001",
                "name": "cell B",
                "children": [...],
            },
            {
                "term_id": "CL:0000002",
                "name": "cell C",
                "children": [...],
            },
            ...
        ]
    }
    >>> root_node.term_counter # doctest: +SKIP
    Counter({'CL:0002058': 48, 'CL:0002471': 48, ...

    :param term_id: str ontology term to build subtree for
    :return: OntologyNode representation of graph with term_id as root.
    """
    root = OntologyNode(term_id, self.get_term_label(term_id))
    for child_term_id in self.get_term_children(term_id):
        # Each child contributes its own fully built graph; a term reachable along several paths is rebuilt
        # once per path.
        child_graph = self.get_term_graph(child_term_id)
        root.add_child(child_graph)
    return root
Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> root_node = ontology_parser.get_term_graph("CL:0000000")
>>> root_node.term_id
'CL:0000000'
>>> root_node.to_dict() # doctest: +SKIP
{
"term_id": "CL:0000000",
"name": "cell A",
"children": [
{
"term_id": "CL:0000001",
"name": "cell B",
"children": [...],
},
{
"term_id": "CL:0000002",
"name": "cell C",
"children": [...],
},
...
]
}
>>> root_node.term_counter # doctest: +SKIP
Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
Parameters
- term_id: str ontology term to build subtree for
Returns
OntologyNode representation of graph with term_id as root.
def is_term_deprecated(self, term_id: str) -> bool:
    """
    Check if an ontology term is deprecated. Raises ValueError if term ID is not a valid member of a supported
    ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.is_term_deprecated("CL:0000003")
    True

    :param term_id: str ontology term to check for deprecation
    :return: boolean flag indicating whether the term is deprecated; always False for a valid non-ontology term
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return False
    ontology_name = self._parse_ontology_name(term_id)
    # A term without a "deprecated" entry is treated as not deprecated. Coerce so the declared bool return type
    # always holds instead of leaking None to callers.
    return bool(self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated", False))
Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.is_term_deprecated("CL:0000003")
True
Parameters
- term_id: str ontology term to check for deprecation
Returns
boolean flag indicating whether the term is deprecated
def get_term_replacement(self, term_id: str) -> Union[str, None]:
    """
    Look up the term that replaces a deprecated ontology term. Returns None when no replacement is recorded, and
    always returns None for a valid non-ontology term. Raises ValueError if term ID is not a valid member of a
    supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_replacement("CL:0000003")
    'CL:0000000'

    :param term_id: str ontology term to check a replacement term for
    :return: replacement str term ID if it exists, None otherwise
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return None
    term_metadata = self.cxg_schema.ontology(self._parse_ontology_name(term_id))[term_id]
    replacement: Union[str, None] = term_metadata.get("replaced_by")
    # Any falsy stored value (missing key, None, empty string) is normalized to None.
    return replacement or None
Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_replacement("CL:0000003")
'CL:0000000'
Parameters
- term_id: str ontology term to check a replacement term for
Returns
replacement str term ID if it exists, None otherwise
def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
    """
    Fetch metadata for a given ontology term. Returns a dict with format

    {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

    "comments" maps to List[str] of ontology curator comments.
    "term_tracker" maps to a str url where there is discussion around this term's curation (or deprecation).
    "consider" maps to List[str] of alternate ontology terms to consider using instead of this term.

    All keys map to None if no metadata of that type is present, and for any valid non-ontology term. Keys are
    always returned in the order shown above. Raises ValueError if term ID is not a valid member of a supported
    ontology.

    :param term_id: str ontology term to fetch metadata for
    :return: Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
    """
    # A tuple (rather than a set) keeps the key order of the returned dict stable across interpreter runs.
    metadata_keys = ("comments", "term_tracker", "consider")
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return {key: None for key in metadata_keys}
    ontology_name = self._parse_ontology_name(term_id)
    # Resolve the term's record once instead of once per key.
    term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
    return {key: term_metadata.get(key, None) for key in metadata_keys}
Fetch metadata for a given ontology term. Returns a dict with format
{"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
"comments" maps to a List[str] of ontology curator comments. "term_tracker" maps to a str URL where there is discussion around this term's curation (or deprecation). "consider" maps to a List[str] of alternate ontology terms to consider using instead of this term.
All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member of a supported ontology.
Parameters
- term_id: str ontology term to fetch metadata for
Returns
Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
def get_term_label(self, term_id: str) -> str:
    """
    Look up the human-readable label of an ontology term. A valid non-ontology term is returned unchanged as its
    own label. Raises ValueError if term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_label("CL:0000005")
    'neural crest derived fibroblast'

    :param term_id: str ontology term to fetch label for
    :return: str human-readable label for the term
    """
    if term_id not in VALID_NON_ONTOLOGY_TERMS:
        ontology_terms = self.cxg_schema.ontology(self._parse_ontology_name(term_id))
        term_label: str = ontology_terms[term_id]["label"]
        return term_label
    # Valid non-ontology terms carry no ontology record; the term doubles as its label.
    return term_id
Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label("CL:0000005")
'neural crest derived fibroblast'
Parameters
- term_id: str ontology term to fetch label for
Returns
str human-readable label for the term
def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
    """
    Look up the human-readable labels for several ontology terms at once by calling get_term_label on each.
    Raises ValueError if any term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
    {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}

    :param term_ids: iterable of str ontology terms to fetch label for
    :return: Dict[str, str] mapping term IDs to their respective human-readable labels
    """
    labels_by_term_id: Dict[str, str] = {}
    for current_id in term_ids:
        labels_by_term_id[current_id] = self.get_term_label(current_id)
    return labels_by_term_id
Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
{'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
Parameters
- term_ids: list of str ontology terms to fetch label for
Returns
Dict[str, str] mapping term IDs to their respective human-readable labels
def get_term_description(self, term_id: str) -> Optional[str]:
    """
    Look up the description of an ontology term. A valid non-ontology term is returned unchanged in place of a
    description. Raises ValueError if term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_description("CL:0000005")
    'Any fibroblast that is derived from the neural crest.'

    :param term_id: str ontology term to fetch description for
    :return: str description for the term, or None if the term has no description
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        # Valid non-ontology terms are echoed back instead of being looked up.
        return term_id
    term_metadata = self.cxg_schema.ontology(self._parse_ontology_name(term_id))[term_id]
    term_description: Optional[str] = term_metadata.get("description")
    return term_description
Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_description("CL:0000005")
'Any fibroblast that is derived from the neural crest.'
Parameters
- term_id: str ontology term to fetch description for
Returns
str description for the term, or None if the term has no description
def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
    """
    Look up the descriptions for several ontology terms at once by calling get_term_description on each. Raises
    ValueError if any term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
    {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}

    :param term_ids: list of str ontology terms to fetch descriptions for
    :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions
    """
    descriptions_by_term_id: Dict[str, Optional[str]] = {}
    for current_id in term_ids:
        descriptions_by_term_id[current_id] = self.get_term_description(current_id)
    return descriptions_by_term_id
Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
{'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
Parameters
- term_ids: list of str ontology terms to fetch descriptions for
Returns
Dict[str, Optional[str]] mapping term IDs to their respective descriptions (None where a term has no description)
def get_term_synonyms(self, term_id: str) -> List[str]:
    """
    Look up the synonym labels of an ontology term. Returns an empty list when the term has no synonyms recorded,
    and for any valid non-ontology term. Raises ValueError if term ID is not a valid member of a supported
    ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_synonyms("CL:0000019")
    ['sperm cell', 'spermatozoid', 'spermatozoon']

    :param term_id: str ontology term to fetch synonyms for
    :return: List[str] synonyms for the term
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []
    term_metadata = self.cxg_schema.ontology(self._parse_ontology_name(term_id))[term_id]
    # list() builds a new list object, so the stored synonyms are not handed out directly.
    found_synonyms: List[str] = list(term_metadata.get("synonyms", []))
    return found_synonyms
Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_synonyms("CL:0000019")
['sperm cell', 'spermatozoid', 'spermatozoon']
Parameters
- term_id: str ontology term to fetch synonyms for
Returns
List[str] synonyms for the term
def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
    """
    Look up the synonym labels for several ontology terms at once by calling get_term_synonyms on each. Raises
    ValueError if any term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
    {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}

    :param term_ids: list of str ontology terms to fetch synonyms for
    :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
    """
    synonyms_by_term_id: Dict[str, List[str]] = {}
    for current_id in term_ids:
        synonyms_by_term_id[current_id] = self.get_term_synonyms(current_id)
    return synonyms_by_term_id
Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
{'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
Parameters
- term_ids: list of str ontology terms to fetch synonyms for
Returns
Dict[str, List[str]] mapping term IDs to their respective synonym lists
def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
    """
    Resolve a human-readable label to its ontology term ID, searching only within ontology_name. Raises
    ValueError if ontology_name is not a supported ontology.

    Returns None if no term in that ontology has the given label.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
    'CL:0000005'

    :param term_label: str human-readable label to fetch term ID for
    :param ontology_name: str name of ontology to search for term label in
    :return: Optional[str] term ID with that label, or None if the label is not found in the ontology
    """
    return self.get_term_label_to_id_map(ontology_name).get(term_label)
Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if ontology_name is not a supported ontology.
Returns None if no term in the given ontology has that label.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
'CL:0000005'
Parameters
- term_label: str human-readable label to fetch term ID for
- ontology_name: str name of ontology to search for term label in
Returns
Optional[str] term ID with that label, or None if the label is not found in the ontology
def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
    """
    Look up the term ID in cross_ontology that is recorded as the equivalent of term_id. Only an exact match
    stored on the term itself is returned.

    If no applicable match is found, returns None.

    Raises ValueError if cross_ontology is not one of the supported cross ontology mappings, or if term ID is not
    a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
    'UBERON:0000468'

    :param term_id: str ontology term to find equivalent term for
    :param cross_ontology: str name of ontology to search for equivalent term in
    :return: Optional[str] equivalent term ID from the cross_ontology
    """
    supported_mappings = self.cxg_schema.cross_ontology_mappings
    if cross_ontology not in supported_mappings:
        raise ValueError(
            f"{cross_ontology} is not in the set of supported cross ontology mappings " f"{supported_mappings}."
        )
    term_metadata = self.cxg_schema.ontology(self._parse_ontology_name(term_id))[term_id]
    equivalents = term_metadata.get("cross_ontology_terms")
    if not equivalents:
        # Nothing recorded for this term (missing or empty mapping).
        return None
    matched_id: Optional[str] = equivalents.get(cross_ontology)
    return matched_id
For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
If no applicable match is found, returns None.
Raises ValueError if term ID is not a valid member of a supported ontology, or if cross_ontology is not one of the supported cross ontology mappings.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
'UBERON:0000468'
Parameters
- term_id: str ontology term to find equivalent term for
- cross_ontology: str name of ontology to search for equivalent term in
Returns
Optional[str] equivalent term ID from the cross_ontology
def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
    """
    For a given term ID, fetch the equivalent term ID from a given ontology. If a match is found,
    returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
    of the term, one level of parents at a time, for the closest match.

    If no applicable match is found, returns an empty list.

    If multiple ancestors of the same distance have matches, returns all possible closest matches. Each matching
    term ID appears once, even when it is reachable through several ancestor paths.

    Raises ValueError if term ID or cross_ontology is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
    ['UBERON:0000476', 'UBERON:0000920']

    :param term_id: str ontology term to find closest term for
    :param cross_ontology: str name of ontology to search for closest term in
    :return: List[str] list of closest term IDs from the cross_ontology
    """
    closest_bridge_terms: List[str] = []
    # Terms already queued at this or a shallower level. Skipping them keeps the frontier from growing with the
    # number of paths through the DAG and guarantees termination even if the parent data contained a cycle.
    visited = {term_id}
    terms_to_match = [term_id]
    while terms_to_match:
        for term in terms_to_match:
            bridge_term = self.get_bridge_term_id(term, cross_ontology)
            if bridge_term and bridge_term not in closest_bridge_terms:
                closest_bridge_terms.append(bridge_term)
        if closest_bridge_terms:
            # Closest level with a match found; no need to fetch the next level of parents.
            break
        next_level: List[str] = []
        for child in terms_to_match:
            for parent in self.get_term_parents(child):
                if parent not in visited:
                    visited.add(parent)
                    next_level.append(parent)
        terms_to_match = next_level
    return closest_bridge_terms
For a given term ID, fetch the equivalent term ID from a given ontology. If match is found, returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors of the term for the closest match.
If no applicable match is found, returns an empty list.
If multiple ancestors of the same distance have matches, returns all possible closest matches.
Raises ValueError if term ID is not a valid member of a supported ontology, or if cross_ontology is not one of the supported cross ontology mappings.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
['UBERON:0000476', 'UBERON:0000920']
Parameters
- term_id: str ontology term to find closest term for
- cross_ontology: str name of ontology to search for closest term in
Returns
List[str] list of closest term IDs from the cross_ontology