cellxgene_ontology_guide.ontology_parser
import re
from typing import Any, Dict, Iterable, List, Optional, Union

from cellxgene_ontology_guide._constants import VALID_NON_ONTOLOGY_TERMS
from cellxgene_ontology_guide.entities import OntologyNode
from cellxgene_ontology_guide.supported_versions import CXGSchema


class OntologyParser:
    """
    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
    """

    cxg_schema: CXGSchema
    """ CXGSchema object to fetch ontology metadata from """

    def __init__(self, schema_version: Optional[str] = None):
        """
        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
        parse the corresponding ontology metadata.

        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
        is loaded.
        """
        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
        # Per-ontology cache of label -> term ID; each inner map is filled lazily on first request.
        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
        }

    def _get_term_record(self, term_id: str) -> Dict[str, Any]:
        """
        Look up the raw metadata record of a term in the ontology it belongs to.

        :param term_id: str ontology term to look up
        :return: metadata dict stored for term_id in its ontology
        :raises ValueError: if term_id is malformed or its prefix is not part of a supported ontology
        :raises KeyError: if term_id is well-formed but not defined in its ontology
        """
        ontology_name = self._parse_ontology_name(term_id)
        record: Dict[str, Any] = self.cxg_schema.ontology(ontology_name)[term_id]
        return record

    def _load_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
        """
        Return the cached (internal, not copied) label -> term ID map for an ontology, building it on first use.

        :param ontology_name: str name of ontology to get map of term labels to term IDs
        :return: the cached Dict[str, str]; callers must not mutate it
        :raises ValueError: if ontology_name is neither supported nor imported in a supported ontology
        """
        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
        if not supported_ontology_name:
            # Report the name the caller passed in; the resolved name is always None on this path.
            raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")

        label_to_id = self.term_label_to_id_map[supported_ontology_name]
        if not label_to_id:
            for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
                label_to_id[term_metadata["label"]] = term_id
        return label_to_id

    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
        """
        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
        Returns a copy, so callers may freely modify the result. Raises ValueError if ontology_name is not a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
        {'Label A': 'CL:0000000', ... }

        :param ontology_name: str name of ontology to get map of term labels to term IDs
        :return: Dict[str, str] mapping term labels to term IDs
        """
        return self._load_term_label_to_id_map(ontology_name).copy()

    def _parse_ontology_name(self, term_id: str) -> str:
        """
        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.

        Note: the format check only anchors at the start of term_id, so trailing characters after the numeric part
        are not rejected here.

        :param term_id: str ontology term to parse
        :return: str name of ontology that term belongs to
        """
        pattern = r"[A-Za-z]+:\d+"
        if not re.match(pattern, term_id):
            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")

        ontology_term_prefix = term_id.split(":")[0]
        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
        if not ontology_name:
            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")

        return ontology_name

    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
        """
        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.

        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
        it is imported in.
        Otherwise, returns None.

        :param ontology_term_prefix: str ontology term prefix to check
        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
        a supported ontology in the CxG schema.
        """
        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
            return ontology_term_prefix
        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
        return supported_ontology_name

    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
        """
        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology

        :param term_id: str ontology term to check
        :param ontology: str name of ontology to check against
        :return: boolean flag indicating whether the term is supported
        """
        try:
            ontology_name = self._parse_ontology_name(term_id)
            if ontology and ontology_name != ontology:
                return False
            return term_id in self.cxg_schema.ontology(ontology_name)
        except ValueError:
            # Malformed ID or unsupported ontology: not valid, rather than an error.
            return False

    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
        an ancestor (appended after the other ancestors). Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
        ['CL:0000000', 'CL:0000057', ...

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: flattened List[str] of ancestor terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ancestors = list(self._get_term_record(term_id)["ancestors"])
        return ancestors + [term_id] if include_self else ancestors

    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003'],
            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
        }

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
        empty list if there are no ancestors.
        """
        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}

    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
        """
        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {}
        # Copy so callers cannot mutate the ontology metadata held by the schema.
        ancestors: Dict[str, int] = self._get_term_record(term_id)["ancestors"].copy()
        return ancestors | {term_id: 0} if include_self else ancestors

    def map_term_ancestors_with_distances(
        self, term_ids: Iterable[str], include_self: bool = False
    ) -> Dict[str, Dict[str, int]]:
        """
        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
        respective distances from the term_id
        """
        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

    def get_term_parents(self, term_id: str) -> List[str]:
        """
        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_parents("CL:0000101")
        ['CL:0000526']

        :param term_id: str ontology term to find parents for
        :return: List[str] of parent terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ancestors: Dict[str, int] = self._get_term_record(term_id)["ancestors"]
        # Parents are exactly the ancestors one edge away.
        return [ancestor for ancestor, distance in ancestors.items() if distance == 1]

    def _get_common_ancestor_distances(self, term_id_1: str, term_id_2: str) -> Dict[str, int]:
        """
        Map every ancestor shared by two terms (each term counting as its own ancestor at distance 0) to the summed
        distance from both terms to that ancestor.

        :param term_id_1: str first ontology term
        :param term_id_2: str second ontology term
        :return: Dict[str, int] of common ancestor -> combined distance; empty if the terms belong to different
        ontologies or share no ancestor
        :raises ValueError: if either term ID is not a valid member of a supported ontology
        """
        ontology = self._parse_ontology_name(term_id_1)
        if ontology != self._parse_ontology_name(term_id_2):
            return {}
        # include path to term itself
        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
        return {
            ancestor: distance + ancestors_2[ancestor]
            for ancestor, distance in ancestors_1.items()
            if ancestor in ancestors_2
        }

    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
        """
        Get the distance between two ontology terms. The distance is defined as the number of edges between the
        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find distance for
        :param term_id_2: str ontology term to find distance for
        :return: int distance between the two terms, measured in number of edges between their shortest path.
        """
        combined_distances = self._get_common_ancestor_distances(term_id_1, term_id_2)
        if not combined_distances:
            return -1
        # The shortest path runs through a lowest common ancestor, i.e. the smallest combined distance.
        return int(min(combined_distances.values()))

    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
        """
        Get the lowest common ancestors between two ontology terms that is from the given ontology.
        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find LCA for
        :param term_id_2: str ontology term to find LCA for
        :return: List[str] term IDs of the lowest common ancestor terms, in the order they appear among the ancestors
        of term_id_1. Empty list if the terms are from different ontologies or share no ancestor.
        """
        combined_distances = self._get_common_ancestor_distances(term_id_1, term_id_2)
        if not combined_distances:
            return []
        shortest = min(combined_distances.values())
        return [ancestor for ancestor, distance in combined_distances.items() if distance == shortest]

    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
        """
        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
        ['CL:0000000']

        :param term_id: str ontology term to find high-level terms for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: List[str] of high-level terms that the term is a descendant of
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        # Set gives constant-time membership checks; output order follows high_level_terms.
        ancestors = set(self.get_term_ancestors(term_id, include_self=True))
        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]

    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
        Each key maps to empty list if there are no ancestors among the provided input.
        """
        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}

    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
        """
        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
        if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
        'CL:0000000'

        :param term_id: str ontology term to find highest level term for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
        high-level terms
        """
        matched_terms = self.get_high_level_terms(term_id, high_level_terms)
        if not matched_terms:
            return None
        distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
        # "Highest" means furthest from term_id; on ties the first match in input order wins.
        return max(matched_terms, key=lambda high_level_term: distances[high_level_term])

    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
        provided input. Raises ValueError if term ID is not valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective highest level term from the input list.
        Each key maps to None if there are no ancestors among the provided input.
        """
        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}

    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
        ['CL:0002363']

        :param term_id: str ontology term to find descendants for
        :param include_self: boolean flag to include the term itself as a descendant
        :return: List[str] of descendant terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        descendants = [term_id] if include_self else []
        # A term is a descendant exactly when term_id appears among its ancestors.
        descendants.extend(
            candidate_descendant
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if term_id in candidate_metadata["ancestors"]
        )
        return descendants

    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
        included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003', ...],
            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
        }

        :param term_ids: list of str ontology terms to find descendants for
        :param include_self: boolean flag to include the term itself as an descendant
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
        empty list if there are no descendants.
        """
        descendants_dict: Dict[str, List[str]] = dict()
        ontology_names = set()
        for term_id in term_ids:
            if term_id in VALID_NON_ONTOLOGY_TERMS:
                descendants_dict[term_id] = []
                continue
            ontology_names.add(self._parse_ontology_name(term_id))
            descendants_dict[term_id] = [term_id] if include_self else []

        requested_terms = descendants_dict.keys()
        # Scan each involved ontology once, crediting every candidate to all requested terms among its ancestors.
        for ontology in ontology_names:
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
                for ancestor_id in candidate_metadata["ancestors"].keys() & requested_terms:
                    descendants_dict[ancestor_id].append(candidate_descendant)

        return descendants_dict

    def get_term_children(self, term_id: str) -> List[str]:
        """
        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_children("CL:0000526")
        ['CL:0000101', 'CL:4042034']

        :param term_id: str ontology term to find children for
        :return: List[str] of children terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # A child is any term that lists term_id as an ancestor exactly one edge away.
        return [
            candidate_child
            for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if candidate_metadata["ancestors"].get(term_id) == 1
        ]

    def get_term_graph(self, term_id: str) -> OntologyNode:
        """
        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
        >>> root_node.term_id
        'CL:0000000'
        >>> root_node.to_dict() # doctest: +SKIP
        {
            "term_id": "CL:0000000",
            "name": "cell A",
            "children": [
                {
                    "term_id": "CL:0000001",
                    "name": "cell B",
                    "children": [...],
                },
                {
                    "term_id": "CL:0000002",
                    "name": "cell C",
                    "children": [...],
                },
                ...
            ]
        }
        >>> root_node.term_counter # doctest: +SKIP
        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...

        :param term_id: str ontology term to build subtree for
        :return: OntologyNode representation of graph with term_id as root.
        """
        term_label = self.get_term_label(term_id)
        root = OntologyNode(term_id, term_label)
        # Recurse depth-first; a term reachable through several parents gets a separate node on each path.
        for child_term_id in self.get_term_children(term_id):
            root.add_child(self.get_term_graph(child_term_id))
        return root

    def is_term_deprecated(self, term_id: str) -> bool:
        """
        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
        ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.is_term_deprecated("CL:0000003")
        True

        :param term_id: str ontology term to check for deprecation
        :return: boolean flag indicating whether the term is deprecated
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return False
        # Coerce so a missing "deprecated" key yields False instead of leaking None through a bool return type.
        return bool(self._get_term_record(term_id).get("deprecated", False))

    def get_term_replacement(self, term_id: str) -> Union[str, None]:
        """
        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_replacement("CL:0000003")
        'CL:0000000'

        :param term_id: str ontology term to check a replacement term for
        :return: replacement str term ID if it exists, None otherwise
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return None
        replaced_by: Optional[str] = self._get_term_record(term_id).get("replaced_by")
        # Normalize empty values (e.g. "") to None.
        return replaced_by or None

    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
        """
        Fetch metadata for a given ontology term. Returns a dict with format

        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

        comments maps to List[str] of ontology curator comments
        term_tracker maps to a str url where there is discussion around this term's curation (or deprecation).
        consider maps to List[str] of alternate ontology terms to consider using instead of this term

        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
        of a supported ontology.

        :param term_id: str ontology term to fetch metadata for
        :return: Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
        """
        metadata_keys = ("comments", "term_tracker", "consider")
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {key: None for key in metadata_keys}
        term_record = self._get_term_record(term_id)
        return {key: term_record.get(key, None) for key in metadata_keys}

    def get_term_label(self, term_id: str) -> str:
        """
        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label("CL:0000005")
        'neural crest derived fibroblast'

        :param term_id: str ontology term to fetch label for
        :return: str human-readable label for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            # Non-ontology terms act as their own label.
            return term_id
        label: str = self._get_term_record(term_id)["label"]
        return label

    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
        """
        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
        member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}

        :param term_ids: list of str ontology terms to fetch label for
        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
        """
        return {term_id: self.get_term_label(term_id) for term_id in term_ids}

    def get_term_description(self, term_id: str) -> Optional[str]:
        """
        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_description("CL:0000005")
        'Any fibroblast that is derived from the neural crest.'

        :param term_id: str ontology term to fetch description for
        :return: str description for the term, or None if the term has no description. For valid non-ontology
        terms, the term ID itself is returned.
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return term_id
        description: Optional[str] = self._get_term_record(term_id).get("description", None)
        return description

    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
        """
        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}

        :param term_ids: list of str ontology terms to fetch descriptions for
        :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions
        """
        return {term_id: self.get_term_description(term_id) for term_id in term_ids}

    def get_term_synonyms(self, term_id: str) -> List[str]:
        """
        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_synonyms("CL:0000019")
        ['sperm cell', 'spermatozoid', 'spermatozoon']

        :param term_id: str ontology term to fetch synonyms for
        :return: List[str] synonyms for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        # list(...) hands back a fresh list so callers cannot mutate the stored metadata.
        synonyms: List[str] = list(self._get_term_record(term_id).get("synonyms", []))
        return synonyms

    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
        """
        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}

        :param term_ids: list of str ontology terms to fetch synonyms for
        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
        """
        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}

    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
        """
        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
        ontology_name is not a supported ontology.

        Returns None if the label is not found in the ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
        'CL:0000005'

        :param term_label: str human-readable label to fetch term ID for
        :param ontology_name: str name of ontology to search for term label in
        :return: Optional[str] term IDs with that label, or None if the label is not found in the ontology
        """
        # Read the cached map directly; copying the whole map for a single lookup is wasted work.
        return self._load_term_label_to_id_map(ontology_name).get(term_label)

    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.

        If no applicable match is found, returns None.

        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
        'UBERON:0000468'

        :param term_id: str ontology term to find equivalent term for
        :param cross_ontology: str name of ontology to search for equivalent term in
        :return: Optional[str] equivalent term ID from the cross_ontology
        """
        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
            raise ValueError(
                f"{cross_ontology} is not in the set of supported cross ontology mappings "
                f"{self.cxg_schema.cross_ontology_mappings}."
            )
        cross_ontology_terms = self._get_term_record(term_id).get("cross_ontology_terms")
        if not cross_ontology_terms:
            return None
        bridge_term_id: Optional[str] = cross_ontology_terms.get(cross_ontology)
        return bridge_term_id

    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
        of the term for the closest match.

        If no applicable match is found, returns an empty list.

        If multiple ancestors of the same distance have matches, returns all possible closest matches, each listed
        once.

        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
        ['UBERON:0000476', 'UBERON:0000920']

        :param term_id: str ontology term to find closest term for
        :param cross_ontology: str name of ontology to search for closest term in
        :return: List[str] list of closest term IDs from the cross_ontology
        """
        # dicts keep insertion order, so they serve as ordered sets for de-duplication here.
        closest_bridge_terms: Dict[str, None] = {}
        visited = set()
        terms_to_match = [term_id]
        # Walk upward one generation at a time and stop at the first generation with any match.
        while terms_to_match:
            visited.update(terms_to_match)
            for term in terms_to_match:
                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
                    closest_bridge_terms[closest_bridge_term] = None
            if closest_bridge_terms:
                break
            # Terms already checked had no match and their parents were already queued, so skip them.
            terms_to_match = list(
                dict.fromkeys(
                    parent
                    for child in terms_to_match
                    for parent in self.get_term_parents(child)
                    if parent not in visited
                )
            )
        return list(closest_bridge_terms)
class OntologyParser:
    """
    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
    """

    cxg_schema: "CXGSchema"
    """ CXGSchema object to fetch ontology metadata from """

    def __init__(self, schema_version: Optional[str] = None):
        """
        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
        parse the corresponding ontology metadata.

        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
        is loaded.
        """
        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
        # One label -> term ID cache per supported ontology, filled lazily on first request.
        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
        }

    def _get_cached_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
        """
        Return the cached (not copied) label -> term ID map for an ontology, building it on first use.

        Callers must not hand the returned dict to user code; use get_term_label_to_id_map for a safe copy.

        :param ontology_name: str name of ontology to get map of term labels to term IDs
        :return: the cache dict held on this parser for the resolved ontology
        :raises ValueError: if ontology_name is neither supported directly nor imported by a supported ontology
        """
        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
        if not supported_ontology_name:
            # Report the name the caller passed in; the resolved name is always None on this path.
            raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")

        label_to_id = self.term_label_to_id_map[supported_ontology_name]
        if not label_to_id:
            for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
                label_to_id[term_metadata["label"]] = term_id
        return label_to_id

    def _get_term_entry(self, term_id: str) -> Dict[str, Any]:
        """
        Look up the raw metadata entry stored for a term in its source ontology.

        A well-formed term ID whose prefix is supported but which is not defined in the ontology is not caught
        here; the lookup error of the underlying mapping propagates to the caller.

        :param term_id: str ontology term to look up
        :return: the metadata mapping stored for term_id
        :raises ValueError: if term_id is malformed or its prefix is not part of a supported ontology
        """
        ontology_name = self._parse_ontology_name(term_id)
        entry: Dict[str, Any] = self.cxg_schema.ontology(ontology_name)[term_id]
        return entry

    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
        """
        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
        The returned dict is a copy, so mutating it does not affect the cache. Raises ValueError if ontology_name
        is not a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
        {'Label A': 'CL:0000000', ... }

        :param ontology_name: str name of ontology to get map of term labels to term IDs
        :return: Dict[str, str] mapping each term label to its term ID
        """
        return self._get_cached_term_label_to_id_map(ontology_name).copy()

    def _parse_ontology_name(self, term_id: str) -> str:
        """
        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.

        :param term_id: str ontology term to parse
        :return: str name of ontology that term belongs to
        """
        pattern = r"[A-Za-z]+:\d+"
        # NOTE: re.match only anchors at the start of the string, so characters trailing the digits are tolerated
        # by this format check.
        if not re.match(pattern, term_id):
            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")

        ontology_term_prefix = term_id.split(":")[0]
        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
        if not ontology_name:
            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")

        return ontology_name

    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
        """
        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.

        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
        it is imported in.
        Otherwise, returns None.

        :param ontology_term_prefix: str ontology term prefix to check
        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
        a supported ontology in the CxG schema.
        """
        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
            return ontology_term_prefix
        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
        return supported_ontology_name

    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
        """
        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology

        :param term_id: str ontology term to check
        :param ontology: str name of ontology to check against
        :return: boolean flag indicating whether the term is supported
        """
        try:
            ontology_name = self._parse_ontology_name(term_id)
            if ontology and ontology_name != ontology:
                return False
            return bool(term_id in self.cxg_schema.ontology(ontology_name))
        except ValueError:
            return False

    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
        ['CL:0000000', 'CL:0000057', ...

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: flattened List[str] of ancestor terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ancestors = list(self._get_term_entry(term_id)["ancestors"])
        return ancestors + [term_id] if include_self else ancestors

    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003'],
            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
        }

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
        empty list if there are no ancestors.
        """
        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}

    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
        """
        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {}
        # Copy so callers cannot mutate the ontology data held by the schema.
        ancestors: Dict[str, int] = self._get_term_entry(term_id)["ancestors"].copy()
        return ancestors | {term_id: 0} if include_self else ancestors

    def map_term_ancestors_with_distances(
        self, term_ids: Iterable[str], include_self: bool = False
    ) -> Dict[str, Dict[str, int]]:
        """
        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
        respective distances from the term_id
        """
        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

    def get_term_parents(self, term_id: str) -> List[str]:
        """
        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_parents("CL:0000101")
        ['CL:0000526']

        :param term_id: str ontology term to find parents for
        :return: List[str] of parent terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ancestors: Dict[str, int] = self._get_term_entry(term_id)["ancestors"]
        # Parents are exactly the ancestors one edge away.
        return [ancestor for ancestor, distance in ancestors.items() if distance == 1]

    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
        """
        Get the distance between two ontology terms. The distance is defined as the number of edges between the
        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find distance for
        :param term_id_2: str ontology term to find distance for
        :return: int distance between the two terms, measured in number of edges between their shortest path.
        """
        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
        if not lcas:
            return -1
        # Every lowest common ancestor has the same summed distance, so any one of them gives the answer.
        lca = lcas[0]
        return int(
            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lca]
            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lca]
        )

    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
        """
        Get the lowest common ancestors between two ontology terms that is from the given ontology.
        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find LCA for
        :param term_id_2: str ontology term to find LCA for
        :return: List[str] term IDs of the lowest common ancestor terms, in the order they appear among the
        ancestors of term_id_1. Empty list if the terms belong to different ontologies or share no ancestor.
        """
        ontology = self._parse_ontology_name(term_id_1)
        if ontology != self._parse_ontology_name(term_id_2):
            return []
        # include path to term itself
        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
        # Iterate a dict rather than a set so the result order is stable from run to run.
        summed_distances = {
            ancestor: distance + ancestors_2[ancestor]
            for ancestor, distance in ancestors_1.items()
            if ancestor in ancestors_2
        }
        if not summed_distances:
            return []
        min_sum_distances = min(summed_distances.values())
        return [ancestor for ancestor, total in summed_distances.items() if total == min_sum_distances]

    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
        """
        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
        ['CL:0000000']

        :param term_id: str ontology term to find high-level terms for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: List[str] of high-level terms that the term is a descendant of
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ancestors = set(self.get_term_ancestors(term_id, include_self=True))
        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]

    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
        Each key maps to empty list if there are no ancestors among the provided input.
        """
        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}

    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
        """
        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
        if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
        'CL:0000000'

        :param term_id: str ontology term to find highest level term for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
        high-level terms
        """
        matched_terms = self.get_high_level_terms(term_id, high_level_terms)
        if not matched_terms:
            return None
        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
        # "Highest" means farthest from term_id; ties resolve to the earliest entry in high_level_terms.
        return max(matched_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])

    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
        provided input. Raises ValueError if term ID is not valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective highest level term from the input list, or to
        None if there are no ancestors among the provided input.
        """
        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}

    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
        ['CL:0002363']

        :param term_id: str ontology term to find descendants for
        :param include_self: boolean flag to include the term itself as a descendant
        :return: List[str] of descendant terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        descendants = [term_id] if include_self else []
        # A term is a descendant exactly when term_id is listed among its ancestors.
        descendants.extend(
            candidate_descendant
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if term_id in candidate_metadata["ancestors"]
        )
        return descendants

    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
        included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        Each term is matched only against candidates from its own ontology, the same scope get_term_descendants uses,
        and each ontology is scanned once for all of its requested terms.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003', ...],
            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
        }

        :param term_ids: list of str ontology terms to find descendants for
        :param include_self: boolean flag to include the term itself as an descendant
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
        empty list if there are no descendants.
        """
        descendants_dict: Dict[str, List[str]] = dict()
        term_ids_by_ontology: Dict[str, List[str]] = dict()
        for term_id in term_ids:
            if term_id in VALID_NON_ONTOLOGY_TERMS:
                descendants_dict[term_id] = []
                continue
            ontology_name = self._parse_ontology_name(term_id)
            # Register a repeated input term once so its descendants are not appended twice.
            if term_id not in descendants_dict:
                term_ids_by_ontology.setdefault(ontology_name, []).append(term_id)
            descendants_dict[term_id] = [term_id] if include_self else []

        for ontology_name, ontology_term_ids in term_ids_by_ontology.items():
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
                ancestors = candidate_metadata["ancestors"]
                for ancestor_id in ontology_term_ids:
                    if ancestor_id in ancestors:
                        descendants_dict[ancestor_id].append(candidate_descendant)

        return descendants_dict

    def get_term_children(self, term_id: str) -> List[str]:
        """
        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_children("CL:0000526")
        ['CL:0000101', 'CL:4042034']

        :param term_id: str ontology term to find children for
        :return: List[str] of children terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # A child is any term that lists term_id as an ancestor at distance 1.
        return [
            candidate_child
            for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if candidate_metadata["ancestors"].get(term_id) == 1
        ]

    def get_term_graph(self, term_id: str) -> "OntologyNode":
        """
        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
        >>> root_node.term_id
        'CL:0000000'
        >>> root_node.to_dict() # doctest: +SKIP
        {
            "term_id": "CL:0000000",
            "name": "cell A",
            "children": [
                {
                    "term_id": "CL:0000001",
                    "name": "cell B",
                    "children": [...],
                },
                {
                    "term_id": "CL:0000002",
                    "name": "cell C",
                    "children": [...],
                },
                ...
            ]
        }
        >>> root_node.term_counter # doctest: +SKIP
        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...

        :param term_id: str ontology term to build subtree for
        :return: OntologyNode representation of graph with term_id as root.
        """
        term_label = self.get_term_label(term_id)
        root = OntologyNode(term_id, term_label)
        # Build each child's subtree recursively and attach it under the root.
        for child_term_id in self.get_term_children(term_id):
            root.add_child(self.get_term_graph(child_term_id))
        return root

    def is_term_deprecated(self, term_id: str) -> bool:
        """
        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
        ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.is_term_deprecated("CL:0000003")
        True

        :param term_id: str ontology term to check for deprecation
        :return: boolean flag indicating whether the term is deprecated
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return False
        # A term without a "deprecated" entry is reported as False rather than None.
        return bool(self._get_term_entry(term_id).get("deprecated", False))

    def get_term_replacement(self, term_id: str) -> Union[str, None]:
        """
        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_replacement("CL:0000003")
        'CL:0000000'

        :param term_id: str ontology term to check a replacement term for
        :return: replacement str term ID if it exists, None otherwise
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return None
        replaced_by: Optional[str] = self._get_term_entry(term_id).get("replaced_by")
        # Normalize any falsy stored value (missing key, empty string) to None.
        return replaced_by or None

    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
        """
        Fetch metadata for a given ontology term. Returns a dict with format

        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

        Comments maps to List[str] of ontology curator comments
        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
        Consider maps to List[str] of alternate ontology terms to consider using instead of this term

        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
        of a supported ontology.

        :param term_id: str ontology term to fetch metadata for
        :return: Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
        """
        # A tuple (not a set) keeps the key order of the returned dict stable.
        metadata_keys = ("comments", "term_tracker", "consider")
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return dict.fromkeys(metadata_keys)
        term_entry = self._get_term_entry(term_id)
        return {key: term_entry.get(key, None) for key in metadata_keys}

    def get_term_label(self, term_id: str) -> str:
        """
        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label("CL:0000005")
        'neural crest derived fibroblast'

        :param term_id: str ontology term to fetch label for
        :return: str human-readable label for the term
        """
        # Valid non-ontology terms act as their own label.
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return term_id
        label: str = self._get_term_entry(term_id)["label"]
        return label

    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
        """
        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
        member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}

        :param term_ids: list of str ontology terms to fetch label for
        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
        """
        return {term_id: self.get_term_label(term_id) for term_id in term_ids}

    def get_term_description(self, term_id: str) -> Optional[str]:
        """
        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_description("CL:0000005")
        'Any fibroblast that is derived from the neural crest.'

        :param term_id: str ontology term to fetch description for
        :return: str description for the term, None if the term has no description. A valid non-ontology term is
        returned unchanged as its own description.
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return term_id
        description: Optional[str] = self._get_term_entry(term_id).get("description", None)
        return description

    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
        """
        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}

        :param term_ids: list of str ontology terms to fetch descriptions for
        :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions
        """
        return {term_id: self.get_term_description(term_id) for term_id in term_ids}

    def get_term_synonyms(self, term_id: str) -> List[str]:
        """
        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_synonyms("CL:0000019")
        ['sperm cell', 'spermatozoid', 'spermatozoon']

        :param term_id: str ontology term to fetch synonyms for
        :return: List[str] synonyms for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        # list(...) hands back a fresh list so callers cannot mutate the stored synonyms.
        synonyms: List[str] = list(self._get_term_entry(term_id).get("synonyms", []))
        return synonyms

    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
        """
        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}

        :param term_ids: list of str ontology terms to fetch synonyms for
        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
        """
        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}

    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
        """
        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
        ontology_name is not a supported ontology.

        Returns None if no term in the ontology carries that label.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
        'CL:0000005'

        :param term_label: str human-readable label to fetch term ID for
        :param ontology_name: str name of ontology to search for term label in
        :return: Optional[str] term ID with that label, or None if the label is not found in the ontology
        """
        # Read the cache directly; copying the whole map for a single lookup is wasted work.
        return self._get_cached_term_label_to_id_map(ontology_name).get(term_label)

    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.

        If no applicable match is found, returns None.

        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
        'UBERON:0000468'

        :param term_id: str ontology term to find equivalent term for
        :param cross_ontology: str name of ontology to search for equivalent term in
        :return: Optional[str] equivalent term ID from the cross_ontology
        """
        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
            raise ValueError(
                f"{cross_ontology} is not in the set of supported cross ontology mappings "
                f"{self.cxg_schema.cross_ontology_mappings}."
            )
        cross_ontology_terms = self._get_term_entry(term_id).get("cross_ontology_terms")
        bridge_term_id: Optional[str] = None
        if cross_ontology_terms:
            bridge_term_id = cross_ontology_terms.get(cross_ontology)
        return bridge_term_id

    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
        of the term for the closest match.

        If no applicable match is found, returns an empty list.

        If multiple ancestors of the same distance have matches, returns all possible closest matches, each listed
        once.

        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
        ['UBERON:0000476', 'UBERON:0000920']

        :param term_id: str ontology term to find closest term for
        :param cross_ontology: str name of ontology to search for closest term in
        :return: List[str] list of closest term IDs from the cross_ontology
        """
        closest_bridge_terms: List[str] = []
        terms_to_match = [term_id]
        # Walk up one level of parents at a time and stop at the first level that yields a match.
        while terms_to_match:
            for term in terms_to_match:
                closest_bridge_term = self.get_bridge_term_id(term, cross_ontology)
                if closest_bridge_term and closest_bridge_term not in closest_bridge_terms:
                    closest_bridge_terms.append(closest_bridge_term)
            if closest_bridge_terms:
                break
            # dict.fromkeys drops repeated parents (paths that converge) while keeping first-seen order.
            terms_to_match = list(
                dict.fromkeys(parent for child in terms_to_match for parent in self.get_term_parents(child))
            )
        return closest_bridge_terms
An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
def __init__(self, schema_version: Optional[str] = None):
    """
    Bind this parser to the ontology metadata of one CellxGene schema version.

    A CXGSchema is created for the requested version and an empty label-to-ID cache is prepared for every
    ontology that schema lists in ``supported_ontologies``. Loading of the metadata itself is delegated to
    CXGSchema.

    :param schema_version: str version of the schema to load ontology metadata for. If falsy (None or an empty
        string), CXGSchema is constructed without an explicit version.
    """
    if schema_version:
        self.cxg_schema = CXGSchema(version=schema_version)
    else:
        self.cxg_schema = CXGSchema()

    # One (initially empty) label -> term ID cache per supported ontology; filled lazily elsewhere.
    empty_caches: Dict[str, Dict[str, str]] = {}
    for supported_name in self.cxg_schema.supported_ontologies:
        empty_caches[supported_name] = {}
    self.term_label_to_id_map: Dict[str, Dict[str, str]] = empty_caches
Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema version. If not cached, it will make a network call to GitHub Release Assets to load in memory and parse the corresponding ontology metadata.
Parameters
- schema_version: str version of the schema to load ontology metadata for. If not provided, the latest is loaded.
def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
    """
    Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by the resolved
    supported ontology name.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
    {'Label A': 'CL:0000000', ... }

    :param ontology_name: str name of ontology to get map of term labels to term IDs
    :return: Dict[str, str] copy of the cached label -> term ID map, so callers cannot mutate the cache
    :raises ValueError: if ontology_name does not resolve to a supported ontology
    """
    supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
    if not supported_ontology_name:
        # Report the name the caller passed in; the resolved name is empty/None on this path.
        raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")

    label_to_id = self.term_label_to_id_map[supported_ontology_name]
    if not label_to_id:
        # Cache is still empty for this ontology: build it once from the ontology metadata.
        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
            label_to_id[term_metadata["label"]] = term_id

    return label_to_id.copy()
Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
{'Label A': 'CL:0000000', ... }
Parameters
- ontology_name: str name of ontology to get map of term labels to term IDs
def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
    """
    Report whether a term ID is defined in a supported ontology. Deprecated terms that are still defined in the
    ontology count as valid. When ``ontology`` is given, the term must additionally resolve to exactly that
    ontology name.

    :param term_id: str ontology term to check
    :param ontology: optional str name of the ontology the term is required to belong to
    :return: boolean flag indicating whether the term is supported; any ValueError raised while resolving or
        loading the ontology is treated as "not valid" rather than propagated
    """
    try:
        resolved_name = self._parse_ontology_name(term_id)
        if ontology and resolved_name != ontology:
            return False
        return term_id in self.cxg_schema.ontology(resolved_name)
    except ValueError:
        return False
Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine if the term is defined in that particular ontology. Otherwise, checks if the term is valid in any supported ontology.
Parameters
- term_id: str ontology term to check
- ontology: str name of ontology to check against
Returns
boolean flag indicating whether the term is supported
def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
    """
    List the ancestor term IDs recorded for a term. With include_self=True the term itself is appended after
    its ancestors. Terms listed in VALID_NON_ONTOLOGY_TERMS have no ancestors and yield an empty list.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
    ['CL:0000000', 'CL:0000057', ...

    :param term_id: str ontology term to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: flattened List[str] of ancestor terms
    :raises ValueError: if the term ID cannot be resolved to a supported ontology
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    source_ontology = self._parse_ontology_name(term_id)
    term_entry = self.cxg_schema.ontology(source_ontology)[term_id]
    lineage = list(term_entry["ancestors"].keys())
    if include_self:
        lineage.append(term_id)
    return lineage
Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
['CL:0000000', 'CL:0000057', ...
Parameters
- term_id: str ontology term to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
flattened List[str] of ancestor terms
def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
    """
    Look up the ancestors of every term in ``term_ids`` by delegating to get_term_ancestors. With
    include_self=True each term is also listed among its own ancestors.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
    {
        'CL:0000003': ['CL:0000003'],
        'CL:0000005': ['CL:0000000', ..., 'CL:0000005']
    }

    :param term_ids: iterable of str ontology terms to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
        empty list if there are no ancestors.
    :raises ValueError: propagated from get_term_ancestors for a term that cannot be resolved
    """
    ancestors_by_term: Dict[str, List[str]] = {}
    for current_id in term_ids:
        ancestors_by_term[current_id] = self.get_term_ancestors(current_id, include_self)
    return ancestors_by_term
Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
'CL:0000003': ['CL:0000003'],
'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
}
Parameters
- term_ids: list of str ontology terms to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to empty list if there are no ancestors.
def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
    """
    Return the ancestors recorded for a term together with their distance from it. With include_self=True the
    term itself is included at distance 0. Terms listed in VALID_NON_ONTOLOGY_TERMS yield an empty dict.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
    {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}

    :param term_id: str ontology term to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
    :raises ValueError: if the term ID cannot be resolved to a supported ontology
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return {}

    source_ontology = self._parse_ontology_name(term_id)
    term_entry = self.cxg_schema.ontology(source_ontology)[term_id]
    # Work on a copy so the schema's own metadata is never handed out for mutation.
    distances: Dict[str, int] = term_entry["ancestors"].copy()
    if include_self:
        return distances | {term_id: 0}
    return distances
Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
{'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
Parameters
- term_id: str ontology term to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
Dict[str, int] map of ancestor terms and their respective distances from the term_id
def map_term_ancestors_with_distances(
    self, term_ids: Iterable[str], include_self: bool = False
) -> Dict[str, Dict[str, int]]:
    """
    Look up ancestors and their distances for every term in ``term_ids`` by delegating to
    get_term_ancestors_with_distances. With include_self=True each term also appears in its own map at
    distance 0.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
    {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}

    :param term_ids: iterable of str ontology terms to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
        respective distances from the term_id
    :raises ValueError: propagated from get_term_ancestors_with_distances for a term that cannot be resolved
    """
    distances_by_term: Dict[str, Dict[str, int]] = {}
    for current_id in term_ids:
        distances_by_term[current_id] = self.get_term_ancestors_with_distances(current_id, include_self)
    return distances_by_term
Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
{'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
Parameters
- term_ids: list of str ontology terms to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their respective distances from the term_id
def get_term_parents(self, term_id: str) -> List[str]:
    """
    List the direct parents of a term, i.e. the recorded ancestors whose distance is exactly 1. Terms listed in
    VALID_NON_ONTOLOGY_TERMS yield an empty list.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_parents("CL:0000101")
    ['CL:0000526']

    :param term_id: str ontology term to find parents for
    :return: List[str] of parent terms
    :raises ValueError: if the term ID cannot be resolved to a supported ontology
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    source_ontology = self._parse_ontology_name(term_id)
    distance_by_ancestor: Dict[str, int] = self.cxg_schema.ontology(source_ontology)[term_id]["ancestors"]
    direct_parents: List[str] = []
    for ancestor_id, hops in distance_by_ancestor.items():
        if hops == 1:
            direct_parents.append(ancestor_id)
    return direct_parents
Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_parents("CL:0000101")
['CL:0000526']
Parameters
- term_id: str ontology term to find parents for
Returns
List[str] of parent terms
def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
    """
    Measure the distance between two terms as the number of edges on the path through their first reported
    lowest common ancestor. Returns -1 when get_lowest_common_ancestors finds nothing in common.

    :param term_id_1: str ontology term to find distance for
    :param term_id_2: str ontology term to find distance for
    :return: int distance between the two terms, or -1 if they share no common ancestor
    :raises ValueError: propagated from the ancestor lookups when a term ID cannot be resolved
    """
    shared_ancestors = self.get_lowest_common_ancestors(term_id_1, term_id_2)
    if not shared_ancestors:
        return -1

    # Every lowest common ancestor has the same summed distance, so the first one is representative.
    pivot = shared_ancestors[0]
    hops_from_first = self.get_term_ancestors_with_distances(term_id_1, include_self=True)[pivot]
    hops_from_second = self.get_term_ancestors_with_distances(term_id_2, include_self=True)[pivot]
    return int(hops_from_first + hops_from_second)
Get the distance between two ontology terms. The distance is defined as the number of edges between the two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint. Raises ValueError if term IDs are not valid members of a supported ontology.
Parameters
- term_id_1: str ontology term to find distance for
- term_id_2: str ontology term to find distance for
Returns
int distance between the two terms, measured in number of edges between their shortest path.
def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
    """
    Get the lowest common ancestors of two ontology terms. A term counts as its own ancestor at distance 0, so
    if one term is an ancestor of the other, that term is the result.

    "Lowest" means the common ancestors whose summed distance to both terms is minimal. Ontologies are DAGs, so
    several ancestors may tie; all of them are returned, sorted by term ID so the result is deterministic.

    :param term_id_1: str ontology term to find LCA for
    :param term_id_2: str ontology term to find LCA for
    :return: List[str] of term IDs of the lowest common ancestors. Empty if the terms resolve to different
        ontologies or share no ancestor.
    :raises ValueError: if either term ID cannot be resolved to a supported ontology
    """
    ontology = self._parse_ontology_name(term_id_1)
    if ontology != self._parse_ontology_name(term_id_2):
        return []

    # include path to term itself
    ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
    ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)

    summed_distances = {
        ancestor: ancestors_1[ancestor] + ancestors_2[ancestor]
        for ancestor in set(ancestors_1.keys()) & set(ancestors_2.keys())
    }
    if not summed_distances:
        return []

    shortest = min(summed_distances.values())
    # Sorted: set/dict-of-set iteration order over strings varies between interpreter runs.
    return sorted(ancestor for ancestor, total in summed_distances.items() if total == shortest)
Get the lowest common ancestors of two ontology terms. Terms must be from the same ontology; if they are not, an empty list is returned. Ontologies are DAGs, so there may be multiple lowest common ancestors. Raises ValueError if term IDs are not valid members of a supported ontology.
Parameters
- term_id_1: str ontology term to find LCA for
- term_id_2: str ontology term to find LCA for
Returns
List[str] of term IDs of the lowest common ancestor terms; empty if the terms belong to different ontologies or share no common ancestor
def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
    """
    Select, from the caller-supplied ``high_level_terms``, those that are ancestors of ``term_id`` (the term
    itself counts as its own ancestor). Matches are returned in the order they appear in ``high_level_terms``.
    Terms listed in VALID_NON_ONTOLOGY_TERMS yield an empty list.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
    ['CL:0000000']

    :param term_id: str ontology term to find high-level terms for
    :param high_level_terms: list of str ontology terms to check for ancestry to term_id
    :return: List[str] of high-level terms that the term is a descendant of
    :raises ValueError: propagated from get_term_ancestors when the term ID cannot be resolved
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    lineage = self.get_term_ancestors(term_id, include_self=True)
    matches: List[str] = []
    for candidate in high_level_terms:
        if candidate in lineage:
            matches.append(candidate)
    return matches
Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1 high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
['CL:0000000']
Parameters
- term_id: str ontology term to find high-level terms for
- high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns
List[str] of high-level terms that the term is a descendant of
def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
    """
    Map every term in ``term_ids`` to the subset of ``high_level_terms`` it descends from, by delegating to
    get_high_level_terms. The result has the format

    {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

    A term counts as a descendant of itself.

    :param term_ids: list of str ontology terms to map high level terms for
    :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
    :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
        Each key maps to empty list if there are no ancestors among the provided input.
    :raises ValueError: propagated from get_high_level_terms for a term that cannot be resolved
    """
    matches_by_term: Dict[str, List[str]] = {}
    for current_id in term_ids:
        matches_by_term[current_id] = self.get_high_level_terms(current_id, high_level_terms)
    return matches_by_term
Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format
{"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
Parameters
- term_ids: list of str ontology terms to map high level terms for
- high_level_terms: list of str ontology terms to be mapped to descendant term_ids
Returns
Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list. Each key maps to empty list if there are no ancestors among the provided input.
def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
    """
    Pick, among the caller-supplied ``high_level_terms`` that are ancestors of ``term_id``, the one farthest
    away from it (largest recorded distance). On a tie the earliest matching entry of ``high_level_terms``
    wins.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
    'CL:0000000'

    :param term_id: str ontology term to find highest level term for
    :param high_level_terms: list of str ontology terms to check for ancestry to term_id
    :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
        high-level terms
    :raises ValueError: propagated from the ancestor lookups when the term ID cannot be resolved
    """
    matched_terms = self.get_high_level_terms(term_id, high_level_terms)
    distance_by_ancestor = self.get_term_ancestors_with_distances(term_id, include_self=True)
    if matched_terms:
        return max(matched_terms, key=lambda candidate: distance_by_ancestor[candidate])
    return None
Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
'CL:0000000'
Parameters
- term_id: str ontology term to find highest level term for
- high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns
str highest level term that the term is a descendant of, or None if it is not a descendant of any high-level terms
def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
    """
    Map every term in ``term_ids`` to its single highest level term among ``high_level_terms``, by delegating
    to get_highest_level_term. The result has the format

    {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

    A term counts as a descendant of itself.

    :param term_ids: list of str ontology terms to map high level terms for
    :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
    :return: Dictionary mapping each str term ID to the str highest level term it descends from, or to None if
        it descends from none of the provided high level terms.
    :raises ValueError: propagated from get_highest_level_term for a term that cannot be resolved
    """
    highest_by_term: Dict[str, Union[str, None]] = {}
    for current_id in term_ids:
        highest_by_term[current_id] = self.get_highest_level_term(current_id, high_level_terms)
    return highest_by_term
Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format
{"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided. Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the provided input. Raises ValueError if term ID is not valid member of a supported ontology.
Parameters
- term_ids: list of str ontology terms to map high level terms for
- high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
Returns
Dictionary mapping each str term ID to the str highest level term it is a descendant of, from the input list. Each key maps to None if there are no ancestors among the provided input.
def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
    """
    List every term of the same ontology that records ``term_id`` among its ancestors. With include_self=True
    the term itself is placed first in the result. Terms listed in VALID_NON_ONTOLOGY_TERMS yield an empty
    list.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
    ['CL:0002363']

    :param term_id: str ontology term to find descendants for
    :param include_self: boolean flag to include the term itself as a descendant
    :return: List[str] of descendant terms
    :raises ValueError: if the term ID cannot be resolved to a supported ontology
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    source_ontology = self._parse_ontology_name(term_id)
    found: List[str] = [term_id] if include_self else []
    # Descendants are not stored directly; scan every term and test whether term_id is one of its ancestors.
    found.extend(
        candidate_id
        for candidate_id, metadata in self.cxg_schema.ontology(source_ontology).items()
        if term_id in metadata["ancestors"]
    )
    return found
Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
['CL:0002363']
Parameters
- term_id: str ontology term to find descendants for
- include_self: boolean flag to include the term itself as a descendant
Returns
List[str] of descendant terms
def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
    """
    Collect the descendants of every term in ``term_ids``, scanning each involved ontology once for the whole
    batch rather than once per term. With include_self=True each term is placed first in its own list. Terms
    listed in VALID_NON_ONTOLOGY_TERMS map to an empty list.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
    {
        'CL:0000003': ['CL:0000003', ...],
        'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
    }

    :param term_ids: iterable of str ontology terms to find descendants for
    :param include_self: boolean flag to include the term itself as a descendant
    :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
        empty list if there are no descendants.
    :raises ValueError: if a term ID cannot be resolved to a supported ontology
    """
    descendants_by_term: Dict[str, List[str]] = {}
    ontologies_to_scan = set()
    for requested_id in term_ids:
        if requested_id in VALID_NON_ONTOLOGY_TERMS:
            descendants_by_term[requested_id] = []
        else:
            ontologies_to_scan.add(self._parse_ontology_name(requested_id))
            descendants_by_term[requested_id] = [requested_id] if include_self else []

    for ontology in ontologies_to_scan:
        for candidate_id, candidate_metadata in self.cxg_schema.ontology(ontology).items():
            candidate_ancestors = candidate_metadata["ancestors"]
            for requested_id, collected in descendants_by_term.items():
                if requested_id in candidate_ancestors:
                    collected.append(candidate_id)

    return descendants_by_term
Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
'CL:0000003': ['CL:0000003', ...],
'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
}
Parameters
- term_ids: list of str ontology terms to find descendants for
- include_self: boolean flag to include the term itself as a descendant
Returns
Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to empty list if there are no descendants.
def get_term_children(self, term_id: str) -> List[str]:
    """
    List the direct children of a term: every term of the same ontology that records ``term_id`` as an ancestor
    at distance exactly 1. Terms listed in VALID_NON_ONTOLOGY_TERMS yield an empty list.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_children("CL:0000526")
    ['CL:0000101', 'CL:4042034']

    :param term_id: str ontology term to find children for
    :return: List[str] of children terms
    :raises ValueError: if the term ID cannot be resolved to a supported ontology
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    source_ontology = self._parse_ontology_name(term_id)
    # A missing entry gives None, which never equals 1, so non-descendants are filtered out as well.
    return [
        candidate_id
        for candidate_id, metadata in self.cxg_schema.ontology(source_ontology).items()
        if metadata["ancestors"].get(term_id) == 1
    ]
Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_children("CL:0000526")
['CL:0000101', 'CL:4042034']
Parameters
- term_id: str ontology term to find children for
Returns
List[str] of children terms
def get_term_graph(self, term_id: str) -> OntologyNode:
    """
    Build the graph of OntologyNode objects rooted at ``term_id`` by recursing through get_term_children, so
    only terms from the root term's ontology are included. A term reachable through several parents is built
    once per path.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> root_node = ontology_parser.get_term_graph("CL:0000000")
    >>> root_node.term_id
    'CL:0000000'
    >>> root_node.to_dict() # doctest: +SKIP
    {
        "term_id": "CL:0000000",
        "name": "cell A",
        "children": [
            {
                "term_id": "CL:0000001",
                "name": "cell B",
                "children": [...],
            },
            {
                "term_id": "CL:0000002",
                "name": "cell C",
                "children": [...],
            },
            ...
        ]
    }
    >>> root_node.term_counter # doctest: +SKIP
    Counter({'CL:0002058': 48, 'CL:0002471': 48, ...

    :param term_id: str ontology term to build subtree for
    :return: OntologyNode representation of graph with term_id as root.
    :raises ValueError: propagated from the label/children lookups when the term ID cannot be resolved
    """
    node = OntologyNode(term_id, self.get_term_label(term_id))
    for child_id in self.get_term_children(term_id):
        subgraph = self.get_term_graph(child_id)
        node.add_child(subgraph)
    return node
Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> root_node = ontology_parser.get_term_graph("CL:0000000")
>>> root_node.term_id
'CL:0000000'
>>> root_node.to_dict() # doctest: +SKIP
{
"term_id": "CL:0000000",
"name": "cell A",
"children": [
{
"term_id": "CL:0000001",
"name": "cell B",
"children": [...],
},
{
"term_id": "CL:0000002",
"name": "cell C",
"children": [...],
},
...
]
}
>>> root_node.term_counter # doctest: +SKIP
Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
Parameters
- term_id: str ontology term to build subtree for
Returns
OntologyNode representation of graph with term_id as root.
def is_term_deprecated(self, term_id: str) -> bool:
    """
    Check if an ontology term is deprecated. Raises ValueError if term ID is not a valid member of a supported
    ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.is_term_deprecated("CL:0000003")
    True

    :param term_id: str ontology term to check for deprecation
    :return: boolean flag indicating whether the term is deprecated; False for a term listed in
        VALID_NON_ONTOLOGY_TERMS or when the term's metadata carries no "deprecated" entry
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return False
    ontology_name = self._parse_ontology_name(term_id)
    term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
    # Coerce so that a missing flag yields False instead of leaking None through a bool-typed return.
    return bool(term_metadata.get("deprecated", False))
Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.is_term_deprecated("CL:0000003")
True
Parameters
- term_id: str ontology term to check for deprecation
Returns
boolean flag indicating whether the term is deprecated
def get_term_replacement(self, term_id: str) -> Union[str, None]:
    """
    Look up the term that replaces a deprecated ontology term. Returns None when no replacement is recorded.
    Raises ValueError if term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_replacement("CL:0000003")
    'CL:0000000'

    :param term_id: str ontology term to check a replacement term for
    :return: replacement str term ID if it exists, None otherwise
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return None
    ontology_name = self._parse_ontology_name(term_id)
    term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
    # Any falsy stored value (missing key, None, empty string) is reported as "no replacement".
    return term_metadata.get("replaced_by") or None
Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_replacement("CL:0000003")
'CL:0000000'
Parameters
- term_id: str ontology term to check a replacement term for
Returns
replacement str term ID if it exists, None otherwise
def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
    """
    Fetch metadata for a given ontology term. Returns a dict with format

    {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

    "comments" maps to List[str] of ontology curator comments.
    "term_tracker" maps to a str url where there is discussion around this term's curation (or deprecation).
    "consider" maps to List[str] of alternate ontology terms to consider using instead of this term.

    All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not a valid
    member of a supported ontology.

    :param term_id: str ontology term to fetch metadata for
    :return: Dict with keys 'comments', 'term_tracker', and 'consider' (always in that order) containing
        associated metadata.
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return {"comments": None, "term_tracker": None, "consider": None}
    ontology_name = self._parse_ontology_name(term_id)
    # Look the term up once instead of once per key.
    term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
    # Iterate a tuple rather than a set so the key order of the result is the same on every run.
    return {key: term_metadata.get(key, None) for key in ("comments", "term_tracker", "consider")}
Fetch metadata for a given ontology term. Returns a dict with format
{"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
'comments' maps to a List[str] of ontology curator comments. 'term_tracker' maps to a str URL where this term's curation (or deprecation) is discussed. 'consider' maps to a List[str] of alternate ontology terms to consider using instead of this term.
All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member of a supported ontology.
Parameters
- term_id: str ontology term to fetch metadata for
Returns
Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
def get_term_label(self, term_id: str) -> str:
    """
    Return the human-readable label of an ontology term. A term listed in VALID_NON_ONTOLOGY_TERMS is its own
    label. Raises ValueError if term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_label("CL:0000005")
    'neural crest derived fibroblast'

    :param term_id: str ontology term to fetch label for
    :return: str human-readable label for the term
    """
    if term_id not in VALID_NON_ONTOLOGY_TERMS:
        source_ontology = self._parse_ontology_name(term_id)
        term_label: str = self.cxg_schema.ontology(source_ontology)[term_id]["label"]
        return term_label
    return term_id
Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label("CL:0000005")
'neural crest derived fibroblast'
Parameters
- term_id: str ontology term to fetch label for
Returns
str human-readable label for the term
def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
    """
    Look up the human-readable label of every term in term_ids via get_term_label. Raises ValueError if any
    term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
    {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}

    :param term_ids: iterable of str ontology terms to fetch label for
    :return: Dict[str, str] mapping term IDs to their respective human-readable labels
    """
    labels_by_term: Dict[str, str] = {}
    # Single pass, so one-shot iterables (e.g. generators) are supported.
    for current_term in term_ids:
        labels_by_term[current_term] = self.get_term_label(current_term)
    return labels_by_term
Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
{'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
Parameters
- term_ids: list of str ontology terms to fetch label for
Returns
Dict[str, str] mapping term IDs to their respective human-readable labels
def get_term_description(self, term_id: str) -> Optional[str]:
    """
    Return the description recorded for an ontology term. Raises ValueError if term ID is not a valid member of
    a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_description("CL:0000005")
    'Any fibroblast that is derived from the neural crest.'

    :param term_id: str ontology term to fetch description for
    :return: str description for the term, or None if the term has no description. A term listed in
        VALID_NON_ONTOLOGY_TERMS is returned unchanged as its own description.
    """
    if term_id not in VALID_NON_ONTOLOGY_TERMS:
        source_ontology = self._parse_ontology_name(term_id)
        term_metadata = self.cxg_schema.ontology(source_ontology)[term_id]
        term_description: Optional[str] = term_metadata.get("description", None)
        return term_description
    return term_id
Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_description("CL:0000005")
'Any fibroblast that is derived from the neural crest.'
Parameters
- term_id: str ontology term to fetch description for
Returns
str description for the term, or None if the term has no description
def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
    """
    Look up the description of every term in term_ids via get_term_description. Raises ValueError if any term
    ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
    {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}

    :param term_ids: list of str ontology terms to fetch descriptions for
    :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions (None where a term has
        no description)
    """
    descriptions_by_term: Dict[str, Optional[str]] = {}
    for current_term in term_ids:
        descriptions_by_term[current_term] = self.get_term_description(current_term)
    return descriptions_by_term
Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
{'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
Parameters
- term_ids: list of str ontology terms to fetch descriptions for
Returns
Dict[str, Optional[str]] mapping term IDs to their respective descriptions (None where a term has no description)
def get_term_synonyms(self, term_id: str) -> List[str]:
    """
    Return the synonym labels recorded for an ontology term, or an empty list if there are none.
    Raises ValueError if term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_synonyms("CL:0000019")
    ['sperm cell', 'spermatozoid', 'spermatozoon']

    :param term_id: str ontology term to fetch synonyms for
    :return: List[str] synonyms for the term
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []
    source_ontology = self._parse_ontology_name(term_id)
    term_metadata = self.cxg_schema.ontology(source_ontology)[term_id]
    # Hand back a fresh list so callers cannot mutate the stored metadata through the result.
    return list(term_metadata.get("synonyms", []))
Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_synonyms("CL:0000019")
['sperm cell', 'spermatozoid', 'spermatozoon']
Parameters
- term_id: str ontology term to fetch synonyms for
Returns
List[str] synonyms for the term
def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
    """
    Look up the synonym labels of every term in term_ids via get_term_synonyms. Raises ValueError if any term
    ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
    {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}

    :param term_ids: list of str ontology terms to fetch synonyms for
    :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
    """
    synonyms_by_term: Dict[str, List[str]] = {}
    for current_term in term_ids:
        synonyms_by_term[current_term] = self.get_term_synonyms(current_term)
    return synonyms_by_term
Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
{'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
Parameters
- term_ids: list of str ontology terms to fetch synonyms for
Returns
Dict[str, List[str]] mapping term IDs to their respective synonym lists
def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
    """
    Resolve a human-readable label to its ontology term ID within the ontology named by ontology_name. The
    lookup goes through get_term_label_to_id_map, which raises ValueError if ontology_name is not a supported
    ontology.

    Returns None if no term with the given label exists in that ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
    'CL:0000005'

    :param term_label: str human-readable label to fetch term ID for
    :param ontology_name: str name of ontology to search for term label in
    :return: Optional[str] term ID with that label, or None if the label is not found in the ontology
    """
    return self.get_term_label_to_id_map(ontology_name).get(term_label)
Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if ontology_name is not a supported ontology.
Returns None if no term with the given label exists in the ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
'CL:0000005'
Parameters
- term_label: str human-readable label to fetch term ID for
- ontology_name: str name of ontology to search for term label in
Returns
Optional[str] term ID with that label, or None if the label is not found in the ontology
def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
    """
    For a given term ID, fetch the equivalent term ID from a given ontology. Only an exact match recorded in
    the term's "cross_ontology_terms" metadata is returned.

    If no applicable match is found, returns None.

    Raises ValueError if cross_ontology is not one of the schema's supported cross ontology mappings, or if
    term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
    'UBERON:0000468'

    :param term_id: str ontology term to find equivalent term for
    :param cross_ontology: str name of ontology to search for equivalent term in
    :return: Optional[str] equivalent term ID from the cross_ontology
    """
    if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
        raise ValueError(
            f"{cross_ontology} is not in the set of supported cross ontology mappings "
            f"{self.cxg_schema.cross_ontology_mappings}."
        )
    source_ontology = self._parse_ontology_name(term_id)
    term_metadata = self.cxg_schema.ontology(source_ontology)[term_id]
    mapped_terms = term_metadata.get("cross_ontology_terms")
    # Terms with no (or empty) cross-ontology metadata have no bridge term.
    if not mapped_terms:
        return None
    equivalent_term: Optional[str] = mapped_terms.get(cross_ontology)
    return equivalent_term
For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
If no applicable match is found, returns None.
Raises ValueError if term ID is not a valid member of a supported ontology, or if cross_ontology is not one of the supported cross ontology mappings.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
'UBERON:0000468'
Parameters
- term_id: str ontology term to find equivalent term for
- cross_ontology: str name of ontology to search for equivalent term in
Returns
Optional[str] equivalent term ID from the cross_ontology
def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
    """
    For a given term ID, fetch the closest equivalent term IDs from a given ontology. If the term itself has an
    exact match, returns a single-element list containing it. Otherwise the term's ancestors are searched one
    generation at a time (parents, then grandparents, ...) and the matches of the nearest generation that has
    any are returned.

    If no applicable match is found, returns an empty list.

    If multiple ancestors of the same distance have matches, returns all possible closest matches, each listed
    once, in the order they were first encountered.

    Raises ValueError if term ID or cross_ontology is not valid (propagated from get_bridge_term_id).

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
    ['UBERON:0000476', 'UBERON:0000920']

    :param term_id: str ontology term to find closest term for
    :param cross_ontology: str name of ontology to search for closest term in
    :return: List[str] list of closest term IDs from the cross_ontology
    """
    frontier: List[str] = [term_id]
    # Ancestors reachable through several paths are examined once: this keeps duplicates out of the result,
    # avoids repeated lookups on diamond-shaped hierarchies, and guarantees termination.
    visited = {term_id}
    while frontier:
        # A dict is used as an insertion-ordered set of the matches found in this generation.
        matches: Dict[str, None] = {}
        for term in frontier:
            if bridge_term_id := self.get_bridge_term_id(term, cross_ontology):
                matches[bridge_term_id] = None
        if matches:
            return list(matches)

        next_frontier: List[str] = []
        for term in frontier:
            for parent in self.get_term_parents(term):
                if parent not in visited:
                    visited.add(parent)
                    next_frontier.append(parent)
        frontier = next_frontier
    return []
For a given term ID, fetch the equivalent term ID from a given ontology. If an exact match is found, returns a single-element list containing it. If no exact match is found, traverses the ancestors of the term for the closest match.
If no applicable match is found, returns an empty list.
If multiple ancestors of the same distance have matches, returns all possible closest matches.
Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
['UBERON:0000476', 'UBERON:0000920']
Parameters
- term_id: str ontology term to find closest term for
- cross_ontology: str name of ontology to search for closest term in
Returns
List[str] list of closest term IDs from the cross_ontology