cellxgene_ontology_guide.ontology_parser
import re
from typing import Any, Dict, Iterable, List, Optional, Union

from cellxgene_ontology_guide._constants import VALID_NON_ONTOLOGY_TERMS
from cellxgene_ontology_guide.entities import OntologyNode
from cellxgene_ontology_guide.supported_versions import CXGSchema


class OntologyParser:
    """
    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
    """

    cxg_schema: CXGSchema
    """ CXGSchema object to fetch ontology metadata from """

    def __init__(self, schema_version: Optional[str] = None):
        """
        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
        parse the corresponding ontology metadata.

        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
        is loaded.
        """
        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
        # One lazily-filled label -> term ID map per supported ontology; see get_term_label_to_id_map.
        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
        }

    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
        """
        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
        Raises ValueError if ontology_name is neither a supported ontology nor imported by one.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
        {'Label A': 'CL:0000000', ... }

        :param ontology_name: str name of ontology to get map of term labels to term IDs
        :return: Dict[str, str] copy of the cached map of term labels to term IDs
        """
        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
        if not supported_ontology_name:
            # Report the name the caller passed in; supported_ontology_name is always None on this path.
            raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")

        label_to_id = self.term_label_to_id_map[supported_ontology_name]
        if not label_to_id:
            for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
                label_to_id[term_metadata["label"]] = term_id

        # Hand back a copy so callers cannot mutate the cache.
        return label_to_id.copy()

    def _parse_ontology_name(self, term_id: str) -> str:
        """
        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.

        :param term_id: str ontology term to parse
        :return: str name of ontology that term belongs to
        """
        # Two accepted shapes: "<PREFIX>:<digits>" (capture group 1) or "<PREFIX>_<alphanumerics>" (capture group 2).
        patterns = [r"([A-Za-z]+):[0-9]+", r"([A-Za-z]+)_[A-Za-z0-9]+"]
        pattern = "|".join(patterns)
        match = re.match(pattern, term_id)
        if not match:
            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")

        # Only the group belonging to the alternative that matched is populated.
        ontology_term_prefix = match.group(1) or match.group(2)
        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
        if not ontology_name:
            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")

        # The term must use the separator configured for its source ontology (":" when none is configured).
        id_separator = self.cxg_schema.supported_ontologies[ontology_name].get("id_separator", ":")
        if id_separator not in term_id:
            raise ValueError(f"{term_id} does not conform to expected format for {ontology_term_prefix} terms.")
        return ontology_name

    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
        """
        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.

        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
        it is imported in.
        Otherwise, returns None.

        :param ontology_term_prefix: str ontology term prefix to check
        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
        a supported ontology in the CxG schema.
        """
        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
            return ontology_term_prefix
        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
        return supported_ontology_name

    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
        """
        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology

        :param term_id: str ontology term to check
        :param ontology: str name of ontology to check against
        :return: boolean flag indicating whether the term is supported
        """
        try:
            ontology_name = self._parse_ontology_name(term_id)
            if ontology and ontology_name != ontology:
                return False
            return term_id in self.cxg_schema.ontology(ontology_name)
        except ValueError:
            # A malformed or unsupported term ID is simply "not valid" rather than an error here.
            return False

    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
        ['CL:0000000', 'CL:0000057', ...

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: flattened List[str] of ancestor terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
        if include_self:
            ancestors.append(term_id)
        return ancestors

    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003'],
            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
        }

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
        empty list if there are no ancestors.
        """
        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}

    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
        """
        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {}
        ontology_name = self._parse_ontology_name(term_id)
        # Copy so that neither the caller nor the include_self entry below touches the schema's own data.
        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
        if include_self:
            ancestors[term_id] = 0
        return ancestors

    def map_term_ancestors_with_distances(
        self, term_ids: Iterable[str], include_self: bool = False
    ) -> Dict[str, Dict[str, int]]:
        """
        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
        respective distances from the term_id
        """
        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

    def get_term_parents(self, term_id: str) -> List[str]:
        """
        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_parents("CL:0000101")
        ['CL:0000526']

        :param term_id: str ontology term to find parents for
        :return: List[str] of parent terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
        # Parents are exactly the ancestors one edge away.
        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
        return parents

    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
        """
        Get the distance between two ontology terms. The distance is defined as the number of edges between the
        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find distance for
        :param term_id_2: str ontology term to find distance for
        :return: int distance between the two terms, measured in number of edges between their shortest path.
        """
        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
        if not lcas:
            return -1
        # Every lowest common ancestor yields the same summed distance, so any one of them will do.
        lca = lcas[0]
        return int(
            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lca]
            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lca]
        )

    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
        """
        Get the lowest common ancestors between two ontology terms that is from the given ontology.
        Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find LCA for
        :param term_id_2: str ontology term to find LCA for
        :return: List[str] of term IDs of the lowest common ancestor terms, sorted for a deterministic order. Empty
        list if the terms belong to different ontologies or share no ancestor.
        """
        ontology = self._parse_ontology_name(term_id_1)
        if ontology != self._parse_ontology_name(term_id_2):
            return []
        # include path to term itself
        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
        # Summed distance to both terms for every shared ancestor; the smallest sum marks the lowest ones.
        sum_distances = {
            ancestor: distance + ancestors_2[ancestor]
            for ancestor, distance in ancestors_1.items()
            if ancestor in ancestors_2
        }
        if not sum_distances:
            return []
        min_sum_distances = min(sum_distances.values())
        return sorted(ancestor for ancestor, total in sum_distances.items() if total == min_sum_distances)

    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
        """
        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
        ['CL:0000000']

        :param term_id: str ontology term to find high-level terms for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: List[str] of high-level terms that the term is a descendant of
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ancestors = self.get_term_ancestors(term_id, include_self=True)
        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]

    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
        Each key maps to empty list if there are no ancestors among the provided input.
        """
        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}

    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
        """
        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
        if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
        'CL:0000000'

        :param term_id: str ontology term to find highest level term for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
        high-level terms
        """
        matched_terms = self.get_high_level_terms(term_id, high_level_terms)
        if not matched_terms:
            return None
        term_ancestors_and_distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
        # "Highest" means the matched ancestor with the greatest distance from term_id.
        return max(matched_terms, key=lambda high_level_term: term_ancestors_and_distances[high_level_term])

    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
        provided input. Raises ValueError if term ID is not valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective highest level term from the input list.
        Each key maps to None if there are no ancestors among the provided input.
        """
        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}

    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
        ['CL:0002363']

        :param term_id: str ontology term to find descendants for
        :param include_self: boolean flag to include the term itself as a descendant
        :return: List[str] of descendant terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        descendants = [term_id] if include_self else []
        # A term is a descendant when term_id appears among its recorded ancestors.
        descendants.extend(
            candidate_descendant
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if term_id in candidate_metadata["ancestors"]
        )
        return descendants

    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
        included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003', ...],
            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
        }

        :param term_ids: list of str ontology terms to find descendants for
        :param include_self: boolean flag to include the term itself as an descendant
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
        empty list if there are no descendants.
        """
        descendants_dict: Dict[str, List[str]] = dict()
        ontology_names = set()
        for term_id in term_ids:
            if term_id in VALID_NON_ONTOLOGY_TERMS:
                descendants_dict[term_id] = []
                continue
            ontology_names.add(self._parse_ontology_name(term_id))
            descendants_dict[term_id] = [term_id] if include_self else []

        # Walk each involved ontology once and fan every candidate out to all requested terms it descends from.
        for ontology in ontology_names:
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
                ancestors = candidate_metadata["ancestors"]
                for ancestor_id, descendants in descendants_dict.items():
                    if ancestor_id in ancestors:
                        descendants.append(candidate_descendant)

        return descendants_dict

    def get_term_children(self, term_id: str) -> List[str]:
        """
        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_children("CL:0000526")
        ['CL:0000101', 'CL:4042034']

        :param term_id: str ontology term to find children for
        :return: List[str] of children terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # A child is any term that records term_id as an ancestor exactly one edge away.
        return [
            candidate_child
            for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if candidate_metadata["ancestors"].get(term_id) == 1
        ]

    def get_term_graph(self, term_id: str) -> OntologyNode:
        """
        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
        >>> root_node.term_id
        'CL:0000000'
        >>> root_node.to_dict() # doctest: +SKIP
        {
            "term_id": "CL:0000000",
            "name": "cell A",
            "children": [
                {
                    "term_id": "CL:0000001",
                    "name": "cell B",
                    "children": [...],
                },
                {
                    "term_id": "CL:0000002",
                    "name": "cell C",
                    "children": [...],
                },
                ...
            ]
        }
        >>> root_node.term_counter # doctest: +SKIP
        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...

        :param term_id: str ontology term to build subtree for
        :return: OntologyNode representation of graph with term_id as root.
        """
        term_label = self.get_term_label(term_id)
        root = OntologyNode(term_id, term_label)
        # Recursively attach the subgraph rooted at each direct child.
        for child_term_id in self.get_term_children(term_id):
            root.add_child(self.get_term_graph(child_term_id))
        return root

    def is_term_deprecated(self, term_id: str) -> bool:
        """
        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
        ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.is_term_deprecated("CL:0000003")
        True

        :param term_id: str ontology term to check for deprecation
        :return: boolean flag indicating whether the term is deprecated
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return False
        ontology_name = self._parse_ontology_name(term_id)
        # Coerce so that a term without a "deprecated" entry yields False rather than None.
        return bool(self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated", False))

    def get_term_replacement(self, term_id: str) -> Union[str, None]:
        """
        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_replacement("CL:0000003")
        'CL:0000000'

        :param term_id: str ontology term to check a replacement term for
        :return: replacement str term ID if it exists, None otherwise
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return None
        ontology_name = self._parse_ontology_name(term_id)
        replaced_by: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
        # Normalise any falsy value (e.g. an empty string) to None.
        return replaced_by or None

    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
        """
        Fetch metadata for a given ontology term. Returns a dict with format

        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

        Comments maps to List[str] of ontology curator comments
        Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
        Consider maps to List[str] of alternate ontology terms to consider using instead of this term

        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
        of a supported ontology.

        :param term_id: str ontology term to fetch metadata for
        :return: Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
        """
        # A tuple (not a set) keeps the key order of the returned dict deterministic.
        metadata_keys = ("comments", "term_tracker", "consider")
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {key: None for key in metadata_keys}
        ontology_name = self._parse_ontology_name(term_id)
        term_entry = self.cxg_schema.ontology(ontology_name)[term_id]
        return {key: term_entry.get(key, None) for key in metadata_keys}

    def get_term_label(self, term_id: str) -> str:
        """
        Fetch the human-readable label for a given ontology term. A valid non-ontology term is returned unchanged as
        its own label. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label("CL:0000005")
        'neural crest derived fibroblast'

        :param term_id: str ontology term to fetch label for
        :return: str human-readable label for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return term_id
        ontology_name = self._parse_ontology_name(term_id)
        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
        return label

    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
        """
        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
        member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}

        :param term_ids: list of str ontology terms to fetch label for
        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
        """
        return {term_id: self.get_term_label(term_id) for term_id in term_ids}

    def get_term_description(self, term_id: str) -> Optional[str]:
        """
        Fetch the description for a given ontology term. A valid non-ontology term is returned unchanged as its own
        description. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_description("CL:0000005")
        'Any fibroblast that is derived from the neural crest.'

        :param term_id: str ontology term to fetch description for
        :return: str description for the term, or None if the term has no description
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return term_id
        ontology_name = self._parse_ontology_name(term_id)
        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
        return description

    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
        """
        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}

        :param term_ids: list of str ontology terms to fetch descriptions for
        :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions
        """
        return {term_id: self.get_term_description(term_id) for term_id in term_ids}

    def get_term_synonyms(self, term_id: str) -> List[str]:
        """
        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_synonyms("CL:0000019")
        ['sperm cell', 'spermatozoid', 'spermatozoon']

        :param term_id: str ontology term to fetch synonyms for
        :return: List[str] synonyms for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # list(...) returns a fresh list so callers cannot mutate the schema's own data.
        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
        return synonyms

    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
        """
        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}

        :param term_ids: list of str ontology terms to fetch synonyms for
        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
        """
        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}

    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
        """
        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
        ontology_name is not a supported ontology.

        Returns None if the label is not found in the ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
        'CL:0000005'

        :param term_label: str human-readable label to fetch term ID for
        :param ontology_name: str name of ontology to search for term label in
        :return: Optional[str] term ID with that label, or None if the label is not found in the ontology
        """
        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
        return ontology_term_label_to_id_map.get(term_label)

    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.

        If no applicable match is found, returns None.

        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
        'UBERON:0000468'

        :param term_id: str ontology term to find equivalent term for
        :param cross_ontology: str name of ontology to search for equivalent term in
        :return: Optional[str] equivalent term ID from the cross_ontology
        """
        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
            raise ValueError(
                f"{cross_ontology} is not in the set of supported cross ontology mappings "
                f"{self.cxg_schema.cross_ontology_mappings}."
            )
        ontology_name = self._parse_ontology_name(term_id)
        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
        bridge_term_id: Optional[str] = None
        if cross_ontology_terms:
            bridge_term_id = cross_ontology_terms.get(cross_ontology)
        return bridge_term_id

    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
        of the term for the closest match.

        If no applicable match is found, returns an empty list.

        If multiple ancestors of the same distance have matches, returns all possible closest matches. Each match is
        listed once, even when it is reachable through several paths.

        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
        ['UBERON:0000476', 'UBERON:0000920']

        :param term_id: str ontology term to find closest term for
        :param cross_ontology: str name of ontology to search for closest term in
        :return: List[str] list of closest term IDs from the cross_ontology
        """
        # Dicts are used as insertion-ordered sets: de-duplicated, but still in first-seen order.
        closest_bridge_terms: Dict[str, None] = {}
        terms_to_match: List[str] = [term_id]
        # Walk upwards one generation at a time and stop at the first generation with any match.
        while terms_to_match and not closest_bridge_terms:
            for term in terms_to_match:
                if closest_bridge_term := self.get_bridge_term_id(term, cross_ontology):
                    closest_bridge_terms[closest_bridge_term] = None
            if closest_bridge_terms:
                break
            # A parent shared by several terms of this generation only needs to be visited once.
            terms_to_match = list(
                dict.fromkeys(parent for child in terms_to_match for parent in self.get_term_parents(child))
            )
        return list(closest_bridge_terms)
class OntologyParser:
    """
    An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
    """

    cxg_schema: CXGSchema
    """ CXGSchema object to fetch ontology metadata from """

    def __init__(self, schema_version: Optional[str] = None):
        """
        Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
        version. If not cached, it will make a network call to GitHub Release Assets to load in memory and
        parse the corresponding ontology metadata.

        :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
        is loaded.
        """
        self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
        # Lazily populated cache: one (initially empty) label -> term ID map per supported ontology.
        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
        }

    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
        """
        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
        Raises ValueError if ontology_name is not a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
        {'Label A': 'CL:0000000', ... }

        :param ontology_name: str name of ontology to get map of term labels to term IDs
        :return: Dict[str, str] copy of the cached map of term labels to term IDs
        """
        supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
        if not supported_ontology_name:
            # Report the name the caller passed in; the resolved name is always None on this branch.
            raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")

        label_to_id = self.term_label_to_id_map[supported_ontology_name]
        if not label_to_id:
            # First request for this ontology: build the cache in place.
            for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
                label_to_id[term_metadata["label"]] = term_id

        # Return a copy so callers cannot mutate the cached map.
        return label_to_id.copy()

    def _parse_ontology_name(self, term_id: str) -> str:
        """
        Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
        is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.

        :param term_id: str ontology term to parse
        :return: str name of ontology that term belongs to
        """
        # Two accepted shapes: "<PREFIX>:<digits>" and "<PREFIX>_<alphanumerics>". Each alternative has its own
        # capture group for the prefix, so exactly one of group(1) / group(2) is set on a match.
        # NOTE: re.match only anchors at the start of the string; trailing characters are not rejected here.
        patterns = [r"([A-Za-z]+):[0-9]+", r"([A-Za-z]+)_[A-Za-z0-9]+"]
        pattern = "|".join(patterns)
        match = re.match(pattern, term_id)
        if not match:
            raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")

        ontology_term_prefix = match.group(1) or match.group(2)
        ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix)
        if not ontology_name:
            raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")

        # Ontologies may declare their own ID separator; ":" is the default when none is declared.
        id_separator = self.cxg_schema.supported_ontologies.get(ontology_name).get("id_separator", ":")
        if id_separator not in term_id:
            raise ValueError(f"{term_id} does not conform to expected format for {ontology_term_prefix} terms.")
        return ontology_name

    def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]:
        """
        Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema.

        If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix.
        If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology
        it is imported in.
        Otherwise, returns None.

        :param ontology_term_prefix: str ontology term prefix to check
        :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in
        a supported ontology in the CxG schema.
        """
        if ontology_term_prefix in self.cxg_schema.supported_ontologies:
            return ontology_term_prefix
        supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix)
        return supported_ontology_name

    def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
        """
        Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
        in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
        if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology

        :param term_id: str ontology term to check
        :param ontology: str name of ontology to check against
        :return: boolean flag indicating whether the term is supported
        """
        try:
            ontology_name = self._parse_ontology_name(term_id)
            if ontology and ontology_name != ontology:
                return False
            return term_id in self.cxg_schema.ontology(ontology_name)
        except ValueError:
            # Malformed term ID or unsupported ontology: not an error for a validity check.
            return False

    def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
        an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
        ['CL:0000000', 'CL:0000057', ...

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: flattened List[str] of ancestor terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        ancestors = list(self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].keys())
        return ancestors + [term_id] if include_self else ancestors

    def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
        included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003'],
            'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
        }

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
        empty list if there are no ancestors.
        """
        return {term_id: self.get_term_ancestors(term_id, include_self) for term_id in term_ids}

    def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
        """
        Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
        the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
        {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}

        :param term_id: str ontology term to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {}
        ontology_name = self._parse_ontology_name(term_id)
        # Copy so callers cannot mutate the underlying ontology metadata.
        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
        return ancestors | {term_id: 0} if include_self else ancestors

    def map_term_ancestors_with_distances(
        self, term_ids: Iterable[str], include_self: bool = False
    ) -> Dict[str, Dict[str, int]]:
        """
        Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
        True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
        {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}

        :param term_ids: list of str ontology terms to find ancestors for
        :param include_self: boolean flag to include the term itself as an ancestor
        :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
        respective distances from the term_id
        """
        return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

    def get_term_parents(self, term_id: str) -> List[str]:
        """
        Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_parents("CL:0000101")
        ['CL:0000526']

        :param term_id: str ontology term to find parents for
        :return: List[str] of parent terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
        # Parents are exactly the ancestors at distance 1.
        parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1]
        return parents

    def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
        """
        Get the distance between two ontology terms. The distance is defined as the number of edges between the
        two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
        Raises ValueError if term IDs are not valid members of a supported ontology.

        :param term_id_1: str ontology term to find distance for
        :param term_id_2: str ontology term to find distance for
        :return: int distance between the two terms, measured in number of edges between their shortest path.
        """
        lcas = self.get_lowest_common_ancestors(term_id_1, term_id_2)
        if not lcas:
            return -1
        # Every lowest common ancestor has the same summed distance, so any one of them gives the answer.
        lca = lcas[0]
        return int(
            self.get_term_ancestors_with_distances(term_id_1, include_self=True)[lca]
            + self.get_term_ancestors_with_distances(term_id_2, include_self=True)[lca]
        )

    def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
        """
        Get the lowest common ancestors between two ontology terms. Terms must be from the same ontology; if they are
        not, an empty list is returned. Ontologies are DAGs, so there may be multiple lowest common ancestors.
        Each term is considered an ancestor of itself. Raises ValueError if term IDs are not valid members of a
        supported ontology.

        :param term_id_1: str ontology term to find LCA for
        :param term_id_2: str ontology term to find LCA for
        :return: List[str] term IDs of the lowest common ancestor terms, i.e. the common ancestors with the smallest
        summed distance to both terms. Empty list if the terms share no ancestor.
        """
        ontology = self._parse_ontology_name(term_id_1)
        if ontology != self._parse_ontology_name(term_id_2):
            return []
        # include path to term itself
        ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
        ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)
        # Iterate the dict (not a set intersection) so the result order is deterministic across runs.
        summed_distances: Dict[str, int] = {
            ancestor: distance + ancestors_2[ancestor]
            for ancestor, distance in ancestors_1.items()
            if ancestor in ancestors_2
        }
        if not summed_distances:
            return []
        min_sum_distances = min(summed_distances.values())
        return [ancestor for ancestor, total in summed_distances.items() if total == min_sum_distances]

    def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
        """
        Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
        that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1
        high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
        ['CL:0000000']

        :param term_id: str ontology term to find high-level terms for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: List[str] of high-level terms that the term is a descendant of
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ancestors = self.get_term_ancestors(term_id, include_self=True)
        return [high_level_term for high_level_term in high_level_terms if high_level_term in ancestors]

    def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

        Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
        as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
        Each key maps to empty list if there are no ancestors among the provided input.
        """
        return {term_id: self.get_high_level_terms(term_id, high_level_terms) for term_id in term_ids}

    def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
        """
        Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
        term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
        if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
        'CL:0000000'

        :param term_id: str ontology term to find highest level term for
        :param high_level_terms: list of str ontology terms to check for ancestry to term_id
        :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
        high-level terms
        """
        matched_terms = self.get_high_level_terms(term_id, high_level_terms)
        if not matched_terms:
            return None
        distances = self.get_term_ancestors_with_distances(term_id, include_self=True)
        # "Highest" means furthest from term_id; on ties, max() keeps the first match in input order.
        return max(matched_terms, key=lambda high_level_term: distances[high_level_term])

    def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
        """
        Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
        format

        {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

        Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
        Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
        provided input. Raises ValueError if term ID is not valid member of a supported ontology.

        :param term_ids: list of str ontology terms to map high level terms for
        :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
        :return: Dictionary mapping str term IDs to their respective highest level term from the input list.
        Each key maps to None if there are no ancestors among the provided input.
        """
        return {term_id: self.get_highest_level_term(term_id, high_level_terms) for term_id in term_ids}

    def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
        """
        Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
        a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
        ['CL:0002363']

        :param term_id: str ontology term to find descendants for
        :param include_self: boolean flag to include the term itself as a descendant
        :return: List[str] of descendant terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        descendants = [term_id] if include_self else []
        # A term is a descendant of term_id exactly when term_id appears among its ancestors.
        descendants.extend(
            candidate_descendant
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if term_id in candidate_metadata["ancestors"]
        )
        return descendants

    def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
        """
        Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
        included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
        {
            'CL:0000003': ['CL:0000003', ...],
            'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
        }

        :param term_ids: list of str ontology terms to find descendants for
        :param include_self: boolean flag to include the term itself as an descendant
        :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
        empty list if there are no descendants.
        """
        descendants_dict: Dict[str, List[str]] = dict()
        ontology_names = set()
        for term_id in term_ids:
            if term_id in VALID_NON_ONTOLOGY_TERMS:
                descendants_dict[term_id] = []
                continue
            ontology_name = self._parse_ontology_name(term_id)
            descendants_dict[term_id] = [term_id] if include_self else []
            ontology_names.add(ontology_name)

        # Single pass over each relevant ontology, rather than one full scan per requested term.
        for ontology in ontology_names:
            for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
                ancestors = candidate_metadata["ancestors"]
                for ancestor_id, descendants in descendants_dict.items():
                    if ancestor_id in ancestors:
                        descendants.append(candidate_descendant)

        return descendants_dict

    def get_term_children(self, term_id: str) -> List[str]:
        """
        Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_children("CL:0000526")
        ['CL:0000101', 'CL:4042034']

        :param term_id: str ontology term to find children for
        :return: List[str] of children terms
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # A child is any term that lists term_id as an ancestor at distance 1; a direct dict lookup replaces
        # a scan over every ancestor of every candidate.
        return [
            candidate_child
            for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
            if candidate_metadata["ancestors"].get(term_id) == 1
        ]

    def get_term_graph(self, term_id: str) -> OntologyNode:
        """
        Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
        same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> root_node = ontology_parser.get_term_graph("CL:0000000")
        >>> root_node.term_id
        'CL:0000000'
        >>> root_node.to_dict() # doctest: +SKIP
        {
            "term_id": "CL:0000000",
            "name": "cell A",
            "children": [
                {
                    "term_id": "CL:0000001",
                    "name": "cell B",
                    "children": [...],
                },
                {
                    "term_id": "CL:0000002",
                    "name": "cell C",
                    "children": [...],
                },
                ...
            ]
        }
        >>> root_node.term_counter # doctest: +SKIP
        Counter({'CL:0002058': 48, 'CL:0002471': 48, ...

        :param term_id: str ontology term to build subtree for
        :return: OntologyNode representation of graph with term_id as root.
        """
        term_label = self.get_term_label(term_id)
        root = OntologyNode(term_id, term_label)
        # Recursively build and attach the subgraph rooted at each direct child.
        for child_term_id in self.get_term_children(term_id):
            root.add_child(self.get_term_graph(child_term_id))
        return root

    def is_term_deprecated(self, term_id: str) -> bool:
        """
        Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported
        ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.is_term_deprecated("CL:0000003")
        True

        :param term_id: str ontology term to check for deprecation
        :return: boolean flag indicating whether the term is deprecated
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return False
        ontology_name = self._parse_ontology_name(term_id)
        # A missing "deprecated" key means the term is not deprecated; always return a real bool, never None.
        is_deprecated: bool = bool(self.cxg_schema.ontology(ontology_name)[term_id].get("deprecated", False))
        return is_deprecated

    def get_term_replacement(self, term_id: str) -> Union[str, None]:
        """
        Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_replacement("CL:0000003")
        'CL:0000000'

        :param term_id: str ontology term to check a replacement term for
        :return: replacement str term ID if it exists, None otherwise
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return None
        ontology_name = self._parse_ontology_name(term_id)
        replaced_by: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("replaced_by")
        # Normalize empty values (e.g. "") to None.
        return replaced_by if replaced_by else None

    def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
        """
        Fetch metadata for a given ontology term. Returns a dict with format

        {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

        "comments" maps to List[str] of ontology curator comments
        "term_tracker" maps to a str url where there is discussion around this term's curation (or deprecation).
        "consider" maps to List[str] of alternate ontology terms to consider using instead of this term

        All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member
        of a supported ontology.

        :param term_id: str ontology term to fetch metadata for
        :return: Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
        """
        # A tuple (not a set) keeps the key order of the returned dict deterministic.
        metadata_keys = ("comments", "term_tracker", "consider")
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return {key: None for key in metadata_keys}
        ontology_name = self._parse_ontology_name(term_id)
        term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
        return {key: term_metadata.get(key, None) for key in metadata_keys}

    def get_term_label(self, term_id: str) -> str:
        """
        Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_label("CL:0000005")
        'neural crest derived fibroblast'

        :param term_id: str ontology term to fetch label for
        :return: str human-readable label for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            # Valid non-ontology terms are their own label.
            return term_id
        ontology_name = self._parse_ontology_name(term_id)
        label: str = self.cxg_schema.ontology(ontology_name)[term_id]["label"]
        return label

    def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
        """
        Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid
        member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
        {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}

        :param term_ids: list of str ontology terms to fetch label for
        :return: Dict[str, str] mapping term IDs to their respective human-readable labels
        """
        return {term_id: self.get_term_label(term_id) for term_id in term_ids}

    def get_term_description(self, term_id: str) -> Optional[str]:
        """
        Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a
        supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_description("CL:0000005")
        'Any fibroblast that is derived from the neural crest.'

        :param term_id: str ontology term to fetch description for
        :return: str description for the term, or None if the term has no description. Valid non-ontology terms
        are returned unchanged as their own description.
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return term_id
        ontology_name = self._parse_ontology_name(term_id)
        description: Optional[str] = self.cxg_schema.ontology(ontology_name)[term_id].get("description", None)
        return description

    def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
        """
        Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
        {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}

        :param term_ids: list of str ontology terms to fetch descriptions for
        :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions
        """
        return {term_id: self.get_term_description(term_id) for term_id in term_ids}

    def get_term_synonyms(self, term_id: str) -> List[str]:
        """
        Fetch a list of synonym labels for a given ontology term. Returns empty list if no synonyms found.
        Raises ValueError if term ID is not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_synonyms("CL:0000019")
        ['sperm cell', 'spermatozoid', 'spermatozoon']

        :param term_id: str ontology term to fetch synonyms for
        :return: List[str] synonyms for the term
        """
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            return []
        ontology_name = self._parse_ontology_name(term_id)
        # list(...) returns a fresh list so callers cannot mutate the underlying ontology metadata.
        synonyms: List[str] = list(self.cxg_schema.ontology(ontology_name)[term_id].get("synonyms", []))
        return synonyms

    def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
        """
        Fetch the synonym labels for a given list of ontology terms. Raises ValueError if term ID is not valid member of
        a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
        {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}

        :param term_ids: list of str ontology terms to fetch synonyms for
        :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
        """
        return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}

    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
        """
        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
        ontology_name is not a supported ontology.

        Returns None if no term in the ontology has the given label.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
        'CL:0000005'

        :param term_label: str human-readable label to fetch term ID for
        :param ontology_name: str name of ontology to search for term label in
        :return: Optional[str] term ID with that label, or None if the label is not found in the ontology
        """
        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
        return ontology_term_label_to_id_map.get(term_label)

    def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.

        If no applicable match is found, returns None.

        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
        'UBERON:0000468'

        :param term_id: str ontology term to find equivalent term for
        :param cross_ontology: str name of ontology to search for equivalent term in
        :return: Optional[str] equivalent term ID from the cross_ontology
        """
        if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
            raise ValueError(
                f"{cross_ontology} is not in the set of supported cross ontology mappings "
                f"{self.cxg_schema.cross_ontology_mappings}."
            )
        ontology_name = self._parse_ontology_name(term_id)
        cross_ontology_terms = self.cxg_schema.ontology(ontology_name)[term_id].get("cross_ontology_terms")
        bridge_term_id: Optional[str] = None
        if cross_ontology_terms:
            bridge_term_id = cross_ontology_terms.get(cross_ontology)
        return bridge_term_id

    def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
        """
        For a given term ID, fetch the equivalent term ID from a given ontology. If match is found,
        returns a list of 1 with the exact match. If no exact match is found, traverses the ancestors
        of the term for the closest match.

        If no applicable match is found, returns an empty list.

        If multiple ancestors of the same distance have matches, returns all possible closest matches (without
        duplicates).

        Raises ValueError if term ID or cross_ontology are not valid member of a supported ontology.

        Example
        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
        >>> ontology_parser = OntologyParser()
        >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
        ['UBERON:0000476', 'UBERON:0000920']

        :param term_id: str ontology term to find closest term for
        :param cross_ontology: str name of ontology to search for closest term in
        :return: List[str] list of closest term IDs from the cross_ontology
        """
        closest_bridge_terms: List[str] = []
        visited: set[str] = set()
        terms_to_match: List[str] = [term_id]
        # Breadth-first walk up the ancestry, one generation per iteration, stopping at the first generation
        # that yields any bridge term.
        while terms_to_match:
            # Ontologies are DAGs, so the same ancestor can be reached through several children. Drop repeats
            # (keeping first-seen order) and terms already checked at a closer generation; those had no match
            # and their parents have already been queued.
            terms_to_match = [term for term in dict.fromkeys(terms_to_match) if term not in visited]
            visited.update(terms_to_match)
            for term in terms_to_match:
                closest_bridge_term = self.get_bridge_term_id(term, cross_ontology)
                if closest_bridge_term and closest_bridge_term not in closest_bridge_terms:
                    closest_bridge_terms.append(closest_bridge_term)
            if closest_bridge_terms:
                break
            terms_to_match = [parent for child in terms_to_match for parent in self.get_term_parents(child)]
        return closest_bridge_terms
An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
def __init__(self, schema_version: Optional[str] = None):
    """
    Set up an OntologyParser bound to the ontology metadata of a single CellxGene schema version. If the
    metadata is not cached, a network call to GitHub Release Assets is made to load it in memory and parse it.

    :param schema_version: str version of the schema to load ontology metadata for. If not provided, the latest
    is loaded.
    """
    if schema_version:
        self.cxg_schema = CXGSchema(version=schema_version)
    else:
        self.cxg_schema = CXGSchema()

    # One (initially empty) label -> term ID cache per supported ontology; filled lazily on first lookup.
    label_caches: Dict[str, Dict[str, str]] = {}
    for ontology_name in self.cxg_schema.supported_ontologies:
        label_caches[ontology_name] = {}
    self.term_label_to_id_map: Dict[str, Dict[str, str]] = label_caches
Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema version. If not cached, it will make a network call to GitHub Release Assets to load in memory and parse the corresponding ontology metadata.
Parameters
- schema_version: str version of the schema to load ontology metadata for. If not provided, the latest is loaded.
def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
    """
    Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
    {'Label A': 'CL:0000000', ... }

    :param ontology_name: str name of ontology to get map of term labels to term IDs
    :return: Dict[str, str] copy of the cached label -> term ID map; mutating it does not affect the cache
    :raises ValueError: if ontology_name does not resolve to a supported ontology
    """
    supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name)
    if not supported_ontology_name:
        # Report the caller's input: the resolved name is falsy on this branch, so interpolating it
        # would produce a useless "None is not a supported ontology" message.
        raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")

    label_to_id = self.term_label_to_id_map[supported_ontology_name]
    if not label_to_id:
        # Cache miss: build the map once from the ontology metadata.
        for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items():
            label_to_id[term_metadata["label"]] = term_id

    # Hand out a copy so callers cannot corrupt the cache.
    return label_to_id.copy()
Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
{'Label A': 'CL:0000000', ... }
Parameters
- ontology_name: str name of ontology to get map of term labels to term IDs
def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
    """
    Report whether an ontology term ID is defined in a supported ontology. Deprecated terms that are still
    defined in the ontology count as valid. When an ontology is given, the term must belong to that specific
    ontology; otherwise any supported ontology is accepted.

    :param term_id: str ontology term to check
    :param ontology: str name of ontology to check against
    :return: boolean flag indicating whether the term is supported
    """
    try:
        parsed_name = self._parse_ontology_name(term_id)
        if ontology and parsed_name != ontology:
            return False
        return term_id in self.cxg_schema.ontology(parsed_name)
    except ValueError:
        # A malformed ID or an unsupported ontology simply means "not valid".
        return False
Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine if the term is defined in that particular ontology. Otherwise, checks if the term is valid in any supported ontology.
Parameters
- term_id: str ontology term to check
- ontology: str name of ontology to check against
Returns
boolean flag indicating whether the term is supported
def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
    """
    List every ancestor term of the given term. With include_self=True the term itself is appended to the
    result. Raises ValueError if the term ID is not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
    ['CL:0000000', 'CL:0000057', ...

    :param term_id: str ontology term to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: flattened List[str] of ancestor terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    ontology_name = self._parse_ontology_name(term_id)
    term_metadata = self.cxg_schema.ontology(ontology_name)[term_id]
    result = list(term_metadata["ancestors"].keys())
    if include_self:
        result.append(term_id)
    return result
Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors("CL:0000005") # doctest: +SKIP
['CL:0000000', 'CL:0000057', ...
Parameters
- term_id: str ontology term to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
flattened List[str] of ancestor terms
def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
    """
    Look up the ancestor terms of every term in an iterable. With include_self=True each term is also listed
    among its own ancestors. Raises ValueError if the term ID is not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
    {
        'CL:0000003': ['CL:0000003'],
        'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
    }

    :param term_ids: list of str ontology terms to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to
    empty list if there are no ancestors.
    """
    ancestors_by_term: Dict[str, List[str]] = {}
    for term_id in term_ids:
        ancestors_by_term[term_id] = self.get_term_ancestors(term_id, include_self)
    return ancestors_by_term
Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
'CL:0000003': ['CL:0000003'],
'CL:0000005': ['CL:0000005', 'CL:0000000', ...]
}
Parameters
- term_ids: list of str ontology terms to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to empty list if there are no ancestors.
def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
    """
    Map each ancestor of the given term to its distance from the term. With include_self=True the term itself
    is included at distance 0. Raises ValueError if the term ID is not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
    {'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}

    :param term_id: str ontology term to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dict[str, int] map of ancestor terms and their respective distances from the term_id
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return {}

    ontology_name = self._parse_ontology_name(term_id)
    # Work on a copy so the schema's own metadata is never mutated.
    distances: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"].copy()
    if include_self:
        distances[term_id] = 0
    return distances
Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_ancestors_with_distances("CL:0000005") # doctest: +SKIP
{'CL:0000057': 1, 'CL:0002320': 2, 'CL:0000000': 3}
Parameters
- term_id: str ontology term to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
Dict[str, int] map of ancestor terms and their respective distances from the term_id
def map_term_ancestors_with_distances(
    self, term_ids: Iterable[str], include_self: bool = False
) -> Dict[str, Dict[str, int]]:
    """
    For every term in an iterable, map its ancestors to their distance from that term. With include_self=True
    each term is also included among its own ancestors. Raises ValueError if the term ID is not valid member
    of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
    {'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}

    :param term_ids: list of str ontology terms to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their
    respective distances from the term_id
    """
    distances_by_term: Dict[str, Dict[str, int]] = {}
    for term_id in term_ids:
        distances_by_term[term_id] = self.get_term_ancestors_with_distances(term_id, include_self)
    return distances_by_term
Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_ancestors_with_distances(["CL:0000003", "CL:0000005"], include_self=True)
{'CL:0000003': {'CL:0000003': 0}, 'CL:0000005': {'CL:0000057': 1, 'CL:0000499': 2, 'CL:0002320': 3, 'CL:0000255': 4, 'CL:0000000': 5, 'CL:0000005': 0}}
Parameters
- term_ids: list of str ontology terms to find ancestors for
- include_self: boolean flag to include the term itself as an ancestor
Returns
Dictionary mapping str term IDs to their respective Dict[str, int] map of ancestor terms and their respective distances from the term_id
def get_term_parents(self, term_id: str) -> List[str]:
    """
    List the direct parents of the given term, i.e. the ancestors recorded at distance 1. Raises ValueError if
    the term ID is not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_parents("CL:0000101")
    ['CL:0000526']

    :param term_id: str ontology term to find parents for
    :return: List[str] of parent terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    ontology_name = self._parse_ontology_name(term_id)
    distances: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"]
    direct_parents: List[str] = []
    for ancestor_id, distance in distances.items():
        if distance == 1:
            direct_parents.append(ancestor_id)
    return direct_parents
Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_parents("CL:0000101")
['CL:0000526']
Parameters
- term_id: str ontology term to find parents for
Returns
List[str] of parent terms
def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
    """
    Compute the distance between two ontology terms, defined as the number of edges on the path that joins
    them through a lowest common ancestor. Terms must be from the same ontology. Returns -1 if terms are
    disjoint. Raises ValueError if term IDs are not valid members of a supported ontology.

    :param term_id_1: str ontology term to find distance for
    :param term_id_2: str ontology term to find distance for
    :return: int distance between the two terms, measured in number of edges between their shortest path.
    """
    lowest_common = self.get_lowest_common_ancestors(term_id_1, term_id_2)
    if not lowest_common:
        return -1

    # Every lowest common ancestor yields the same summed distance, so the first one is sufficient.
    pivot = lowest_common[0]
    distance_from_first = self.get_term_ancestors_with_distances(term_id_1, include_self=True)[pivot]
    distance_from_second = self.get_term_ancestors_with_distances(term_id_2, include_self=True)[pivot]
    return int(distance_from_first + distance_from_second)
Get the distance between two ontology terms. The distance is defined as the number of edges between the two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint. Raises ValueError if term IDs are not valid members of a supported ontology.
Parameters
- term_id_1: str ontology term to find distance for
- term_id_2: str ontology term to find distance for
Returns
int distance between the two terms, measured as the number of edges along the shortest path between them.
def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[str]:
    """
    Get the lowest common ancestors of two ontology terms. Terms must be from the same ontology; if they are
    not, an empty list is returned. Ontologies are DAGs, so there may be multiple lowest common ancestors.
    Each term counts as its own ancestor at distance 0, so if one term is an ancestor of the other (or the
    terms are equal) that term is the result.
    Raises ValueError if term IDs are not valid members of a supported ontology.

    :param term_id_1: str ontology term to find LCA for
    :param term_id_2: str ontology term to find LCA for
    :return: List[str] term IDs of the common ancestors with the smallest summed distance to both terms, in
    the order they appear among term_id_1's ancestors; empty list if there is no common ancestor
    """
    ontology = self._parse_ontology_name(term_id_1)
    if ontology != self._parse_ontology_name(term_id_2):
        return []

    # include path to term itself
    ancestors_1 = self.get_term_ancestors_with_distances(term_id_1, include_self=True)
    ancestors_2 = self.get_term_ancestors_with_distances(term_id_2, include_self=True)

    # Walk ancestors_1 rather than a set intersection so the result order is deterministic
    # instead of depending on string hash randomization.
    summed_distances: Dict[str, int] = {
        ancestor: distance + ancestors_2[ancestor]
        for ancestor, distance in ancestors_1.items()
        if ancestor in ancestors_2
    }
    if not summed_distances:
        return []

    min_sum_distances = min(summed_distances.values())
    return [ancestor for ancestor, total in summed_distances.items() if total == min_sum_distances]
Get the lowest common ancestors of two ontology terms. Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors. Raises ValueError if term IDs are not valid members of a supported ontology.
Parameters
- term_id_1: str ontology term to find LCA for
- term_id_2: str ontology term to find LCA for
Returns
List[str] term IDs of the lowest common ancestor terms; empty list if the terms are from different ontologies or share no ancestor
def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
    """
    Select, from the supplied high_level_terms, those that are ancestors of the given term (the term itself
    counts as its own ancestor). When several match, the result keeps the order of the input high_level_terms
    list. Raises ValueError if term ID is not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
    ['CL:0000000']

    :param term_id: str ontology term to find high-level terms for
    :param high_level_terms: list of str ontology terms to check for ancestry to term_id
    :return: List[str] of high-level terms that the term is a descendant of
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    lineage = self.get_term_ancestors(term_id, include_self=True)
    matches: List[str] = []
    # Iterate the caller's list (not the lineage) so the input order is preserved.
    for candidate in high_level_terms:
        if candidate in lineage:
            matches.append(candidate)
    return matches
Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term that are part of the high-level ontology terms supported by cellxgene-ontology-guide. If more than 1 high_level_term is matched, the returned list of matches preserves the order of the input high_level_terms list. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_high_level_terms("CL:0000005", ["CL:0000000", "CL:0000001"])
['CL:0000000']
Parameters
- term_id: str ontology term to find high-level terms for
- high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns
List[str] of high-level terms that the term is a descendant of
def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, List[str]]:
    """
    Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
    format

    {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}

    Each term_id maps to the List[str] of high-level terms it descends from; a term counts as a descendant of
    itself. Raises ValueError if term ID is not valid member of a supported ontology.

    :param term_ids: list of str ontology terms to map high level terms for
    :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
    :return: Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list.
    Each key maps to empty list if there are no ancestors among the provided input.
    """
    high_level_by_term: Dict[str, List[str]] = {}
    for term_id in term_ids:
        high_level_by_term[term_id] = self.get_high_level_terms(term_id, high_level_terms)
    return high_level_by_term
Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format
{"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
Parameters
- term_ids: list of str ontology terms to map high level terms for
- high_level_terms: list of str ontology terms to be mapped to descendant term_ids
Returns
Dictionary mapping str term IDs to their respective List[str] of ancestor terms from the input list. Each key maps to empty list if there are no ancestors among the provided input.
def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
    """
    Pick, from the supplied high_level_terms, the matching ancestor that is farthest from the given term (the
    term itself counts as an ancestor at distance 0). On a tie the first match in input order wins. Raises
    ValueError if term ID is not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
    'CL:0000000'

    :param term_id: str ontology term to find highest level term for
    :param high_level_terms: list of str ontology terms to check for ancestry to term_id
    :return: str highest level term that the term is a descendant of, or None if it is not a descendant of any
    high-level terms
    """
    matching_terms = self.get_high_level_terms(term_id, high_level_terms)
    distance_by_ancestor = self.get_term_ancestors_with_distances(term_id, include_self=True)
    if matching_terms:
        # "Highest" means the greatest distance above term_id.
        return max(matching_terms, key=lambda candidate: distance_by_ancestor[candidate])
    return None
Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_highest_level_term("CL:0000005", ["CL:0000000", "CL:0000001"])
'CL:0000000'
Parameters
- term_id: str ontology term to find highest level term for
- high_level_terms: list of str ontology terms to check for ancestry to term_id
Returns
str highest level term that the term is a descendant of, or None if it is not a descendant of any high-level terms
def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str]) -> Dict[str, Union[str, None]]:
    """
    Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with
    format

    {"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}

    Each term_id maps to the highest level term, among those provided, that it descends from; a term counts as
    a descendant of itself. Maps to None if term_id does not map to any high level terms among the provided
    input. Raises ValueError if term ID is not valid member of a supported ontology.

    :param term_ids: list of str ontology terms to map high level terms for
    :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
    :return: Dictionary mapping str term IDs to their respective highest level term from the input list.
    Each key maps to None if there are no ancestors among the provided input.
    """
    highest_by_term: Dict[str, Union[str, None]] = {}
    for term_id in term_ids:
        highest_by_term[term_id] = self.get_highest_level_term(term_id, high_level_terms)
    return highest_by_term
Given a list of ontology term IDs and a list of high_level_terms to map them to, returns a dictionary with format
{"CL:0000003": "CL:0000000", "CL:0000005": "CL:0000000"}
Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided. Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the provided input. Raises ValueError if term ID is not valid member of a supported ontology.
Parameters
- term_ids: list of str ontology terms to map high level terms for
- high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
Returns
Dictionary mapping str term IDs to their respective highest level term from the input list. Each key maps to None if there are no ancestors among the provided input.
def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
    """
    List every term in the same ontology that has the given term among its ancestors. With include_self=True
    the term itself is placed first in the result. Raises ValueError if term ID is not valid member of a
    supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
    ['CL:0002363']

    :param term_id: str ontology term to find descendants for
    :param include_self: boolean flag to include the term itself as a descendant
    :return: List[str] of descendant terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    ontology_name = self._parse_ontology_name(term_id)
    found: List[str] = []
    if include_self:
        found.append(term_id)
    # Scan the whole ontology: a term is a descendant when term_id appears among its ancestors.
    found.extend(
        candidate_id
        for candidate_id, metadata in self.cxg_schema.ontology(ontology_name).items()
        if term_id in metadata["ancestors"].keys()
    )
    return found
Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_descendants("CL:0000005") # doctest: +SKIP
['CL:0002363']
Parameters
- term_id: str ontology term to find descendants for
- include_self: boolean flag to include the term itself as a descendant
Returns
List[str] of descendant terms
def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
    """
    Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
    included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.

    Each ontology referenced by the input is scanned once for all requested terms, rather than once per term.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
    {
        'CL:0000003': ['CL:0000003', ...],
        'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
    }

    :param term_ids: list of str ontology terms to find descendants for
    :param include_self: boolean flag to include the term itself as a descendant
    :return: Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to
    empty list if there are no descendants.
    """
    descendants_dict: Dict[str, List[str]] = {}
    ontology_names = set()
    for term_id in term_ids:
        if term_id in VALID_NON_ONTOLOGY_TERMS:
            descendants_dict[term_id] = []
            continue
        # Parse before registering the key so an invalid ID raises without leaving a partial entry.
        ontology_names.add(self._parse_ontology_name(term_id))
        descendants_dict[term_id] = [term_id] if include_self else []

    for ontology in ontology_names:
        for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology).items():
            # Loop-invariant for the inner loop: look the candidate's ancestors up once per candidate.
            ancestors = candidate_metadata["ancestors"]
            for ancestor_id, descendants in descendants_dict.items():
                if ancestor_id in ancestors:
                    descendants.append(candidate_descendant)

    return descendants_dict
Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descendants(["CL:0000003", "CL:0000005"], include_self=True) # doctest: +SKIP
{
'CL:0000003': ['CL:0000003', ...],
'CL:0000005': ['CL:0000005', 'CL:0002363', ...]
}
Parameters
- term_ids: list of str ontology terms to find descendants for
- include_self: boolean flag to include the term itself as a descendant
Returns
Dictionary mapping str term IDs to their respective flattened List[str] of descendant terms. Maps to empty list if there are no descendants.
def get_term_children(self, term_id: str) -> List[str]:
    """
    List the direct children of the given term, i.e. the terms in the same ontology that record it as an
    ancestor at distance 1. Raises ValueError if term ID is not valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_children("CL:0000526")
    ['CL:0000101', 'CL:4042034']

    :param term_id: str ontology term to find children for
    :return: List[str] of children terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    ontology_name = self._parse_ontology_name(term_id)
    return [
        candidate_id
        for candidate_id, metadata in self.cxg_schema.ontology(ontology_name).items()
        for ancestor_id, distance in metadata["ancestors"].items()
        if ancestor_id == term_id and distance == 1
    ]
Get the direct children ontology terms for a given term. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_children("CL:0000526")
['CL:0000101', 'CL:4042034']
Parameters
- term_id: str ontology term to find children for
Returns
List[str] of children terms
def get_term_graph(self, term_id: str) -> OntologyNode:
    """
    Build the DAG of OntologyNode relationships rooted at the given term. Only terms from the same ontology as the
    root term ID are included. Raises ValueError if the term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> root_node = ontology_parser.get_term_graph("CL:0000000")
    >>> root_node.term_id
    'CL:0000000'
    >>> root_node.to_dict() # doctest: +SKIP
    {
        "term_id": "CL:0000000",
        "name": "cell A",
        "children": [
            {
                "term_id": "CL:0000001",
                "name": "cell B",
                "children": [...],
            },
            {
                "term_id": "CL:0000002",
                "name": "cell C",
                "children": [...],
            },
            ...
        ]
    }
    >>> root_node.term_counter # doctest: +SKIP
    Counter({'CL:0002058': 48, 'CL:0002471': 48, ...

    :param term_id: str ontology term to build subtree for
    :return: OntologyNode representation of graph with term_id as root.
    """
    node = OntologyNode(term_id, self.get_term_label(term_id))
    # Depth-first: each direct child contributes its own fully built subgraph.
    for child_id in self.get_term_children(term_id):
        child_subgraph = self.get_term_graph(child_id)
        node.add_child(child_subgraph)
    return node
Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the same ontology as the root term ID. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> root_node = ontology_parser.get_term_graph("CL:0000000")
>>> root_node.term_id
'CL:0000000'
>>> root_node.to_dict() # doctest: +SKIP
{
"term_id": "CL:0000000",
"name": "cell A",
"children": [
{
"term_id": "CL:0000001",
"name": "cell B",
"children": [...],
},
{
"term_id": "CL:0000002",
"name": "cell C",
"children": [...],
},
...
]
}
>>> root_node.term_counter # doctest: +SKIP
Counter({'CL:0002058': 48, 'CL:0002471': 48, ...
Parameters
- term_id: str ontology term to build subtree for
Returns
OntologyNode representation of graph with term_id as root.
def is_term_deprecated(self, term_id: str) -> bool:
    """
    Check if an ontology term is deprecated. Raises ValueError if the term ID is not a valid member of a supported
    ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.is_term_deprecated("CL:0000003")
    True

    :param term_id: str ontology term to check for deprecation
    :return: boolean flag indicating whether the term is deprecated; False for valid non-ontology terms and for
    terms whose metadata carries no "deprecated" entry
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return False
    ontology_name = self._parse_ontology_name(term_id)
    term_entry = self.cxg_schema.ontology(ontology_name)[term_id]
    # Coerce so that a missing "deprecated" key yields False rather than None, honouring the
    # declared bool return type.
    return bool(term_entry.get("deprecated", False))
Check if an ontology term is deprecated. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.is_term_deprecated("CL:0000003")
True
Parameters
- term_id: str ontology term to check for deprecation
Returns
boolean flag indicating whether the term is deprecated
def get_term_replacement(self, term_id: str) -> Union[str, None]:
    """
    Look up the replacement term for a deprecated ontology term. Returns None when no replacement is recorded.
    Raises ValueError if the term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_replacement("CL:0000003")
    'CL:0000000'

    :param term_id: str ontology term to check a replacement term for
    :return: replacement str term ID if it exists, None otherwise
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return None
    ontology_name = self._parse_ontology_name(term_id)
    term_entry = self.cxg_schema.ontology(ontology_name)[term_id]
    replacement: Optional[str] = term_entry.get("replaced_by")
    # Any falsy value (missing key, empty string) is normalised to None.
    return replacement or None
Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_replacement("CL:0000003")
'CL:0000000'
Parameters
- term_id: str ontology term to check a replacement term for
Returns
replacement str term ID if it exists, None otherwise
def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
    """
    Fetch metadata for a given ontology term. Returns a dict with format

    {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

    "comments" maps to a List[str] of ontology curator comments.
    "term_tracker" maps to a str url where there is discussion around this term's curation (or deprecation).
    "consider" maps to a List[str] of alternate ontology terms to consider using instead of this term.

    All keys map to None if no metadata of that type is present. Raises ValueError if the term ID is not a valid
    member of a supported ontology.

    :param term_id: str ontology term to fetch metadata for
    :return: Dict with keys 'comments', 'term_tracker', and 'consider' (always in that order) containing
    associated metadata.
    """
    # A tuple, not a set, so the key order of the result is the same on every run.
    metadata_keys = ("comments", "term_tracker", "consider")
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return dict.fromkeys(metadata_keys)
    ontology_name = self._parse_ontology_name(term_id)
    # Resolve the term's entry once instead of once per key.
    term_entry = self.cxg_schema.ontology(ontology_name)[term_id]
    return {key: term_entry.get(key) for key in metadata_keys}
Fetch metadata for a given ontology term. Returns a dict with format
{"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}
Comments maps to a List[str] of ontology curator comments. Term Tracker maps to a str URL where there is discussion around this term's curation (or deprecation). Consider maps to a List[str] of alternate ontology terms to consider using instead of this term.
All keys map to None if no metadata of that type is present. Raises ValueError if the term ID is not a valid member of a supported ontology.
Parameters
- term_id: str ontology term to fetch metadata for
Returns
Dict with keys 'comments', 'term_tracker', and 'consider' containing associated metadata.
def get_term_label(self, term_id: str) -> str:
    """
    Return the human-readable label of an ontology term. Valid non-ontology terms are returned unchanged as their
    own label. Raises ValueError if the term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_label("CL:0000005")
    'neural crest derived fibroblast'

    :param term_id: str ontology term to fetch label for
    :return: str human-readable label for the term
    """
    if term_id not in VALID_NON_ONTOLOGY_TERMS:
        ontology_name = self._parse_ontology_name(term_id)
        term_entry = self.cxg_schema.ontology(ontology_name)[term_id]
        label: str = term_entry["label"]
        return label
    return term_id
Fetch the human-readable label for a given ontology term. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_label("CL:0000005")
'neural crest derived fibroblast'
Parameters
- term_id: str ontology term to fetch label for
Returns
str human-readable label for the term
def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
    """
    Look up the human-readable label of every term in the given iterable. Raises ValueError if a term ID is not a
    valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
    {'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}

    :param term_ids: iterable of str ontology terms to fetch label for
    :return: Dict[str, str] mapping term IDs to their respective human-readable labels
    """
    labels_by_id: Dict[str, str] = {}
    for current_id in term_ids:
        labels_by_id[current_id] = self.get_term_label(current_id)
    return labels_by_id
Fetch the human-readable labels for a given list of ontology terms. Raises ValueError if a term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_labels(["CL:0000005", "CL:0000003"])
{'CL:0000005': 'neural crest derived fibroblast', 'CL:0000003': 'obsolete native cell'}
Parameters
- term_ids: list of str ontology terms to fetch label for
Returns
Dict[str, str] mapping term IDs to their respective human-readable labels
def get_term_description(self, term_id: str) -> Optional[str]:
    """
    Return the description of an ontology term, or None if the term has no description. Valid non-ontology terms
    are returned unchanged as their own description. Raises ValueError if the term ID is not a valid member of a
    supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_description("CL:0000005")
    'Any fibroblast that is derived from the neural crest.'

    :param term_id: str ontology term to fetch description for
    :return: str description for the term, or None when no description is recorded
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return term_id
    ontology_terms = self.cxg_schema.ontology(self._parse_ontology_name(term_id))
    term_entry = ontology_terms[term_id]
    description: Optional[str] = term_entry.get("description")
    return description
Fetch the description for a given ontology term. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_description("CL:0000005")
'Any fibroblast that is derived from the neural crest.'
Parameters
- term_id: str ontology term to fetch description for
Returns
str description for the term, or None if the term has no description
def map_term_descriptions(self, term_ids: Iterable[str]) -> Dict[str, Optional[str]]:
    """
    Fetch the descriptions for a given collection of ontology terms. Raises ValueError if a term ID is not a valid
    member of a supported ontology.

    Accepts any iterable of term IDs (not only lists), matching map_term_labels.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
    {'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}

    :param term_ids: iterable of str ontology terms to fetch descriptions for
    :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions; a term without a
    description maps to None
    """
    return {term_id: self.get_term_description(term_id) for term_id in term_ids}
Fetch the descriptions for a given list of ontology terms. Raises ValueError if a term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000146"])
{'CL:0000005': 'Any fibroblast that is derived from the neural crest.', 'CL:0000146': None}
Parameters
- term_ids: list of str ontology terms to fetch descriptions for
Returns
Dict[str, Optional[str]] mapping term IDs to their respective descriptions (None where a term has no description)
def get_term_synonyms(self, term_id: str) -> List[str]:
    """
    Return the synonym labels recorded for an ontology term, as a new list. The list is empty when the term has no
    synonyms or is a valid non-ontology term. Raises ValueError if the term ID is not a valid member of a supported
    ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_synonyms("CL:0000019")
    ['sperm cell', 'spermatozoid', 'spermatozoon']

    :param term_id: str ontology term to fetch synonyms for
    :return: List[str] synonyms for the term
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []
    ontology_name = self._parse_ontology_name(term_id)
    term_entry = self.cxg_schema.ontology(ontology_name)[term_id]
    recorded_synonyms = term_entry.get("synonyms", [])
    # Copy into a fresh list so callers cannot mutate the stored metadata.
    synonyms: List[str] = list(recorded_synonyms)
    return synonyms
Fetch a list of synonym labels for a given ontology term. Returns an empty list if no synonyms are found. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_synonyms("CL:0000019")
['sperm cell', 'spermatozoid', 'spermatozoon']
Parameters
- term_id: str ontology term to fetch synonyms for
Returns
List[str] synonyms for the term
def map_term_synonyms(self, term_ids: Iterable[str]) -> Dict[str, List[str]]:
    """
    Fetch the synonym labels for a given collection of ontology terms. Raises ValueError if a term ID is not a
    valid member of a supported ontology.

    Accepts any iterable of term IDs (not only lists), matching map_term_labels.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
    {'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}

    :param term_ids: iterable of str ontology terms to fetch synonyms for
    :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
    """
    return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}
Fetch the synonym labels for a given list of ontology terms. Raises ValueError if a term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.map_term_synonyms(["CL:0000005", "CL:0000019"])
{'CL:0000005': ['fibroblast neural crest derived'], 'CL:0000019': ['sperm cell', 'spermatozoid', 'spermatozoon']}
Parameters
- term_ids: list of str ontology terms to fetch synonyms for
Returns
Dict[str, List[str]] mapping term IDs to their respective synonym lists
def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
    """
    Resolve a human-readable label to its ontology term ID within the given ontology. Raises ValueError if
    ontology_name is not a supported ontology.

    Returns None if no term in that ontology carries the given label.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
    'CL:0000005'

    :param term_label: str human-readable label to fetch term ID for
    :param ontology_name: str name of ontology to search for term label in
    :return: Optional[str] term ID with that label, or None if the label is not found in the ontology
    """
    label_lookup = self.get_term_label_to_id_map(ontology_name)
    try:
        return label_lookup[term_label]
    except KeyError:
        return None
Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if ontology_name is not a supported ontology.
Returns None if the label is not found in the given ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
'CL:0000005'
Parameters
- term_label: str human-readable label to fetch term ID for
- ontology_name: str name of ontology to search for term label in
Returns
Optional[str] term ID with that label, or None if the label is not found in the ontology
def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
    """
    Return the term ID in cross_ontology that is recorded as equivalent to the given term. Only an exact match is
    returned.

    If no applicable match is found, returns None.

    Raises ValueError if cross_ontology is not among the supported cross ontology mappings, or if term_id is not a
    valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
    'UBERON:0000468'

    :param term_id: str ontology term to find equivalent term for
    :param cross_ontology: str name of ontology to search for equivalent term in
    :return: Optional[str] equivalent term ID from the cross_ontology
    """
    # Guard clause: the target ontology is checked before the term itself is parsed.
    if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
        raise ValueError(
            f"{cross_ontology} is not in the set of supported cross ontology mappings "
            f"{self.cxg_schema.cross_ontology_mappings}."
        )
    source_ontology = self._parse_ontology_name(term_id)
    term_entry = self.cxg_schema.ontology(source_ontology)[term_id]
    equivalents = term_entry.get("cross_ontology_terms")
    if not equivalents:
        return None
    match: Optional[str] = equivalents.get(cross_ontology)
    return match
For a given term ID, fetch the equivalent term ID from a given ontology. Only returns exact match if it exists.
If no applicable match is found, returns None.
Raises ValueError if the term ID is not a valid member of a supported ontology, or if cross_ontology is not a supported cross-ontology mapping.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
'UBERON:0000468'
Parameters
- term_id: str ontology term to find equivalent term for
- cross_ontology: str name of ontology to search for equivalent term in
Returns
Optional[str] equivalent term ID from the cross_ontology
def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
    """
    For a given term ID, fetch the equivalent term ID from a given ontology. If an exact match is found,
    returns a list containing only that match. If no exact match is found, traverses the ancestors
    of the term, one level of parents at a time, for the closest match.

    If no applicable match is found, returns an empty list.

    If multiple ancestors of the same distance have matches, returns all possible closest matches. Each
    matching term ID appears once, even when several ancestors (or several paths to the same ancestor)
    lead to it.

    Raises ValueError if term ID or cross_ontology are not valid members of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
    ['UBERON:0000476', 'UBERON:0000920']

    :param term_id: str ontology term to find closest term for
    :param cross_ontology: str name of ontology to search for closest term in
    :return: List[str] list of closest term IDs from the cross_ontology
    """
    closest_bridge_terms: List[str] = []
    # Terms already examined at a closer level. None of them had a bridge term (otherwise the
    # search would have stopped), so reaching them again by a longer path adds nothing.
    examined = set()
    frontier = [term_id]
    while frontier:
        for term in frontier:
            bridge_term = self.get_bridge_term_id(term, cross_ontology)
            if bridge_term and bridge_term not in closest_bridge_terms:
                closest_bridge_terms.append(bridge_term)
        if closest_bridge_terms:
            # Matches found at this distance; do not look up the next level of parents.
            break
        examined.update(frontier)
        # dict.fromkeys de-duplicates while keeping first-seen order, so an ancestor reachable
        # through several children is queried only once.
        frontier = list(
            dict.fromkeys(
                parent
                for child in frontier
                for parent in self.get_term_parents(child)
                if parent not in examined
            )
        )
    return closest_bridge_terms
For a given term ID, fetch the equivalent term ID from a given ontology. If an exact match is found, returns a list containing only that match. If no exact match is found, traverses the ancestors of the term for the closest match.
If no applicable match is found, returns an empty list.
If multiple ancestors of the same distance have matches, returns all possible closest matches.
Raises ValueError if the term ID is not a valid member of a supported ontology, or if cross_ontology is not a supported cross-ontology mapping.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
>>> ontology_parser = OntologyParser()
>>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
['UBERON:0000476', 'UBERON:0000920']
Parameters
- term_id: str ontology term to find closest term for
- cross_ontology: str name of ontology to search for closest term in
Returns
List[str] list of closest term IDs from the cross_ontology