cellxgene_ontology_guide.supported_versions
import functools
import json
import os
import warnings
from datetime import datetime
from typing import Any, Dict, List, Optional

import zstandard as zstd
from semantic_version import Version

from cellxgene_ontology_guide._constants import DATA_ROOT, ONTOLOGY_FILENAME_SUFFIX, ONTOLOGY_INFO_FILENAME
from cellxgene_ontology_guide.entities import Ontology


@functools.cache
def load_ontology_file(file_name: str) -> Any:
    """Load a zstandard-compressed JSON ontology file from the data directory.

    The result is memoized per ``file_name`` by ``functools.cache``; use
    ``clear_ontology_file_cache`` to drop the memoized results.

    :param file_name: str name of the file inside ``DATA_ROOT``
    :return: the parsed JSON content of the decompressed file
    """
    file_path = os.path.join(DATA_ROOT, file_name)
    with open(file_path, "rb") as f:
        dctx = zstd.ZstdDecompressor()
        # stream_reader exposes the decompressed bytes as a file-like object for json.load.
        decompressed = dctx.stream_reader(f)
        return json.load(decompressed)


def clear_ontology_file_cache() -> None:
    """Clear the cache for the load_ontology_file function."""
    load_ontology_file.cache_clear()


def get_latest_schema_version(versions: List[str]) -> str:
    """Given a list of schema versions, return the latest version.

    Entries are parsed with ``coerce_version`` so they are ordered as versions, not as plain strings.

    :param versions: List[str] list of schema versions. Versions can be in the format "v5.0.0" or "5.0.0"
    :return: str latest version without the leading "v"
    """
    return str(sorted([coerce_version(version) for version in versions])[-1])


def coerce_version(version: str) -> Version:
    """Coerce a version string into a semantic_version.Version object.

    A single leading "v" (as in "v5.0.0") is dropped before the rest is passed to ``Version.coerce``.

    :param version: str version string to coerce, with or without a leading "v"
    :return: Version coerced version object
    """
    # removeprefix is a no-op when there is no leading "v" -- including for the empty string, which the
    # previous ``version[0]`` check rejected with an IndexError before Version.coerce could validate it.
    return Version.coerce(version.removeprefix("v"))


def load_supported_versions() -> Any:
    """Load the ontology_info.json file and return its parsed content.

    :return: the parsed JSON content of ``ONTOLOGY_INFO_FILENAME`` inside ``DATA_ROOT``
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform's default encoding.
    with open(os.path.join(DATA_ROOT, ONTOLOGY_INFO_FILENAME), encoding="utf-8") as f:
        return json.load(f)


class CXGSchema:
    """A class to represent the ontology information used by a cellxgene schema version."""

    version: str
    """The schema version used by the class instance."""
    supported_ontologies: Dict[str, Any]
    """A dictionary of supported ontologies for the schema version."""
    imported_ontologies: Dict[str, str]
    """In our supported ontologies, the CxG schema can support terms imported from different ontologies.
    This dictionary maps these 'additional ontologies' to their supported ontology name. For example,
    for ZFS ontology terms imported into the ZFA ontology, imported_ontologies would be {"ZFS":"ZFA", ...}"""
    ontology_file_names: Dict[str, str]
    """A dictionary of ontology names and their corresponding file names."""

    def __init__(self, version: Optional[str] = None):
        """
        Resolve the schema version and collect the ontology metadata recorded for it.

        :param version: The schema version to use. If not provided, the latest schema version will be used.
        :raises ValueError: if the requested schema version is not present in the ontology info.
        """
        ontology_info = load_supported_versions()
        if version is None:
            _version = get_latest_schema_version(ontology_info.keys())
        else:
            # The lookups below use the coerced string form (e.g. "v5.0.0" becomes "5.0.0").
            _version = str(coerce_version(version))
            if _version not in ontology_info:
                raise ValueError(f"Schema version {_version} is not supported in this package version.")

        self.version = _version
        self.supported_ontologies = ontology_info[_version]["ontologies"]
        # Invert the per-ontology "additional_ontologies" lists into imported name -> owning ontology.
        self.imported_ontologies = {
            imported_ontology: ontology
            for ontology, info in self.supported_ontologies.items()
            for imported_ontology in info.get("additional_ontologies", [])
        }
        # Names of the ontologies whose entry has a truthy "cross_ontology_mapping" value.
        self.cross_ontology_mappings = {
            ontology for ontology, info in self.supported_ontologies.items() if info.get("cross_ontology_mapping")
        }
        self.ontology_file_names: Dict[str, str] = {}
        self.deprecated_on = ontology_info[_version].get("deprecated_on")
        if self.deprecated_on:
            # strptime raises if the recorded date is not in YYYY-MM-DD form.
            parsed_date = datetime.strptime(self.deprecated_on, "%Y-%m-%d")
            warnings.warn(
                f"Schema version {_version} is deprecated as of {parsed_date}. It will be removed in a future version.",
                DeprecationWarning,
                # Attribute the warning to the code constructing CXGSchema rather than to this module.
                stacklevel=2,
            )

    def ontology(self, name: str) -> Any:
        """Return the ontology terms for the given ontology name. Load from the file cache if available.

        Does not support "additional ontologies" of another ontology.

        :param name: str name of the ontology to get the terms for
        :return: dict representation of the ontology terms
        :raises ValueError: if ``name`` is not an attribute of ``Ontology`` or has no version recorded for
            this schema version
        """
        if name in self.ontology_file_names:
            return load_ontology_file(self.ontology_file_names[name])

        if getattr(Ontology, name, None) is None:
            raise ValueError(f"Ontology {name} is not supported in this package version.")

        try:
            onto_version = self.supported_ontologies[name]["version"]
        except KeyError as e:
            raise ValueError(f"Ontology {name} is not supported for schema version {self.version}") from e

        file_name = f"{name}-ontology-{onto_version}{ONTOLOGY_FILENAME_SUFFIX}"
        # Remember the file name so later calls for this ontology skip the checks above.
        self.ontology_file_names[name] = file_name
        return load_ontology_file(file_name)

    def get_ontology_download_url(self, ontology: Ontology) -> str:
        """
        Get the download URL for a given ontology file.

        When the ontology entry carries a direct ``url`` field (used for ontologies whose
        distribution does not follow the standard ``{source}/{version}/{filename}``
        versioned-path convention), that URL is returned as-is. Otherwise the ``{version}``
        and ``{filename}`` placeholders in the entry's ``source`` are filled in.

        Examples:
            get_ontology_download_url(Ontology.CL) -> "https://github.com/.../cl.owl"
            get_ontology_download_url(Ontology.UniProt) -> "https://ftp.uniprot.org/.../uniprot_sprot.xml.gz"

        :param ontology: Ontology enum of the ontology to fetch
        :return: str download URL for the requested ontology file
        """
        onto_info = self.supported_ontologies[ontology.name]
        if direct_url := onto_info.get("url"):
            return str(direct_url)
        source_url = str(onto_info["source"])
        version = str(onto_info["version"])
        filename = str(onto_info["filename"])
        return source_url.replace("{version}", version).replace("{filename}", filename)
@functools.cache
def load_ontology_file(file_name: str) -> Any:
    """Read a zstandard-compressed JSON ontology file from the data directory.

    The result is memoized per ``file_name`` by ``functools.cache``, so a repeated call with
    the same name returns the previously parsed object.

    :param file_name: str name of the file inside ``DATA_ROOT``
    :return: the parsed JSON content of the decompressed file
    """
    compressed_path = os.path.join(DATA_ROOT, file_name)
    decompressor = zstd.ZstdDecompressor()
    with open(compressed_path, "rb") as compressed_file:
        # stream_reader exposes the decompressed bytes as a file-like object for json.load.
        reader = decompressor.stream_reader(compressed_file)
        parsed = json.load(reader)
    return parsed
Load the ontology file from the data directory and return it as a dict.
def clear_ontology_file_cache() -> None:
    """Clear the cache for the load_ontology_file function.

    Discards every result memoized by the ``functools.cache`` wrapper around
    ``load_ontology_file``.
    """
    load_ontology_file.cache_clear()
Clear the cache for the load_ontology_file function.
def get_latest_schema_version(versions: List[str]) -> str:
    """Pick the newest schema version out of a collection of version strings.

    Every entry is parsed with ``coerce_version`` first, so entries are ordered as versions
    rather than as plain strings.

    :param versions: List[str] schema versions, each in the format "v5.0.0" or "5.0.0"
    :return: str the latest version without the leading "v"
    """
    ordered = sorted(coerce_version(raw_version) for raw_version in versions)
    newest = ordered[-1]
    return str(newest)
Given a list of schema versions, return the latest version.
Parameters
- versions: List[str] list of schema versions. Versions can be in the format "v5.0.0" or "5.0.0"
Returns
str latest version without the leading "v"
def coerce_version(version: str) -> Version:
    """Coerce a version string into a semantic_version.Version object.

    A single leading "v" (as in "v5.0.0") is dropped before the rest is passed to
    ``Version.coerce``; validation of the remaining text is left to ``Version.coerce``.

    :param version: str version string to coerce, with or without a leading "v"
    :return: Version coerced version object
    """
    # removeprefix is a no-op when there is no leading "v" -- including for the empty string,
    # which the previous ``version[0]`` check rejected with an IndexError before
    # Version.coerce could validate the input.
    return Version.coerce(version.removeprefix("v"))
Coerce a version string into a semantic_version.Version object.
Parameters
- version: str version string to coerce
Returns
Version coerced version object
def load_supported_versions() -> Any:
    """Load the ontology_info.json file and return its parsed content.

    :return: the parsed JSON content of ``ONTOLOGY_INFO_FILENAME`` inside ``DATA_ROOT``
    """
    info_path = os.path.join(DATA_ROOT, ONTOLOGY_INFO_FILENAME)
    # Read as UTF-8 explicitly so the result does not depend on the platform's default encoding.
    with open(info_path, encoding="utf-8") as f:
        return json.load(f)
Load the ontology_info.json file and return it as a dict.
class CXGSchema:
    """A class to represent the ontology information used by a cellxgene schema version."""

    version: str
    """The schema version used by the class instance."""
    supported_ontologies: Dict[str, Any]
    """A dictionary of supported ontologies for the schema version."""
    imported_ontologies: Dict[str, str]
    """In our supported ontologies, the CxG schema can support terms imported from different ontologies.
    This dictionary maps these 'additional ontologies' to their supported ontology name. For example,
    for ZFS ontology terms imported into the ZFA ontology, imported_ontologies would be {"ZFS":"ZFA", ...}"""
    ontology_file_names: Dict[str, str]
    """A dictionary of ontology names and their corresponding file names."""

    def __init__(self, version: Optional[str] = None):
        """
        Resolve the schema version and collect the ontology metadata recorded for it.

        :param version: The schema version to use. If not provided, the latest schema version will be used.
        :raises ValueError: if the requested schema version is not present in the ontology info.
        """
        ontology_info = load_supported_versions()
        if version is None:
            _version = get_latest_schema_version(ontology_info.keys())
        else:
            # The lookups below use the coerced string form (e.g. "v5.0.0" becomes "5.0.0").
            _version = str(coerce_version(version))
            if _version not in ontology_info:
                raise ValueError(f"Schema version {_version} is not supported in this package version.")

        self.version = _version
        self.supported_ontologies = ontology_info[_version]["ontologies"]
        # Invert the per-ontology "additional_ontologies" lists into imported name -> owning ontology.
        self.imported_ontologies = {
            imported_ontology: ontology
            for ontology, info in self.supported_ontologies.items()
            for imported_ontology in info.get("additional_ontologies", [])
        }
        # Names of the ontologies whose entry has a truthy "cross_ontology_mapping" value.
        self.cross_ontology_mappings = {
            ontology for ontology, info in self.supported_ontologies.items() if info.get("cross_ontology_mapping")
        }
        self.ontology_file_names: Dict[str, str] = {}
        self.deprecated_on = ontology_info[_version].get("deprecated_on")
        if self.deprecated_on:
            # strptime raises if the recorded date is not in YYYY-MM-DD form.
            parsed_date = datetime.strptime(self.deprecated_on, "%Y-%m-%d")
            warnings.warn(
                f"Schema version {_version} is deprecated as of {parsed_date}. It will be removed in a future version.",
                DeprecationWarning,
                # Attribute the warning to the code constructing CXGSchema rather than to this module.
                stacklevel=2,
            )

    def ontology(self, name: str) -> Any:
        """Return the ontology terms for the given ontology name. Load from the file cache if available.

        Does not support "additional ontologies" of another ontology.

        :param name: str name of the ontology to get the terms for
        :return: dict representation of the ontology terms
        :raises ValueError: if ``name`` is not an attribute of ``Ontology`` or has no version recorded for
            this schema version
        """
        if name in self.ontology_file_names:
            return load_ontology_file(self.ontology_file_names[name])

        if getattr(Ontology, name, None) is None:
            raise ValueError(f"Ontology {name} is not supported in this package version.")

        try:
            onto_version = self.supported_ontologies[name]["version"]
        except KeyError as e:
            raise ValueError(f"Ontology {name} is not supported for schema version {self.version}") from e

        file_name = f"{name}-ontology-{onto_version}{ONTOLOGY_FILENAME_SUFFIX}"
        # Remember the file name so later calls for this ontology skip the checks above.
        self.ontology_file_names[name] = file_name
        return load_ontology_file(file_name)

    def get_ontology_download_url(self, ontology: Ontology) -> str:
        """
        Get the download URL for a given ontology file.

        When the ontology entry carries a direct ``url`` field (used for ontologies whose
        distribution does not follow the standard ``{source}/{version}/{filename}``
        versioned-path convention), that URL is returned as-is. Otherwise the ``{version}``
        and ``{filename}`` placeholders in the entry's ``source`` are filled in.

        Examples:
            get_ontology_download_url(Ontology.CL) -> "https://github.com/.../cl.owl"
            get_ontology_download_url(Ontology.UniProt) -> "https://ftp.uniprot.org/.../uniprot_sprot.xml.gz"

        :param ontology: Ontology enum of the ontology to fetch
        :return: str download URL for the requested ontology file
        """
        onto_info = self.supported_ontologies[ontology.name]
        if direct_url := onto_info.get("url"):
            return str(direct_url)
        source_url = str(onto_info["source"])
        version = str(onto_info["version"])
        filename = str(onto_info["filename"])
        return source_url.replace("{version}", version).replace("{filename}", filename)
A class to represent the ontology information used by a cellxgene schema version.
def __init__(self, version: Optional[str] = None):
    """
    Resolve the schema version and collect the ontology metadata recorded for it.

    :param version: The schema version to use. If not provided, the latest schema version will be used.
    :raises ValueError: if the requested schema version is not present in the ontology info.
    """
    ontology_info = load_supported_versions()
    if version is None:
        _version = get_latest_schema_version(ontology_info.keys())
    else:
        # The lookups below use the coerced string form (e.g. "v5.0.0" becomes "5.0.0").
        _version = str(coerce_version(version))
        if _version not in ontology_info:
            raise ValueError(f"Schema version {_version} is not supported in this package version.")

    self.version = _version
    self.supported_ontologies = ontology_info[_version]["ontologies"]
    # Invert the per-ontology "additional_ontologies" lists into imported name -> owning ontology.
    self.imported_ontologies = {
        imported_ontology: ontology
        for ontology, info in self.supported_ontologies.items()
        for imported_ontology in info.get("additional_ontologies", [])
    }
    # Names of the ontologies whose entry has a truthy "cross_ontology_mapping" value.
    self.cross_ontology_mappings = {
        ontology for ontology, info in self.supported_ontologies.items() if info.get("cross_ontology_mapping")
    }
    self.ontology_file_names: Dict[str, str] = {}
    self.deprecated_on = ontology_info[_version].get("deprecated_on")
    if self.deprecated_on:
        # strptime raises if the recorded date is not in YYYY-MM-DD form.
        parsed_date = datetime.strptime(self.deprecated_on, "%Y-%m-%d")
        warnings.warn(
            f"Schema version {_version} is deprecated as of {parsed_date}. It will be removed in a future version.",
            DeprecationWarning,
            # stacklevel=1 attributed the warning to this constructor; 2 points at the code that
            # instantiates the schema, which is where the deprecated version was chosen.
            stacklevel=2,
        )
Parameters
- version: The schema version to use. If not provided, the latest schema version will be used.
In our supported ontologies, the CxG schema can support terms imported from different ontologies. This dictionary maps these 'additional ontologies' to their supported ontology name. For example, for ZFS ontology terms imported into the ZFA ontology, imported_ontologies would be {"ZFS":"ZFA", ...}
A dictionary of ontology names and their corresponding file names.
def ontology(self, name: str) -> Any:
    """Fetch the terms of the named ontology, reusing the already-resolved file name when present.

    "Additional ontologies" that are imported into another ontology are not supported here.

    :param name: str name of the ontology to get the terms for
    :return: dict representation of the ontology terms
    """
    # Fast path: the file name for this ontology was resolved by an earlier call.
    if name in self.ontology_file_names:
        return load_ontology_file(self.ontology_file_names[name])

    known_ontology = getattr(Ontology, name, None)
    if known_ontology is None:
        raise ValueError(f"Ontology {name} is not supported in this package version.")

    try:
        onto_version = self.supported_ontologies[name]["version"]
    except KeyError as missing:
        raise ValueError(f"Ontology {name} is not supported for schema version {self.version}") from missing

    resolved_file = f"{name}-ontology-{onto_version}{ONTOLOGY_FILENAME_SUFFIX}"
    # Remember the file name so the next call for this ontology takes the fast path above.
    self.ontology_file_names[name] = resolved_file
    return load_ontology_file(resolved_file)
Return the ontology terms for the given ontology name. Load from the file cache if available.
Does not support "additional ontologies" of another ontology.
Parameters
- name: str name of the ontology to get the terms for
Returns
dict representation of the ontology terms
def get_ontology_download_url(self, ontology: Ontology) -> str:
    """
    Build the download URL of the file for the given ontology.

    An entry that carries a non-empty direct ``url`` field (used for ontologies whose
    distribution does not follow the standard ``{source}/{version}/{filename}``
    versioned-path convention) yields that URL unchanged. For every other entry the
    ``{version}`` and ``{filename}`` placeholders of its ``source`` are filled in.

    Examples:
        get_ontology_download_url(Ontology.CL) -> "https://github.com/.../cl.owl"
        get_ontology_download_url(Ontology.UniProt) -> "https://ftp.uniprot.org/.../uniprot_sprot.xml.gz"

    :param ontology: Ontology enum of the ontology to fetch
    :return: str download URL for the requested ontology file
    """
    entry = self.supported_ontologies[ontology.name]

    explicit_url = entry.get("url")
    if explicit_url:
        return str(explicit_url)

    url = str(entry["source"])
    # Substitution order matters: {version} is filled in before {filename}.
    substitutions = (
        ("{version}", str(entry["version"])),
        ("{filename}", str(entry["filename"])),
    )
    for placeholder, value in substitutions:
        url = url.replace(placeholder, value)
    return url
Get the download URL for a given ontology file.
When the ontology entry carries a direct url field (used for ontologies whose
distribution does not follow the standard {source}/{version}/{filename}
versioned-path convention), that URL is returned as-is.
Examples: get_ontology_download_url(Ontology.CL) -> "https://github.com/.../cl.owl" get_ontology_download_url(Ontology.UniProt) -> "https://ftp.uniprot.org/.../uniprot_sprot.xml.gz"
Parameters
- ontology: Ontology enum of the ontology to fetch
Returns
str download URL for the requested ontology file