cellxgene_ontology_guide.supported_versions

  1import functools
  2import json
  3import os
  4import warnings
  5from datetime import datetime
  6from typing import Any, Dict, List, Optional
  7
  8import zstandard as zstd
  9from semantic_version import Version
 10
 11from cellxgene_ontology_guide._constants import DATA_ROOT, ONTOLOGY_FILENAME_SUFFIX, ONTOLOGY_INFO_FILENAME
 12from cellxgene_ontology_guide.entities import Ontology
 13
 14
 15@functools.cache
 16def load_ontology_file(file_name: str) -> Any:
 17    """Load the ontology file from the data directory and return it as a dict."""
 18    file_path = os.path.join(DATA_ROOT, file_name)
 19    with open(file_path, "rb") as f:
 20        dctx = zstd.ZstdDecompressor()
 21        decompressed = dctx.stream_reader(f)
 22        return json.load(decompressed)
 23
 24
 25def clear_ontology_file_cache() -> None:
 26    """Clear the cache for the load_ontology_file function."""
 27    load_ontology_file.cache_clear()
 28
 29
 30def get_latest_schema_version(versions: List[str]) -> str:
 31    """Given a list of schema versions, return the latest version.
 32
 33    :param versions: List[str] list of schema versions. Versions can be in the format "v5.0.0" or "5.0.0"
 34    :return: str latest version without the leading "v"
 35    """
 36
 37    return str(sorted([coerce_version(version) for version in versions])[-1])
 38
 39
 40def coerce_version(version: str) -> Version:
 41    """Coerce a version string into a semantic_version.Version object.
 42
 43    :param version: str version string to coerce
 44    :return: Version coerced version object
 45    """
 46    v = version[1:] if version[0] == "v" else version
 47    return Version.coerce(v)
 48
 49
 50def load_supported_versions() -> Any:
 51    """Load the ontology_info.json file and return it as a dict."""
 52    with open(os.path.join(DATA_ROOT, ONTOLOGY_INFO_FILENAME)) as f:
 53        return json.load(f)
 54
 55
 56class CXGSchema:
 57    """A class to represent the ontology information used by a cellxgene schema version."""
 58
 59    version: str
 60    """The schema version used by the class instance."""
 61    supported_ontologies: Dict[str, Any]
 62    """A dictionary of supported ontologies for the schema version."""
 63    imported_ontologies: Dict[str, str]
 64    """In our supported ontologies, the CxG schema can support terms imported from different ontologies. 
 65    This dictionary maps these 'additional ontologies' to their supported ontology name. For example, 
 66    for ZFS ontology terms imported into the ZFA ontology, imported_ontologies would be {"ZFS":"ZFA", ...}"""
 67    ontology_file_names: Dict[str, str]
 68    """A dictionary of ontology names and their corresponding file names."""
 69
 70    def __init__(self, version: Optional[str] = None):
 71        """
 72
 73        :param version: The schema version to use. If not provided, the latest schema version will be used.
 74        """
 75        ontology_info = load_supported_versions()
 76        if version is None:
 77            _version = get_latest_schema_version(ontology_info.keys())
 78        else:
 79            _version = str(coerce_version(version))
 80            if str(_version) not in ontology_info:
 81                raise ValueError(f"Schema version {_version} is not supported in this package version.")
 82
 83        self.version = _version
 84        self.supported_ontologies = ontology_info[_version]["ontologies"]
 85        self.imported_ontologies = {
 86            imported_ontology: ontology
 87            for ontology, info in self.supported_ontologies.items()
 88            for imported_ontology in info.get("additional_ontologies", [])
 89        }
 90        self.cross_ontology_mappings = {
 91            ontology for ontology, info in self.supported_ontologies.items() if info.get("cross_ontology_mapping")
 92        }
 93        self.ontology_file_names: Dict[str, str] = {}
 94        self.deprecated_on = ontology_info[_version].get("deprecated_on")
 95        if self.deprecated_on:
 96            parsed_date = datetime.strptime(self.deprecated_on, "%Y-%m-%d")
 97            warnings.warn(
 98                f"Schema version {_version} is deprecated as of {parsed_date}. It will be removed in a future version.",
 99                DeprecationWarning,
100                stacklevel=1,
101            )
102
103    def ontology(self, name: str) -> Any:
104        """Return the ontology terms for the given ontology name. Load from the file cache if available.
105
106        Does not support "additional ontologies" of another ontology.
107
108        :param name: str name of the ontology to get the terms for
109        :return: dict representation of the ontology terms
110        """
111        if name not in self.ontology_file_names:
112            if getattr(Ontology, name, None) is None:
113                raise ValueError(f"Ontology {name} is not supported in this package version.")
114
115            try:
116                onto_version = self.supported_ontologies[name]["version"]
117            except KeyError as e:
118                raise ValueError(f"Ontology {name} is not supported for schema version {self.version}") from e
119            file_name = f"{name}-ontology-{onto_version}{ONTOLOGY_FILENAME_SUFFIX}"
120            self.ontology_file_names[name] = file_name  # save to file name to access from cache
121        return load_ontology_file(self.ontology_file_names[name])
122
123    def get_ontology_download_url(self, ontology: Ontology) -> str:
124        """
125        Get the download URL for a given ontology file.
126
127        Examples:
128        get_ontology_download_url("CL") -> "http://example.com/2024-01-01/cl.owl"
129
130        :param ontology: Ontology enum of the ontology to fetch
131        :return: str download URL for the requested ontology file
132        """
133        source_url = self.supported_ontologies[ontology.name]["source"]
134        version = self.supported_ontologies[ontology.name]["version"]
135        filename = self.supported_ontologies[ontology.name]["filename"]
136        return f"{source_url}/{version}/{filename}"
@functools.cache
def load_ontology_file(file_name: str) -> Any:
@functools.cache
def load_ontology_file(file_name: str) -> Any:
    """Read a zstd-compressed JSON ontology file from the data directory.

    Results are memoized per ``file_name`` by ``functools.cache``, so repeated calls with the
    same name return the same parsed object until the cache is cleared.

    :param file_name: str name of the ontology file, resolved relative to DATA_ROOT
    :return: the parsed JSON content of the decompressed file
    """
    compressed_path = os.path.join(DATA_ROOT, file_name)
    decompressor = zstd.ZstdDecompressor()
    with open(compressed_path, "rb") as compressed:
        # Wrap the raw file in a decompressing reader so json can consume it directly.
        return json.load(decompressor.stream_reader(compressed))

Load the ontology file from the data directory and return it as a dict.

def clear_ontology_file_cache() -> None:
def clear_ontology_file_cache() -> None:
    """Discard every result memoized by load_ontology_file.

    After this call, the next load_ontology_file request for any file name reads and parses
    that file again.
    """
    load_ontology_file.cache_clear()

Clear the cache for the load_ontology_file function.

def get_latest_schema_version(versions: List[str]) -> str:
def get_latest_schema_version(versions: List[str]) -> str:
    """Pick the highest schema version out of a collection of version strings.

    :param versions: List[str] schema versions to compare. Each may be written as "v5.0.0" or "5.0.0"
    :return: str the highest version, rendered without the leading "v"
    """
    parsed_versions = [coerce_version(candidate) for candidate in versions]
    parsed_versions.sort()  # ascending order, so the newest version ends up last
    newest = parsed_versions[-1]
    return str(newest)

Given a list of schema versions, return the latest version.

Parameters
  • versions: List[str] list of schema versions. Versions can be in the format "v5.0.0" or "5.0.0"
Returns

str latest version without the leading "v"

def coerce_version(version: str) -> semantic_version.base.Version:
def coerce_version(version: str) -> Version:
    """Coerce a version string into a semantic_version.Version object.

    A single leading "v" (as in "v5.0.0") is dropped before the string is handed to
    Version.coerce.

    :param version: str version string to coerce, with or without a leading "v"
    :return: Version coerced version object
    :raises ValueError: propagated from Version.coerce when the string cannot be coerced
    """
    # startswith is safe on an empty string, whereas indexing version[0] raised IndexError;
    # empty input now reaches Version.coerce and fails like any other unparsable version.
    v = version[1:] if version.startswith("v") else version
    return Version.coerce(v)

Coerce a version string into a semantic_version.Version object.

Parameters
  • version: str version string to coerce
Returns

Version coerced version object

def load_supported_versions() -> Any:
def load_supported_versions() -> Any:
    """Load the ontology_info.json file and return it as a dict.

    The file is located at ONTOLOGY_INFO_FILENAME inside DATA_ROOT and is read from disk on
    every call.

    :return: the parsed JSON content of the file
    """
    info_path = os.path.join(DATA_ROOT, ONTOLOGY_INFO_FILENAME)
    # Pin the encoding: without it open() uses the locale's preferred encoding, which is not
    # UTF-8 on every platform.
    with open(info_path, encoding="utf-8") as f:
        return json.load(f)

Load the ontology_info.json file and return it as a dict.

class CXGSchema:
 57class CXGSchema:
 58    """A class to represent the ontology information used by a cellxgene schema version."""
 59
 60    version: str
 61    """The schema version used by the class instance."""
 62    supported_ontologies: Dict[str, Any]
 63    """A dictionary of supported ontologies for the schema version."""
 64    imported_ontologies: Dict[str, str]
 65    """In our supported ontologies, the CxG schema can support terms imported from different ontologies. 
 66    This dictionary maps these 'additional ontologies' to their supported ontology name. For example, 
 67    for ZFS ontology terms imported into the ZFA ontology, imported_ontologies would be {"ZFS":"ZFA", ...}"""
 68    ontology_file_names: Dict[str, str]
 69    """A dictionary of ontology names and their corresponding file names."""
 70
 71    def __init__(self, version: Optional[str] = None):
 72        """
 73
 74        :param version: The schema version to use. If not provided, the latest schema version will be used.
 75        """
 76        ontology_info = load_supported_versions()
 77        if version is None:
 78            _version = get_latest_schema_version(ontology_info.keys())
 79        else:
 80            _version = str(coerce_version(version))
 81            if str(_version) not in ontology_info:
 82                raise ValueError(f"Schema version {_version} is not supported in this package version.")
 83
 84        self.version = _version
 85        self.supported_ontologies = ontology_info[_version]["ontologies"]
 86        self.imported_ontologies = {
 87            imported_ontology: ontology
 88            for ontology, info in self.supported_ontologies.items()
 89            for imported_ontology in info.get("additional_ontologies", [])
 90        }
 91        self.cross_ontology_mappings = {
 92            ontology for ontology, info in self.supported_ontologies.items() if info.get("cross_ontology_mapping")
 93        }
 94        self.ontology_file_names: Dict[str, str] = {}
 95        self.deprecated_on = ontology_info[_version].get("deprecated_on")
 96        if self.deprecated_on:
 97            parsed_date = datetime.strptime(self.deprecated_on, "%Y-%m-%d")
 98            warnings.warn(
 99                f"Schema version {_version} is deprecated as of {parsed_date}. It will be removed in a future version.",
100                DeprecationWarning,
101                stacklevel=1,
102            )
103
104    def ontology(self, name: str) -> Any:
105        """Return the ontology terms for the given ontology name. Load from the file cache if available.
106
107        Does not support "additional ontologies" of another ontology.
108
109        :param name: str name of the ontology to get the terms for
110        :return: dict representation of the ontology terms
111        """
112        if name not in self.ontology_file_names:
113            if getattr(Ontology, name, None) is None:
114                raise ValueError(f"Ontology {name} is not supported in this package version.")
115
116            try:
117                onto_version = self.supported_ontologies[name]["version"]
118            except KeyError as e:
119                raise ValueError(f"Ontology {name} is not supported for schema version {self.version}") from e
120            file_name = f"{name}-ontology-{onto_version}{ONTOLOGY_FILENAME_SUFFIX}"
121            self.ontology_file_names[name] = file_name  # save to file name to access from cache
122        return load_ontology_file(self.ontology_file_names[name])
123
124    def get_ontology_download_url(self, ontology: Ontology) -> str:
125        """
126        Get the download URL for a given ontology file.
127
128        Examples:
129        get_ontology_download_url("CL") -> "http://example.com/2024-01-01/cl.owl"
130
131        :param ontology: Ontology enum of the ontology to fetch
132        :return: str download URL for the requested ontology file
133        """
134        source_url = self.supported_ontologies[ontology.name]["source"]
135        version = self.supported_ontologies[ontology.name]["version"]
136        filename = self.supported_ontologies[ontology.name]["filename"]
137        return f"{source_url}/{version}/{filename}"

A class to represent the ontology information used by a cellxgene schema version.

CXGSchema(version: Optional[str] = None)
 71    def __init__(self, version: Optional[str] = None):
 72        """
 73
 74        :param version: The schema version to use. If not provided, the latest schema version will be used.
 75        """
 76        ontology_info = load_supported_versions()
 77        if version is None:
 78            _version = get_latest_schema_version(ontology_info.keys())
 79        else:
 80            _version = str(coerce_version(version))
 81            if str(_version) not in ontology_info:
 82                raise ValueError(f"Schema version {_version} is not supported in this package version.")
 83
 84        self.version = _version
 85        self.supported_ontologies = ontology_info[_version]["ontologies"]
 86        self.imported_ontologies = {
 87            imported_ontology: ontology
 88            for ontology, info in self.supported_ontologies.items()
 89            for imported_ontology in info.get("additional_ontologies", [])
 90        }
 91        self.cross_ontology_mappings = {
 92            ontology for ontology, info in self.supported_ontologies.items() if info.get("cross_ontology_mapping")
 93        }
 94        self.ontology_file_names: Dict[str, str] = {}
 95        self.deprecated_on = ontology_info[_version].get("deprecated_on")
 96        if self.deprecated_on:
 97            parsed_date = datetime.strptime(self.deprecated_on, "%Y-%m-%d")
 98            warnings.warn(
 99                f"Schema version {_version} is deprecated as of {parsed_date}. It will be removed in a future version.",
100                DeprecationWarning,
101                stacklevel=1,
102            )
Parameters
  • version: The schema version to use. If not provided, the latest schema version will be used.
version: str

The schema version used by the class instance.

supported_ontologies: Dict[str, Any]

A dictionary of supported ontologies for the schema version.

imported_ontologies: Dict[str, str]

In our supported ontologies, the CxG schema can support terms imported from different ontologies. This dictionary maps these 'additional ontologies' to their supported ontology name. For example, for ZFS ontology terms imported into the ZFA ontology, imported_ontologies would be {"ZFS":"ZFA", ...}

ontology_file_names: Dict[str, str]

A dictionary of ontology names and their corresponding file names.

cross_ontology_mappings
deprecated_on
def ontology(self, name: str) -> Any:
104    def ontology(self, name: str) -> Any:
105        """Return the ontology terms for the given ontology name. Load from the file cache if available.
106
107        Does not support "additional ontologies" of another ontology.
108
109        :param name: str name of the ontology to get the terms for
110        :return: dict representation of the ontology terms
111        """
112        if name not in self.ontology_file_names:
113            if getattr(Ontology, name, None) is None:
114                raise ValueError(f"Ontology {name} is not supported in this package version.")
115
116            try:
117                onto_version = self.supported_ontologies[name]["version"]
118            except KeyError as e:
119                raise ValueError(f"Ontology {name} is not supported for schema version {self.version}") from e
120            file_name = f"{name}-ontology-{onto_version}{ONTOLOGY_FILENAME_SUFFIX}"
121            self.ontology_file_names[name] = file_name  # save to file name to access from cache
122        return load_ontology_file(self.ontology_file_names[name])

Return the ontology terms for the given ontology name. Load from the file cache if available.

Does not support "additional ontologies" of another ontology.

Parameters
  • name: str name of the ontology to get the terms for
Returns

dict representation of the ontology terms

def get_ontology_download_url(self, ontology: cellxgene_ontology_guide.entities.Ontology) -> str:
124    def get_ontology_download_url(self, ontology: Ontology) -> str:
125        """
126        Get the download URL for a given ontology file.
127
128        Examples:
129        get_ontology_download_url("CL") -> "http://example.com/2024-01-01/cl.owl"
130
131        :param ontology: Ontology enum of the ontology to fetch
132        :return: str download URL for the requested ontology file
133        """
134        source_url = self.supported_ontologies[ontology.name]["source"]
135        version = self.supported_ontologies[ontology.name]["version"]
136        filename = self.supported_ontologies[ontology.name]["filename"]
137        return f"{source_url}/{version}/{filename}"

Get the download URL for a given ontology file.

Examples: get_ontology_download_url(Ontology.CL) -> "http://example.com/2024-01-01/cl.owl"

Parameters
  • ontology: Ontology enum of the ontology to fetch
Returns

str download URL for the requested ontology file