cellxgene_ontology_guide.supported_versions

  1import functools
  2import json
  3import os
  4import warnings
  5from datetime import datetime
  6from typing import Any, Dict, List, Optional
  7
  8import zstandard as zstd
  9from semantic_version import Version
 10
 11from cellxgene_ontology_guide._constants import DATA_ROOT, ONTOLOGY_FILENAME_SUFFIX, ONTOLOGY_INFO_FILENAME
 12from cellxgene_ontology_guide.entities import Ontology
 13
 14
 15@functools.cache
 16def load_ontology_file(file_name: str) -> Any:
 17    """Load the ontology file from the data directory and return it as a dict."""
 18    file_path = os.path.join(DATA_ROOT, file_name)
 19    with open(file_path, "rb") as f:
 20        dctx = zstd.ZstdDecompressor()
 21        decompressed = dctx.stream_reader(f)
 22        return json.load(decompressed)
 23
 24
 25def clear_ontology_file_cache() -> None:
 26    """Clear the cache for the load_ontology_file function."""
 27    load_ontology_file.cache_clear()
 28
 29
 30def get_latest_schema_version(versions: List[str]) -> str:
 31    """Given a list of schema versions, return the latest version.
 32
 33    :param versions: List[str] list of schema versions. Versions can be in the format "v5.0.0" or "5.0.0"
 34    :return: str latest version without the leading "v"
 35    """
 36
 37    return str(sorted([coerce_version(version) for version in versions])[-1])
 38
 39
 40def coerce_version(version: str) -> Version:
 41    """Coerce a version string into a semantic_version.Version object.
 42
 43    :param version: str version string to coerce
 44    :return: Version coerced version object
 45    """
 46    v = version[1:] if version[0] == "v" else version
 47    return Version.coerce(v)
 48
 49
 50def load_supported_versions() -> Any:
 51    """Load the ontology_info.json file and return it as a dict."""
 52    with open(os.path.join(DATA_ROOT, ONTOLOGY_INFO_FILENAME)) as f:
 53        return json.load(f)
 54
 55
 56class CXGSchema:
 57    """A class to represent the ontology information used by a cellxgene schema version."""
 58
 59    version: str
 60    """The schema version used by the class instance."""
 61    supported_ontologies: Dict[str, Any]
 62    """A dictionary of supported ontologies for the schema version."""
 63    imported_ontologies: Dict[str, str]
 64    """In our supported ontologies, the CxG schema can support terms imported from different ontologies. 
 65    This dictionary maps these 'additional ontologies' to their supported ontology name. For example, 
 66    for ZFS ontology terms imported into the ZFA ontology, imported_ontologies would be {"ZFS":"ZFA", ...}"""
 67    ontology_file_names: Dict[str, str]
 68    """A dictionary of ontology names and their corresponding file names."""
 69
 70    def __init__(self, version: Optional[str] = None):
 71        """
 72
 73        :param version: The schema version to use. If not provided, the latest schema version will be used.
 74        """
 75        ontology_info = load_supported_versions()
 76        if version is None:
 77            _version = get_latest_schema_version(ontology_info.keys())
 78        else:
 79            _version = str(coerce_version(version))
 80            if str(_version) not in ontology_info:
 81                raise ValueError(f"Schema version {_version} is not supported in this package version.")
 82
 83        self.version = _version
 84        self.supported_ontologies = ontology_info[_version]["ontologies"]
 85        self.imported_ontologies = {
 86            imported_ontology: ontology
 87            for ontology, info in self.supported_ontologies.items()
 88            for imported_ontology in info.get("additional_ontologies", [])
 89        }
 90        self.cross_ontology_mappings = {
 91            ontology for ontology, info in self.supported_ontologies.items() if info.get("cross_ontology_mapping")
 92        }
 93        self.ontology_file_names: Dict[str, str] = {}
 94        self.deprecated_on = ontology_info[_version].get("deprecated_on")
 95        if self.deprecated_on:
 96            parsed_date = datetime.strptime(self.deprecated_on, "%Y-%m-%d")
 97            warnings.warn(
 98                f"Schema version {_version} is deprecated as of {parsed_date}. It will be removed in a future version.",
 99                DeprecationWarning,
100                stacklevel=1,
101            )
102
103    def ontology(self, name: str) -> Any:
104        """Return the ontology terms for the given ontology name. Load from the file cache if available.
105
106        Does not support "additional ontologies" of another ontology.
107
108        :param name: str name of the ontology to get the terms for
109        :return: dict representation of the ontology terms
110        """
111        if name not in self.ontology_file_names:
112            if getattr(Ontology, name, None) is None:
113                raise ValueError(f"Ontology {name} is not supported in this package version.")
114
115            try:
116                onto_version = self.supported_ontologies[name]["version"]
117            except KeyError as e:
118                raise ValueError(f"Ontology {name} is not supported for schema version {self.version}") from e
119            file_name = f"{name}-ontology-{onto_version}{ONTOLOGY_FILENAME_SUFFIX}"
120            self.ontology_file_names[name] = file_name  # save to file name to access from cache
121        return load_ontology_file(self.ontology_file_names[name])
122
123    def get_ontology_download_url(self, ontology: Ontology) -> str:
124        """
125        Get the download URL for a given ontology file.
126
127        When the ontology entry carries a direct ``url`` field (used for ontologies whose
128        distribution does not follow the standard ``{source}/{version}/{filename}``
129        versioned-path convention), that URL is returned as-is.
130
131        Examples:
132        get_ontology_download_url(Ontology.CL) -> "https://github.com/.../cl.owl"
133        get_ontology_download_url(Ontology.UniProt) -> "https://ftp.uniprot.org/.../uniprot_sprot.xml.gz"
134
135        :param ontology: Ontology enum of the ontology to fetch
136        :return: str download URL for the requested ontology file
137        """
138        onto_info = self.supported_ontologies[ontology.name]
139        if direct_url := onto_info.get("url"):
140            return str(direct_url)
141        source_url = str(onto_info["source"])
142        version = str(onto_info["version"])
143        filename = str(onto_info["filename"])
144        return source_url.replace("{version}", version).replace("{filename}", filename)
@functools.cache
def load_ontology_file(file_name: str) -> Any:
@functools.cache
def load_ontology_file(file_name: str) -> Any:
    """Read a zstd-compressed JSON ontology file from the data directory and return its parsed content.

    The result is memoized per ``file_name`` by ``functools.cache``, so repeated calls with the same
    name return the same object without reading the file again.

    :param file_name: str name of the ontology file, relative to DATA_ROOT
    :return: the decoded JSON content of the file
    """
    compressed_path = os.path.join(DATA_ROOT, file_name)
    with open(compressed_path, "rb") as compressed:
        # Decompress as a stream and let the JSON parser consume it directly.
        reader = zstd.ZstdDecompressor().stream_reader(compressed)
        return json.load(reader)

Load the ontology file from the data directory and return it as a dict.

def clear_ontology_file_cache() -> None:
def clear_ontology_file_cache() -> None:
    """Drop every memoized result of load_ontology_file.

    After this call, the next load_ontology_file call for any file name reads the file again.
    """
    load_ontology_file.cache_clear()

Clear the cache for the load_ontology_file function.

def get_latest_schema_version(versions: List[str]) -> str:
def get_latest_schema_version(versions: List[str]) -> str:
    """Given a list of schema versions, return the latest version.

    Versions are compared as semantic versions rather than as plain strings.

    :param versions: List[str] list of schema versions. Versions can be in the format "v5.0.0" or "5.0.0".
        Any iterable of version strings is accepted.
    :return: str latest version without the leading "v"
    :raises ValueError: if no versions are provided, or if a version string cannot be coerced
    """
    # max() finds the latest version in one pass; sorting the whole collection is not needed.
    latest = max((coerce_version(version) for version in versions), default=None)
    if latest is None:
        raise ValueError("Cannot determine the latest schema version: no versions were provided.")
    return str(latest)

Given a list of schema versions, return the latest version.

Parameters
  • versions: List[str] list of schema versions. Versions can be in the format "v5.0.0" or "5.0.0"
Returns

str latest version without the leading "v"

def coerce_version(version: str) -> semantic_version.base.Version:
def coerce_version(version: str) -> Version:
    """Coerce a version string into a semantic_version.Version object.

    A single leading "v" is dropped before coercion, so "v5.0.0" and "5.0.0" give the same result.

    :param version: str version string to coerce
    :return: Version coerced version object
    :raises ValueError: if the string cannot be coerced into a version, including the empty string
    """
    # startswith() is safe on an empty string, whereas indexing version[0] raised IndexError.
    v = version[1:] if version.startswith("v") else version
    return Version.coerce(v)

Coerce a version string into a semantic_version.Version object.

Parameters
  • version: str version string to coerce
Returns

Version coerced version object

def load_supported_versions() -> Any:
def load_supported_versions() -> Any:
    """Load the ontology_info.json file and return it as a dict.

    :return: the parsed content of ONTOLOGY_INFO_FILENAME located under DATA_ROOT
    """
    # Pin the encoding so that decoding the JSON text does not depend on the platform's locale default.
    with open(os.path.join(DATA_ROOT, ONTOLOGY_INFO_FILENAME), encoding="utf-8") as f:
        return json.load(f)

Load the ontology_info.json file and return it as a dict.

class CXGSchema:
 57class CXGSchema:
 58    """A class to represent the ontology information used by a cellxgene schema version."""
 59
 60    version: str
 61    """The schema version used by the class instance."""
 62    supported_ontologies: Dict[str, Any]
 63    """A dictionary of supported ontologies for the schema version."""
 64    imported_ontologies: Dict[str, str]
 65    """In our supported ontologies, the CxG schema can support terms imported from different ontologies. 
 66    This dictionary maps these 'additional ontologies' to their supported ontology name. For example, 
 67    for ZFS ontology terms imported into the ZFA ontology, imported_ontologies would be {"ZFS":"ZFA", ...}"""
 68    ontology_file_names: Dict[str, str]
 69    """A dictionary of ontology names and their corresponding file names."""
 70
 71    def __init__(self, version: Optional[str] = None):
 72        """
 73
 74        :param version: The schema version to use. If not provided, the latest schema version will be used.
 75        """
 76        ontology_info = load_supported_versions()
 77        if version is None:
 78            _version = get_latest_schema_version(ontology_info.keys())
 79        else:
 80            _version = str(coerce_version(version))
 81            if str(_version) not in ontology_info:
 82                raise ValueError(f"Schema version {_version} is not supported in this package version.")
 83
 84        self.version = _version
 85        self.supported_ontologies = ontology_info[_version]["ontologies"]
 86        self.imported_ontologies = {
 87            imported_ontology: ontology
 88            for ontology, info in self.supported_ontologies.items()
 89            for imported_ontology in info.get("additional_ontologies", [])
 90        }
 91        self.cross_ontology_mappings = {
 92            ontology for ontology, info in self.supported_ontologies.items() if info.get("cross_ontology_mapping")
 93        }
 94        self.ontology_file_names: Dict[str, str] = {}
 95        self.deprecated_on = ontology_info[_version].get("deprecated_on")
 96        if self.deprecated_on:
 97            parsed_date = datetime.strptime(self.deprecated_on, "%Y-%m-%d")
 98            warnings.warn(
 99                f"Schema version {_version} is deprecated as of {parsed_date}. It will be removed in a future version.",
100                DeprecationWarning,
101                stacklevel=1,
102            )
103
104    def ontology(self, name: str) -> Any:
105        """Return the ontology terms for the given ontology name. Load from the file cache if available.
106
107        Does not support "additional ontologies" of another ontology.
108
109        :param name: str name of the ontology to get the terms for
110        :return: dict representation of the ontology terms
111        """
112        if name not in self.ontology_file_names:
113            if getattr(Ontology, name, None) is None:
114                raise ValueError(f"Ontology {name} is not supported in this package version.")
115
116            try:
117                onto_version = self.supported_ontologies[name]["version"]
118            except KeyError as e:
119                raise ValueError(f"Ontology {name} is not supported for schema version {self.version}") from e
120            file_name = f"{name}-ontology-{onto_version}{ONTOLOGY_FILENAME_SUFFIX}"
121            self.ontology_file_names[name] = file_name  # save to file name to access from cache
122        return load_ontology_file(self.ontology_file_names[name])
123
124    def get_ontology_download_url(self, ontology: Ontology) -> str:
125        """
126        Get the download URL for a given ontology file.
127
128        When the ontology entry carries a direct ``url`` field (used for ontologies whose
129        distribution does not follow the standard ``{source}/{version}/{filename}``
130        versioned-path convention), that URL is returned as-is.
131
132        Examples:
133        get_ontology_download_url(Ontology.CL) -> "https://github.com/.../cl.owl"
134        get_ontology_download_url(Ontology.UniProt) -> "https://ftp.uniprot.org/.../uniprot_sprot.xml.gz"
135
136        :param ontology: Ontology enum of the ontology to fetch
137        :return: str download URL for the requested ontology file
138        """
139        onto_info = self.supported_ontologies[ontology.name]
140        if direct_url := onto_info.get("url"):
141            return str(direct_url)
142        source_url = str(onto_info["source"])
143        version = str(onto_info["version"])
144        filename = str(onto_info["filename"])
145        return source_url.replace("{version}", version).replace("{filename}", filename)

A class to represent the ontology information used by a cellxgene schema version.

CXGSchema(version: Optional[str] = None)
 71    def __init__(self, version: Optional[str] = None):
 72        """
 73
 74        :param version: The schema version to use. If not provided, the latest schema version will be used.
 75        """
 76        ontology_info = load_supported_versions()
 77        if version is None:
 78            _version = get_latest_schema_version(ontology_info.keys())
 79        else:
 80            _version = str(coerce_version(version))
 81            if str(_version) not in ontology_info:
 82                raise ValueError(f"Schema version {_version} is not supported in this package version.")
 83
 84        self.version = _version
 85        self.supported_ontologies = ontology_info[_version]["ontologies"]
 86        self.imported_ontologies = {
 87            imported_ontology: ontology
 88            for ontology, info in self.supported_ontologies.items()
 89            for imported_ontology in info.get("additional_ontologies", [])
 90        }
 91        self.cross_ontology_mappings = {
 92            ontology for ontology, info in self.supported_ontologies.items() if info.get("cross_ontology_mapping")
 93        }
 94        self.ontology_file_names: Dict[str, str] = {}
 95        self.deprecated_on = ontology_info[_version].get("deprecated_on")
 96        if self.deprecated_on:
 97            parsed_date = datetime.strptime(self.deprecated_on, "%Y-%m-%d")
 98            warnings.warn(
 99                f"Schema version {_version} is deprecated as of {parsed_date}. It will be removed in a future version.",
100                DeprecationWarning,
101                stacklevel=1,
102            )
Parameters
  • version: The schema version to use. If not provided, the latest schema version will be used.
version: str

The schema version used by the class instance.

supported_ontologies: Dict[str, Any]

A dictionary of supported ontologies for the schema version.

imported_ontologies: Dict[str, str]

In our supported ontologies, the CxG schema can support terms imported from different ontologies. This dictionary maps these 'additional ontologies' to their supported ontology name. For example, for ZFS ontology terms imported into the ZFA ontology, imported_ontologies would be {"ZFS":"ZFA", ...}

ontology_file_names: Dict[str, str]

A dictionary of ontology names and their corresponding file names.

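cross_ontology_mappings

The set of supported ontology names whose entry has a truthy "cross_ontology_mapping" value.

deprecated_on

The "deprecated_on" value recorded for this schema version, or None when the entry has no such key.
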
def ontology(self, name: str) -> Any:
104    def ontology(self, name: str) -> Any:
105        """Return the ontology terms for the given ontology name. Load from the file cache if available.
106
107        Does not support "additional ontologies" of another ontology.
108
109        :param name: str name of the ontology to get the terms for
110        :return: dict representation of the ontology terms
111        """
112        if name not in self.ontology_file_names:
113            if getattr(Ontology, name, None) is None:
114                raise ValueError(f"Ontology {name} is not supported in this package version.")
115
116            try:
117                onto_version = self.supported_ontologies[name]["version"]
118            except KeyError as e:
119                raise ValueError(f"Ontology {name} is not supported for schema version {self.version}") from e
120            file_name = f"{name}-ontology-{onto_version}{ONTOLOGY_FILENAME_SUFFIX}"
121            self.ontology_file_names[name] = file_name  # save to file name to access from cache
122        return load_ontology_file(self.ontology_file_names[name])

Return the ontology terms for the given ontology name. Load from the file cache if available.

Does not support "additional ontologies" of another ontology.

Parameters
  • name: str name of the ontology to get the terms for
Returns

dict representation of the ontology terms

def get_ontology_download_url(self, ontology: cellxgene_ontology_guide.entities.Ontology) -> str:
124    def get_ontology_download_url(self, ontology: Ontology) -> str:
125        """
126        Get the download URL for a given ontology file.
127
128        When the ontology entry carries a direct ``url`` field (used for ontologies whose
129        distribution does not follow the standard ``{source}/{version}/{filename}``
130        versioned-path convention), that URL is returned as-is.
131
132        Examples:
133        get_ontology_download_url(Ontology.CL) -> "https://github.com/.../cl.owl"
134        get_ontology_download_url(Ontology.UniProt) -> "https://ftp.uniprot.org/.../uniprot_sprot.xml.gz"
135
136        :param ontology: Ontology enum of the ontology to fetch
137        :return: str download URL for the requested ontology file
138        """
139        onto_info = self.supported_ontologies[ontology.name]
140        if direct_url := onto_info.get("url"):
141            return str(direct_url)
142        source_url = str(onto_info["source"])
143        version = str(onto_info["version"])
144        filename = str(onto_info["filename"])
145        return source_url.replace("{version}", version).replace("{filename}", filename)

Get the download URL for a given ontology file.

When the ontology entry carries a direct url field (used for ontologies whose distribution does not follow the standard {source}/{version}/{filename} versioned-path convention), that URL is returned as-is.

Examples: get_ontology_download_url(Ontology.CL) -> "https://github.com/.../cl.owl"; get_ontology_download_url(Ontology.UniProt) -> "https://ftp.uniprot.org/.../uniprot_sprot.xml.gz"

Parameters
  • ontology: Ontology enum of the ontology to fetch
Returns

str download URL for the requested ontology file