Source code for czbenchmarks.models.validators.base_single_cell_model_validator

from typing import ClassVar, List

from ...datasets import SingleCellDataset, Organism
from .base_model_validator import BaseModelValidator


class BaseSingleCellValidator(BaseModelValidator):
    """Base validator for single-cell models.

    Defines common validation logic for single-cell models, including:

    - Organism compatibility checking
    - Required metadata validation
    - AnnData observation and variable key validation

    Subclasses must define the class variables ``available_organisms``,
    ``required_obs_keys`` and ``required_var_keys``; this is enforced when
    the subclass is created (see ``__init_subclass__``).
    """

    # Dataset class this validator applies to.
    # NOTE(review): presumably consumed by BaseModelValidator to dispatch on
    # dataset type -- confirm against the base class.
    dataset_type = SingleCellDataset

    # Organisms a dataset may have for validation to pass.
    available_organisms: ClassVar[List[Organism]]
    # Column names that must be present in ``dataset.adata.obs``.
    required_obs_keys: ClassVar[List[str]]
    # Column names that must be present in ``dataset.adata.var``.
    required_var_keys: ClassVar[List[str]]

    def __init_subclass__(cls) -> None:
        """Validate required class variables in child classes.

        Ensures child classes define:

        - available_organisms
        - required_obs_keys
        - required_var_keys

        Raises:
            TypeError: If any required class variable is missing. Only the
                first missing variable (in the order listed above) is
                reported.
        """
        super().__init_subclass__()

        # The annotations on this base class do not create attributes, so
        # hasattr() is only true once a class in the hierarchy assigns a value.
        for attr_name in (
            "available_organisms",
            "required_obs_keys",
            "required_var_keys",
        ):
            if not hasattr(cls, attr_name):
                raise TypeError(
                    f"Can't instantiate {cls.__name__} "
                    f"without {attr_name} class variable"
                )

    def _validate_dataset(self, dataset: SingleCellDataset) -> None:
        """Validate a single-cell dataset.

        Checks, in order:

        1. Dataset organism is supported
        2. Required observation keys are present
        3. Required variable keys are present

        Args:
            dataset: SingleCellDataset to validate

        Raises:
            ValueError: If validation fails. The first failing check raises;
                later checks are not evaluated.
        """
        if dataset.organism not in self.available_organisms:
            # Both fragments must be f-strings: adjacent literals are
            # concatenated, but the ``f`` prefix applies per literal.
            raise ValueError(
                f"Dataset organism {dataset.organism} "
                f"is not supported for {self.__class__.__name__}"
            )

        missing_obs_keys = [
            key
            for key in self.required_obs_keys
            if key not in dataset.adata.obs.columns
        ]
        if missing_obs_keys:
            raise ValueError(f"Missing required obs keys: {missing_obs_keys}")

        missing_var_keys = [
            key
            for key in self.required_var_keys
            if key not in dataset.adata.var.columns
        ]
        if missing_var_keys:
            raise ValueError(f"Missing required var keys: {missing_var_keys}")