# Source code for czbenchmarks.file_cache

"""
File caching utilities for remote storage operations.

This module provides functions for caching datasets and model outputs to/from remote storage,
primarily AWS S3. It includes functionality for downloading, uploading, and managing cached
processed datasets.
"""

import argparse
import logging

from pydantic import BaseModel


# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)


class CacheOptions(BaseModel):
    """
    Configuration options for caching datasets and model outputs.

    Attributes:
        download_embeddings (bool): Whether to download embeddings from the remote cache.
        upload_embeddings (bool): Whether to upload embeddings to the remote cache.
        upload_results (bool): Whether to upload results to the remote cache.
        remote_cache_url (str): URL of the remote cache.
    """

    download_embeddings: bool
    upload_embeddings: bool
    upload_results: bool
    remote_cache_url: str

    @classmethod
    def from_args(cls, args: argparse.Namespace) -> "CacheOptions":
        """
        Build a ``CacheOptions`` instance from parsed command-line arguments.

        A falsy ``args.remote_cache_url`` (e.g. ``None`` or ``""``) is stored
        as the empty string, and in that case all three transfer flags are set
        to ``False`` without reading the corresponding attributes on ``args``.

        Args:
            args: Namespace exposing ``remote_cache_url`` and, when that URL is
                non-empty, ``remote_cache_download_embeddings``,
                ``remote_cache_upload_embeddings`` and
                ``remote_cache_upload_results``.

        Returns:
            CacheOptions: Options populated from ``args``.

        Raises:
            AttributeError: If ``args`` lacks an attribute that is read here.
        """
        remote_cache_url = args.remote_cache_url or ""
        # Without a remote cache URL no transfer can happen, so every flag is
        # gated on it; ``and`` short-circuits before touching ``args``.
        has_remote_cache = bool(remote_cache_url)
        return cls(
            remote_cache_url=remote_cache_url,
            download_embeddings=has_remote_cache
            and args.remote_cache_download_embeddings,
            upload_embeddings=has_remote_cache
            and args.remote_cache_upload_embeddings,
            upload_results=has_remote_cache and args.remote_cache_upload_results,
        )