"""Source code for ``requests_cache.backends.filesystem``."""

from contextlib import contextmanager
from glob import glob
from os import listdir, makedirs, unlink
from os.path import abspath, basename, expanduser, isabs, join, splitext
from pathlib import Path
from pickle import PickleError
from shutil import rmtree
from tempfile import gettempdir
from typing import List, Union

from ..serializers import SERIALIZERS
from . import BaseCache, BaseStorage
from .sqlite import DbDict


class FileCache(BaseCache):
    """Backend that stores cached responses as files on the local filesystem.

    Response paths will be in the format ``<cache_name>/responses/<cache_key>``.
    Redirects are stored in a SQLite database, located at ``<cache_name>/redirects.sqlite``.

    Args:
        cache_name: Base directory for cache files
        use_temp: Store cache files in a temp directory (e.g., ``/tmp/http_cache/``).
            Note: if ``cache_name`` is an absolute path, this option will be ignored.
        extension: Extension for cache files. If not specified, the serializer default extension
            will be used.
    """

    def __init__(self, cache_name: Union[Path, str] = 'http_cache', use_temp: bool = False, **kwargs):
        super().__init__(**kwargs)
        # Response bodies live as one file per cache key; redirects share a small SQLite db
        # inside the same cache directory.
        self.responses = FileDict(cache_name, use_temp=use_temp, **kwargs)
        self.redirects = DbDict(
            join(self.responses.cache_dir, 'redirects.sqlite'), 'redirects', **kwargs
        )

    def clear(self):
        """Clear the cache"""
        # FileDict.clear() removes and re-creates the whole cache directory, which also
        # deletes redirects.sqlite — so the redirects table must be re-initialized after.
        self.responses.clear()
        self.redirects.init_db()
class FileDict(BaseStorage):
    """A dictionary-like interface to files on the local filesystem.

    Args:
        cache_name: Base directory for cache files
        use_temp: Store cache files in a temp directory (ignored for absolute paths)
        extension: Extension for cache files. If not specified, the serializer default
            extension will be used.
    """

    def __init__(self, cache_name, use_temp: bool = False, extension: str = None, **kwargs):
        super().__init__(**kwargs)
        self.cache_dir = _get_cache_dir(cache_name, use_temp)
        self.extension = extension if extension is not None else _get_default_ext(self.serializer)
        # Flipped to True the first time serialized output turns out to be bytes, or a
        # text-mode read raises UnicodeDecodeError; controls the file open mode.
        self.is_binary = False
        makedirs(self.cache_dir, exist_ok=True)

    @contextmanager
    def _try_io(self, ignore_errors: bool = False):
        """Attempt an I/O operation, and either ignore errors or re-raise them as KeyErrors"""
        try:
            yield
        except (IOError, OSError, PickleError) as e:
            if not ignore_errors:
                raise KeyError(e) from e

    def _path(self, key) -> str:
        """Get the absolute file path for a cache key"""
        ext = f'.{self.extension}' if self.extension else ''
        return join(self.cache_dir, f'{key}{ext}')

    def __getitem__(self, key):
        """Read and deserialize a cached value; raises KeyError if the file is missing"""
        mode = 'rb' if self.is_binary else 'r'
        with self._try_io():
            try:
                with open(self._path(key), mode) as f:
                    return self.serializer.loads(f.read())
            except UnicodeDecodeError:
                # The file holds binary data; remember that and retry in binary mode
                self.is_binary = True
                return self.__getitem__(key)

    def __delitem__(self, key):
        with self._try_io():
            unlink(self._path(key))

    def __setitem__(self, key, value):
        """Serialize a value and write it to a file named after the cache key"""
        serialized_value = self.serializer.dumps(value)
        if isinstance(serialized_value, bytes):
            self.is_binary = True
        mode = 'wb' if self.is_binary else 'w'
        with self._try_io():
            with open(self._path(key), mode) as f:
                # Bug fix: the original called serializer.dumps(value) a second time here,
                # serializing every value twice; reuse the result computed above.
                f.write(serialized_value)

    def __iter__(self):
        yield from self.keys()

    def __len__(self):
        return len(listdir(self.cache_dir))

    def clear(self):
        """Delete all cached files, and re-create an empty cache directory"""
        with self._try_io(ignore_errors=True):
            rmtree(self.cache_dir, ignore_errors=True)
            makedirs(self.cache_dir)

    def keys(self):
        """Get all cache keys (cache filenames with their extensions stripped)"""
        return [splitext(basename(path))[0] for path in self.paths()]

    def paths(self) -> List[str]:
        """Get absolute file paths to all cached responses"""
        return glob(self._path('*'))
def _get_cache_dir(cache_dir: Union[Path, str], use_temp: bool) -> str: # Save to a temp directory, if specified if use_temp and not isabs(cache_dir): cache_dir = join(gettempdir(), cache_dir, 'responses') # Expand relative and user paths (~/*), and make sure parent dirs exist cache_dir = abspath(expanduser(str(cache_dir))) makedirs(cache_dir, exist_ok=True) return cache_dir def _get_default_ext(serializer) -> str: for k, v in SERIALIZERS.items(): if serializer is v: return k.replace('pickle', 'pkl') return ''