"""Main classes to add caching features to ``requests.Session``

.. autosummary::
   :nosignatures:

   CachedSession
   CacheMixin

.. Explicitly show inherited method docs on CachedSession instead of CacheMixin
.. autoclass:: requests_cache.session.CachedSession
    :show-inheritance:
    :inherited-members:

.. autoclass:: requests_cache.session.CacheMixin
"""
from contextlib import contextmanager
from logging import getLogger
from threading import RLock
from typing import TYPE_CHECKING, Callable, Dict, Iterable, Optional, Union
from requests import PreparedRequest, Response
from requests import Session as OriginalSession
from requests.hooks import dispatch_hook
from urllib3 import filepost
from ._utils import get_valid_kwargs
from .backends import KEY_FN, BackendSpecifier, init_backend
from .models import AnyResponse, CachedResponse, set_response_defaults
from .policy import CacheActions, ExpirationTime, get_expiration_seconds
# Names exported by ``from <this module> import *``
__all__ = ['ALL_METHODS', 'CachedSession', 'CacheMixin']
# HTTP method names; not referenced elsewhere in this module (presumably a convenience value
# for ``allowable_methods`` -- confirm against callers)
ALL_METHODS = ['GET', 'HEAD', 'OPTIONS', 'POST', 'PUT', 'PATCH', 'DELETE']
# Signature of ``filter_fn``: takes a response and returns a bool
FILTER_FN = Callable[[AnyResponse], bool]

logger = getLogger(__name__)

# Static type checkers see the mixin as a Session subclass, so attributes inherited from
# Session resolve; at runtime the mixin derives from plain ``object``
if TYPE_CHECKING:
    MIXIN_BASE = OriginalSession
else:
    MIXIN_BASE = object
class CacheMixin(MIXIN_BASE):
"""Mixin class that extends :py:class:`requests.Session` with caching features.
See :py:class:`.CachedSession` for usage details.
"""
def __init__(
    self,
    cache_name: str = 'http_cache',
    backend: BackendSpecifier = None,
    expire_after: ExpirationTime = -1,
    urls_expire_after: Dict[str, ExpirationTime] = None,
    cache_control: bool = False,
    allowable_codes: Iterable[int] = (200,),
    allowable_methods: Iterable[str] = ('GET', 'HEAD'),
    filter_fn: FILTER_FN = None,
    stale_if_error: bool = False,
    **kwargs,
):
    """Create the cache backend, store the caching options, and initialize the parent class.

    See :py:class:`.CachedSession` for a description of the arguments.

    Args:
        **kwargs: Passed in full to ``init_backend()``. The subset accepted by the parent
            class's ``__init__`` is also forwarded to it. ``old_data_on_error`` is consulted
            as an alternate spelling of ``stale_if_error`` when the latter is falsy.
    """
    # The backend is created first and sees every extra keyword argument, untouched
    self.cache = init_backend(cache_name, backend, **kwargs)

    # Internal state
    self._disabled = False
    self._lock = RLock()

    # Caching options
    self.expire_after = expire_after
    self.urls_expire_after = urls_expire_after
    self.cache_control = cache_control
    self.allowable_codes = allowable_codes
    self.allowable_methods = allowable_methods
    # Without a filter, every response is accepted
    self.filter_fn = filter_fn if filter_fn else (lambda r: True)
    if stale_if_error:
        self.stale_if_error = stale_if_error
    else:
        # Fall back to the alternate keyword, removing it from kwargs in the process
        self.stale_if_error = kwargs.pop('old_data_on_error', False)

    # If the superclass is a custom Session, hand it whichever kwargs its __init__ accepts
    parent_kwargs = get_valid_kwargs(super().__init__, kwargs)
    super().__init__(**parent_kwargs)  # type: ignore
def request(  # type: ignore  # Note: An extra param (expire_after) is added here
    self,
    method: str,
    url: str,
    *args,
    expire_after: ExpirationTime = None,
    **kwargs,
) -> AnyResponse:
    """This method prepares and sends a request while automatically performing any necessary
    caching operations. This will be called by any other method-specific ``requests`` functions
    (get, post, etc.). This does not include prepared requests, which will still be cached via
    ``send()``.

    See :py:meth:`requests.Session.request` for parameters. Additional parameters:

    Args:
        expire_after: Expiration time to set only for this request; see details below.
            Overrides ``CachedSession.expire_after``. Accepts all the same values as
            ``CachedSession.expire_after``. Use ``-1`` to disable expiration.

    Returns:
        Either a new or cached response

    **Order of operations:** For reference, a request will pass through the following methods:

    1. :py:func:`requests.get`/:py:meth:`requests.Session.get` or other method-specific functions (optional)
    2. :py:meth:`.CachedSession.request`
    3. :py:meth:`requests.Session.request`
    4. :py:meth:`.CachedSession.send`
    5. :py:meth:`.BaseCache.get_response`
    6. :py:meth:`requests.Session.send` (if not previously cached)
    7. :py:meth:`.BaseCache.save_response` (if not previously cached)
    """
    # If present, set per-request expiration as a request header, to be handled in send()
    if expire_after is not None:
        # Build the header on a copy: the caller's mapping must not be mutated, and an
        # explicit ``headers=None`` has to be tolerated. ``.copy()`` keeps the mapping type
        # (e.g. a case-insensitive dict) when the caller's object provides it.
        caller_headers = kwargs.get('headers')
        if hasattr(caller_headers, 'copy'):
            headers = caller_headers.copy()
        else:
            headers = dict(caller_headers or {})
        headers['Cache-Control'] = f'max-age={get_expiration_seconds(expire_after)}'
        kwargs['headers'] = headers

    # The form boundary is patched for the duration of the request when ``files`` is given
    with patch_form_boundary(**kwargs):
        return super().request(method, url, *args, **kwargs)
def send(
    self, request: PreparedRequest, expire_after: ExpirationTime = None, **kwargs
) -> AnyResponse:
    """Send a prepared request, consulting the cache first.

    See :py:meth:`.request` for notes on behavior, and :py:meth:`requests.Session.send` for
    parameters. Additional parameters:

    Args:
        expire_after: Expiration time to set only for this request

    Returns:
        The cached response when one is present and not expired; otherwise the result of
        sending (or re-sending) the request.
    """
    # Work out the cache key and which cache operations apply to this request
    cache_key = self.cache.create_key(request, **kwargs)
    actions = CacheActions.from_request(
        cache_key=cache_key,
        request=request,
        request_expire_after=expire_after,
        session_expire_after=self.expire_after,
        urls_expire_after=self.urls_expire_after,
        cache_control=self.cache_control,
        **kwargs,
    )

    # Look for a cached copy, unless the cache is disabled or reads are skipped
    cached_response: Optional[CachedResponse] = None
    if not self._disabled and not actions.skip_read:
        cached_response = self.cache.get_response(cache_key)
        actions.update_from_cached_response(cached_response)
    is_expired = getattr(cached_response, 'is_expired', False)

    # Nothing usable in the cache -> send; fresh -> reuse; expired -> re-send
    if cached_response is None:
        response = self._send_and_cache(request, actions, **kwargs)
    elif not is_expired:
        response = cached_response
    elif self.stale_if_error:
        response = self._resend_and_ignore(request, actions, cached_response, **kwargs)
    else:
        response = self._resend(request, actions, cached_response, **kwargs)

    if self.filter_fn(response):
        # Dispatch any hooks here, because they are removed before pickling
        return dispatch_hook('response', request.hooks, response, **kwargs)

    # Rejected by the filter: remove any stored copy and return without dispatching hooks
    logger.debug(f'Deleting filtered response for URL: {response.url}')
    self.cache.delete(cache_key)
    return response
def _is_cacheable(self, response: Response, actions: CacheActions) -> bool:
    """Decide whether ``response`` may be written to the cache.

    Every check is evaluated and logged at debug level; the response is cacheable only when
    none of them blocks it.
    """
    cache_off = self._disabled
    method_blocked = str(response.request.method) not in self.allowable_methods
    status_blocked = response.status_code not in self.allowable_codes
    filtered_out = not self.filter_fn(response)
    write_skipped = actions.skip_write

    # Keys double as the labels shown in the debug log
    cache_criteria = {
        'disabled cache': cache_off,
        'disabled method': method_blocked,
        'disabled status': status_blocked,
        'disabled by filter': filtered_out,
        'disabled by headers or expiration params': write_skipped,
    }
    logger.debug(f'Pre-cache checks for response from {response.url}: {cache_criteria}')
    return all(not blocked for blocked in cache_criteria.values())
def _send_and_cache(
    self,
    request: PreparedRequest,
    actions: CacheActions,
    cached_response: CachedResponse = None,
    **kwargs,
) -> AnyResponse:
    """Send ``request`` and store the response when it passes the cacheability checks.

    Validation headers from ``actions`` are added to the request before sending, which makes
    it a conditional request where applicable. If the response is not cacheable, a stale
    ``cached_response`` was supplied, and the server answered 304 Not Modified, the refreshed
    cached response is returned instead of the new one.
    """
    request.headers.update(actions.validation_headers)
    response = super().send(request, **kwargs)
    actions.update_from_response(response)

    if self._is_cacheable(response, actions):
        self.cache.save_response(response, actions.cache_key, actions.expires)
        return set_response_defaults(response, actions.cache_key)

    # Revalidation succeeded: keep serving the stored copy
    if cached_response and response.status_code == 304:
        return self._update_revalidated_response(actions, response, cached_response)

    logger.debug(f'Skipping cache write for URL: {request.url}')
    return set_response_defaults(response, actions.cache_key)
def _resend(
    self,
    request: PreparedRequest,
    actions: CacheActions,
    cached_response: CachedResponse,
    **kwargs,
) -> AnyResponse:
    """Re-send a request whose cached response is stale, and cache the new response.

    Raises:
        Exception: Whatever sending raises is re-raised after the stale cache item has been
            deleted.
    """
    logger.debug('Stale response; attempting to re-send request')
    try:
        fresh_response = self._send_and_cache(request, actions, cached_response, **kwargs)
    except Exception:
        # The stale entry is removed before the error propagates
        self.cache.delete(actions.cache_key)
        raise
    return fresh_response
def _resend_and_ignore(
    self,
    request: PreparedRequest,
    actions: CacheActions,
    cached_response: CachedResponse,
    **kwargs,
) -> AnyResponse:
    """Re-send a request whose cached response is stale, falling back to the stale item.

    Any exception raised while sending, or by ``raise_for_status()`` on the new response, is
    logged as a warning and the stale cached response is returned instead.
    """
    logger.debug('Stale response; attempting to re-send request')
    try:
        response = self._send_and_cache(request, actions, cached_response, **kwargs)
        # HTTP error statuses count as failures too
        response.raise_for_status()
    except Exception:
        logger.warning(
            f'Request for URL {request.url} failed; using cached response', exc_info=True
        )
        return cached_response
    return response
def _update_revalidated_response(
    self, actions: CacheActions, response: Response, cached_response: CachedResponse
) -> CachedResponse:
    """Refresh a cached response after revalidation and return it.

    The cached response takes on the headers of ``response``, ``actions`` is updated from the
    result, and the cached response is saved again with the resulting expiration.
    """
    message = f'Response for URL {response.request.url} has not been modified; updating and using cached response'
    logger.debug(message)

    # Merge the new headers first, so the expiration is derived from the merged set
    cached_response.headers.update(response.headers)
    actions.update_from_response(cached_response)

    cached_response.expires = actions.expires
    self.cache.save_response(cached_response, actions.cache_key, actions.expires)
    return cached_response
@contextmanager
def cache_disabled(self):
    """
    Context manager for temporarily disabling the cache

    .. warning:: This method is not thread-safe.

    Example:

        >>> s = CachedSession()
        >>> with s.cache_disabled():
        ...     s.get('http://httpbin.org/ip')

    """
    # Already disabled (e.g. nested use): leave the flag alone so the outer block stays in control
    if self._disabled:
        yield
        return

    self._disabled = True
    try:
        yield
    finally:
        # Re-enable even if the body raised
        self._disabled = False
def remove_expired_responses(self, expire_after: ExpirationTime = None):
    """Delegate to ``self.cache.remove_expired_responses()``.

    Deprecated; will be replaced by ``CachedSession.cache.delete(expired=True)``.

    Args:
        expire_after: Passed through unchanged to the backend method
    """
    backend = self.cache
    backend.remove_expired_responses(expire_after)
def __getstate__(self):
# Unlike requests.Session, CachedSession may contain backend connection objects that can't
# be pickled. Support for this could be added if necessary, but for now it's explicitly
# disabled to avoid confusing errors upon unpickling.
raise NotImplementedError('CachedSession cannot be pickled')
def __repr__(self):
repr_attrs = [
'cache',
'expire_after',
'urls_expire_after',
'allowable_codes',
'allowable_methods',
'stale_if_error',
'cache_control',
]
attr_strs = [f'{k}={repr(getattr(self, k))}' for k in repr_attrs]
return f'<CachedSession({", ".join(attr_strs)})>'
# The following properties exist for partial forwards-compatibility with CacheSettings in 1.0
# All settings will be settable via CachedSession.settings, instead of being split between
# CachedSession and BaseCache.
@property
def settings(self):
    """Return the session itself, so that ``session.settings.<name>`` resolves to the
    session's own attribute of that name (partial forwards-compatibility with 1.0).
    """
    return self
@property
def ignored_parameters(self) -> Iterable[str]:
    """The backend's ``ignored_parameters`` value, or an empty list when it is unset or empty.

    Assigning to this property stores the value on ``self.cache``.
    """
    configured = self.cache.ignored_parameters
    return configured if configured else []

@ignored_parameters.setter
def ignored_parameters(self, value: Iterable[str]):
    backend = self.cache
    backend.ignored_parameters = value
@property
def match_headers(self) -> Union[Iterable[str], bool]:
    """The backend's ``match_headers`` value, or an empty list when it is falsy.

    Note that a stored ``False`` is therefore reported as ``[]``. Assigning to this property
    stores the value on ``self.cache``.
    """
    configured = self.cache.match_headers
    return configured if configured else []

@match_headers.setter
def match_headers(self, value: Union[Iterable[str], bool]):
    backend = self.cache
    backend.match_headers = value
@property
def key_fn(self) -> Optional[KEY_FN]:
    """The backend's custom cache key function, or ``None`` when none is set.

    Assigning to this property stores the value on ``self.cache``.
    """
    # A key function is a callable, so "unset" is reported as None rather than an empty list
    return self.cache.key_fn or None

@key_fn.setter
def key_fn(self, value: KEY_FN):
    self.cache.key_fn = value
class CachedSession(CacheMixin, OriginalSession):
    """Session class that extends :py:class:`requests.Session` with caching features.

    See individual :py:mod:`backend classes <requests_cache.backends>` for additional
    backend-specific arguments. Also see :ref:`user-guide` for more details and examples on how
    the following arguments affect cache behavior.

    Args:
        cache_name: Cache prefix or namespace, depending on backend
        backend: Cache backend name or instance; name may be one of
            ``['sqlite', 'filesystem', 'mongodb', 'gridfs', 'redis', 'dynamodb', 'memory']``
        serializer: Serializer name or instance; name may be one of
            ``['pickle', 'json', 'yaml', 'bson']``.
        expire_after: Time after which cached items will expire
        urls_expire_after: Expiration times to apply for different URL patterns
        cache_control: Use Cache-Control headers to set expiration
        allowable_codes: Only cache responses with one of these status codes
        allowable_methods: Cache only responses for one of these HTTP methods
        match_headers: Match request headers when reading from the cache; may be either a boolean
            or a list of specific headers to match
        ignored_parameters: List of request parameters to not match against, and exclude from the cache
        filter_fn: Function that takes a :py:class:`~requests.Response` object and returns a boolean
            indicating whether or not that response should be cached. Will be applied to both new
            and previously cached responses.
        key_fn: Function for generating custom cache keys based on request info
        stale_if_error: Return stale cache data if a new request raises an exception
    """
@contextmanager
def patch_form_boundary(**request_kwargs):
    """If the ``files`` param is present, patch the form boundary used to separate multipart
    uploads. ``requests`` does not provide a way to pass a custom boundary to urllib3, so this just
    monkey-patches it instead.

    The patch replaces the module-level ``filepost.choose_boundary`` attribute, so it is visible
    to every caller of that function while the context is active. The original function is
    restored on exit, including when the body of the ``with`` block raises.

    Args:
        **request_kwargs: Keyword arguments of the request; only ``files`` is inspected
    """
    # Nothing to patch for requests without file uploads
    if not request_kwargs.get('files'):
        yield
        return

    original_boundary = filepost.choose_boundary
    # Fixed boundary; presumably so repeated uploads produce identical request bodies -- TODO confirm
    filepost.choose_boundary = lambda: '##requests-cache-form-boundary##'
    try:
        yield
    finally:
        # Always undo the monkey-patch, otherwise a failed request would leave it in place
        filepost.choose_boundary = original_boundary