"""Main classes to add caching features to :py:class:`requests.Session`"""
from contextlib import contextmanager, nullcontext
from logging import getLogger
from threading import RLock, Thread
from typing import TYPE_CHECKING, Iterable, MutableMapping, Optional, Union
from requests import PreparedRequest
from requests import Session as OriginalSession
from requests.hooks import dispatch_hook
from urllib3 import filepost
from ._utils import get_valid_kwargs
from .backends import BackendSpecifier, init_backend
from .models import AnyResponse, CachedResponse, OriginalResponse
from .policy import (
DEFAULT_CACHE_NAME,
DEFAULT_IGNORED_PARAMS,
DEFAULT_METHODS,
DEFAULT_STATUS_CODES,
CacheActions,
CacheSettings,
ExpirationPatterns,
ExpirationTime,
FilterCallback,
KeyCallback,
set_request_headers,
)
from .serializers import SerializerType
# Names exported by ``from <this module> import *``
__all__ = ['CachedSession', 'CacheMixin']

# Base class used by CacheMixin: static type checkers see requests.Session (imported here as
# OriginalSession), while at runtime the mixin derives from plain ``object``
if TYPE_CHECKING:
    MIXIN_BASE = OriginalSession
else:
    MIXIN_BASE = object

# Module-level logger, named after this module
logger = getLogger(__name__)
class CacheMixin(MIXIN_BASE):
    """Mixin class that extends :py:class:`requests.Session` with caching features.

    See :py:class:`.CachedSession` for usage details.
    """

    def __init__(
        self,
        cache_name: str = DEFAULT_CACHE_NAME,
        backend: Optional[BackendSpecifier] = None,
        serializer: Optional[SerializerType] = None,
        expire_after: ExpirationTime = -1,
        urls_expire_after: Optional[ExpirationPatterns] = None,
        cache_control: bool = False,
        allowable_codes: Iterable[int] = DEFAULT_STATUS_CODES,
        allowable_methods: Iterable[str] = DEFAULT_METHODS,
        always_revalidate: bool = False,
        ignored_parameters: Iterable[str] = DEFAULT_IGNORED_PARAMS,
        match_headers: Union[Iterable[str], bool] = False,
        filter_fn: Optional[FilterCallback] = None,
        key_fn: Optional[KeyCallback] = None,
        stale_if_error: Union[bool, int] = False,
        **kwargs,
    ):
        """Create the cache backend and settings, then initialize the next class in the MRO.

        The named arguments are forwarded to :py:meth:`CacheSettings.from_kwargs`. Any extra
        ``kwargs`` are passed to the backend initializer and to the settings, and the subset
        accepted by the parent ``__init__`` is passed to it as well.
        """
        # The backend must exist first: the ``settings`` property stores its value on the backend
        self.cache = init_backend(cache_name, backend, serializer=serializer, **kwargs)
        self.settings = CacheSettings.from_kwargs(
            expire_after=expire_after,
            urls_expire_after=urls_expire_after,
            cache_control=cache_control,
            allowable_codes=allowable_codes,
            allowable_methods=allowable_methods,
            always_revalidate=always_revalidate,
            ignored_parameters=ignored_parameters,
            match_headers=match_headers,
            filter_fn=filter_fn,
            key_fn=key_fn,
            stale_if_error=stale_if_error,
            **kwargs,
        )
        self._lock = RLock()

        # If the mixin superclass is a custom Session, pass along any valid kwargs
        super().__init__(**get_valid_kwargs(super().__init__, kwargs))  # type: ignore

    @property
    def settings(self) -> CacheSettings:
        """Settings that affect cache behavior"""
        return self.cache._settings

    @settings.setter
    def settings(self, value: CacheSettings):
        self.cache._settings = value

    # For backwards-compatibility
    @property
    def expire_after(self) -> ExpirationTime:
        return self.settings.expire_after

    @expire_after.setter
    def expire_after(self, value: ExpirationTime):
        self.settings.expire_after = value

    # Wrapper methods to add return type hints
    def get(self, url: str, params=None, **kwargs) -> AnyResponse:  # type: ignore
        kwargs.setdefault('allow_redirects', True)
        return self.request('GET', url, params=params, **kwargs)

    def options(self, url: str, **kwargs) -> AnyResponse:  # type: ignore
        kwargs.setdefault('allow_redirects', True)
        return self.request('OPTIONS', url, **kwargs)

    def head(self, url: str, **kwargs) -> AnyResponse:  # type: ignore
        kwargs.setdefault('allow_redirects', False)
        return self.request('HEAD', url, **kwargs)

    def post(self, url: str, data=None, **kwargs) -> AnyResponse:  # type: ignore
        return self.request('POST', url, data=data, **kwargs)

    def put(self, url: str, data=None, **kwargs) -> AnyResponse:  # type: ignore
        return self.request('PUT', url, data=data, **kwargs)

    def patch(self, url: str, data=None, **kwargs) -> AnyResponse:  # type: ignore
        return self.request('PATCH', url, data=data, **kwargs)

    def delete(self, url: str, **kwargs) -> AnyResponse:  # type: ignore
        return self.request('DELETE', url, **kwargs)

    def request(  # type: ignore
        self,
        method: str,
        url: str,
        *args,
        headers: Optional[MutableMapping[str, str]] = None,
        expire_after: ExpirationTime = None,
        only_if_cached: bool = False,
        refresh: bool = False,
        force_refresh: bool = False,
        **kwargs,
    ) -> AnyResponse:
        """This method prepares and sends a request while automatically performing any necessary
        caching operations. This will be called by any other method-specific ``requests`` functions
        (get, post, etc.). This is not used by :py:class:`~requests.PreparedRequest` objects, which
        are handled by :py:meth:`send()`.

        See :py:meth:`requests.Session.request` for base parameters. Additional parameters:

        Args:
            expire_after: Expiration time to set only for this request. See :ref:`expiration` for
                details.
            only_if_cached: Only return results from the cache. If not cached, return a 504 response
                instead of sending a new request.
            refresh: Revalidate with the server before using a cached response, and refresh if needed
                (e.g., a "soft refresh," like F5 in a browser)
            force_refresh: Always make a new request, and overwrite any previously cached response
                (e.g., a "hard refresh", like Ctrl-F5 in a browser)

        Returns:
            Either a new or cached response
        """
        # The per-request cache options are folded into the request headers
        headers = set_request_headers(headers, expire_after, only_if_cached, refresh, force_refresh)
        # For file uploads, use a fixed multipart boundary while the request is built and sent
        # (presumably so identical uploads produce identical bodies for request matching)
        with patch_form_boundary() if kwargs.get('files') else nullcontext():
            return super().request(method, url, *args, headers=headers, **kwargs)  # type: ignore

    def send(
        self,
        request: PreparedRequest,
        expire_after: ExpirationTime = None,
        only_if_cached: bool = False,
        refresh: bool = False,
        force_refresh: bool = False,
        **kwargs,
    ) -> AnyResponse:
        """Send a prepared request, with caching. See :py:meth:`requests.Session.send` for base
        parameters, and see :py:meth:`.request` for extra parameters.

        **Order of operations:** For reference, a request will pass through the following methods:

        1. :py:func:`requests.get`, :py:meth:`CachedSession.get`, etc. (optional)
        2. :py:meth:`.CachedSession.request`
        3. :py:meth:`requests.Session.request`
        4. :py:meth:`.CachedSession.send`
        5. :py:meth:`.BaseCache.get_response`
        6. :py:meth:`requests.Session.send` (if not using a cached response)
        7. :py:meth:`.BaseCache.save_response` (if not using a cached response)
        """
        # Determine which actions to take based on settings and request info
        request.headers = set_request_headers(
            request.headers, expire_after, only_if_cached, refresh, force_refresh
        )
        actions = CacheActions.from_request(
            self.cache.create_key(request, **kwargs), request, self.settings
        )

        # Attempt to fetch a cached response
        cached_response: Optional[CachedResponse] = None
        if not actions.skip_read:
            cached_response = self.cache.get_response(actions.cache_key)
            actions.update_from_cached_response(cached_response, self.cache.create_key, **kwargs)

        # Handle missing and expired responses based on settings and headers
        if actions.error_504:
            response: AnyResponse = get_504_response(request)
        elif actions.resend_async:
            self._resend_async(request, actions, cached_response, **kwargs)
            response = cached_response  # type: ignore
        elif actions.resend_request:
            response = self._resend(request, actions, cached_response, **kwargs)  # type: ignore
        elif actions.send_request:
            response = self._send_and_cache(request, actions, cached_response, **kwargs)
        else:
            response = cached_response  # type: ignore  # Guaranteed to be non-None by this point

        # If the request has been filtered out and was previously cached, delete it
        if self.settings.filter_fn is not None and not self.settings.filter_fn(response):
            logger.debug('Deleting filtered response for URL: %s', response.url)
            self.cache.delete(actions.cache_key)
            return response

        # Dispatch any hooks here, because they are removed during serialization
        return dispatch_hook('response', request.hooks, response, **kwargs)

    def _send_and_cache(
        self,
        request: PreparedRequest,
        actions: CacheActions,
        cached_response: Optional[CachedResponse] = None,
        **kwargs,
    ) -> AnyResponse:
        """Send a request and cache the response, unless disabled by settings or headers.
        If applicable, also handle conditional requests.
        """
        request = actions.update_request(request)
        response = super().send(request, **kwargs)
        actions.update_from_response(response)

        if not actions.skip_write:
            self.cache.save_response(response, actions.cache_key, actions.expires)
        elif cached_response is not None and response.status_code == 304:
            # 304 Not Modified: keep the cached copy, updated from the revalidation response
            cached_response = actions.update_revalidated_response(response, cached_response)
            self.cache.save_response(cached_response, actions.cache_key, actions.expires)
            return cached_response
        else:
            logger.debug('Skipping cache write for URL: %s', request.url)
        return OriginalResponse.wrap_response(response, actions)

    def _resend(
        self,
        request: PreparedRequest,
        actions: CacheActions,
        cached_response: CachedResponse,
        **kwargs,
    ) -> AnyResponse:
        """Handle a stale cached response by attempting to resend the request and cache a fresh
        response
        """
        logger.debug('Stale response; attempting to re-send request')
        try:
            response = self._send_and_cache(request, actions, cached_response, **kwargs)
            # With stale_if_error, a response with a non-allowable status code is turned into an
            # exception (if raise_for_status() raises for it) and handled like any other error
            if (
                self.settings.stale_if_error
                and response.status_code not in self.settings.allowable_codes
            ):
                response.raise_for_status()
            return response
        except Exception:
            return self._handle_error(cached_response, actions)

    def _resend_async(self, *args, **kwargs):
        """Send a non-blocking request to refresh a cached response.

        Runs :py:meth:`_send_and_cache` with the given arguments in a new thread, which is
        started but not joined.
        """
        logger.debug('Using stale response while revalidating')
        thread = Thread(target=self._send_and_cache, args=args, kwargs=kwargs)
        thread.start()

    def _handle_error(self, cached_response: CachedResponse, actions: CacheActions) -> AnyResponse:
        """Handle a request error based on settings:

        * Default behavior: re-raise the error
        * stale-if-error: Ignore the error and return the stale cache item

        This must only be called while an exception is being handled (i.e., from inside an
        ``except`` block), since it re-raises the active exception with a bare ``raise``.
        """
        if actions.is_usable(cached_response, error=True):
            logger.warning(
                'Request for URL %s failed; using cached response',
                cached_response.request.url,
                exc_info=True,
            )
            return cached_response
        else:
            # Re-raise the exception currently being handled by the caller
            raise

    @contextmanager
    def cache_disabled(self):
        """
        Context manager for temporarily disabling the cache

        .. warning:: This method is not thread-safe.

        Example:

            >>> s = CachedSession()
            >>> with s.cache_disabled():
            ...     s.get('https://httpbin.org/ip')

        """
        if self.settings.disabled:
            # Already disabled; leave the setting untouched on exit
            yield
        else:
            self.settings.disabled = True
            try:
                yield
            finally:
                self.settings.disabled = False

    def close(self):
        """Close the session and any open backend connections"""
        try:
            super().close()
        finally:
            # Close the backend even if closing the session raised
            self.cache.close()

    def remove_expired_responses(self, expire_after: ExpirationTime = None):
        """Delegate to ``self.cache.remove_expired_responses()``."""
        # Deprecated; will be replaced by CachedSession.cache.delete(expired=True)
        self.cache.remove_expired_responses(expire_after)

    def __getstate__(self):
        # Unlike requests.Session, CachedSession may contain backend connection objects that can't
        # be pickled. Support for this could be added if necessary, but for now it's explicitly
        # disabled to avoid confusing errors upon unpickling.
        raise NotImplementedError('CachedSession cannot be pickled')

    def __repr__(self):
        return f'<CachedSession(cache={self.cache!r}, settings={self.settings})>'
class CachedSession(CacheMixin, OriginalSession):
    """Session class that extends :py:class:`requests.Session` with caching features.

    See individual :py:mod:`backend classes <requests_cache.backends>` for additional
    backend-specific arguments. Also see :ref:`user-guide` for more details and examples on how the
    following arguments affect cache behavior.

    Args:
        cache_name: Used as a cache path, prefix, or namespace, depending on the backend
        backend: Cache backend name or instance; name may be one of
            ``['sqlite', 'filesystem', 'mongodb', 'gridfs', 'redis', 'dynamodb', 'memory']``
        serializer: Serializer name or instance; name may be one of
            ``['pickle', 'json', 'yaml', 'bson']``.
        expire_after: Time after which cached items will expire. See :ref:`expiration` for details.
        urls_expire_after: Expiration times to apply for different URL patterns
        cache_control: Use Cache-Control and other response headers to set expiration
        allowable_codes: Only cache responses with one of these status codes
        allowable_methods: Cache only responses for one of these HTTP methods
        always_revalidate: Revalidate with the server for every request, even if the cached response
            is not expired
        match_headers: Request headers to match, when `Vary` response header is not available. May
            be a list of headers, or ``True`` to match all.
        ignored_parameters: Request parameters, headers, and/or JSON body params to exclude from both
            request matching and cached request data
        stale_if_error: Return a stale response if a new request raises an exception. Optionally
            accepts a time value representing maximum staleness to accept.
        stale_while_revalidate: Return a stale response initially, while a non-blocking request is
            sent to refresh the response for the next time it's requested
        filter_fn: Response filtering function that indicates whether or not a given response should
            be cached. See :ref:`custom-filtering` for details.
        key_fn: Request matching function for generating custom cache keys. See
            :ref:`custom-matching` for details.
    """
def get_504_response(request: PreparedRequest) -> CachedResponse:
    """Build the ``504 Not Cached`` response used with the only-if-cached option.

    The response carries the given request and its URL, or an empty string if the request has
    no URL.
    """
    response_url = request.url or ''
    placeholder = CachedResponse(
        url=response_url,
        status_code=504,
        reason='Not Cached',
        request=request,  # type: ignore
    )
    return placeholder
@contextmanager
def patch_form_boundary():
    """If the ``files`` param is present, patch the form boundary used to separate multipart
    uploads. ``requests`` does not provide a way to pass a custom boundary to urllib3, so this just
    monkey-patches it instead.

    While the context is active, ``filepost.choose_boundary`` returns a fixed string. The
    original function is always restored on exit, including when the body of the ``with`` block
    raises. Because this replaces a module-level attribute, the patch is visible to all code
    using ``filepost`` while the context is active.
    """
    original_boundary = filepost.choose_boundary
    filepost.choose_boundary = lambda: '##requests-cache-form-boundary##'
    try:
        yield
    finally:
        # Restore even on error, so a failed request can't leave the patch in place
        filepost.choose_boundary = original_boundary