Examples#
This section contains some complete examples that demonstrate the main features of requests-cache.
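For orientation, the two usage styles demonstrated in the scripts below boil down to just a few lines each. A minimal sketch (the cache name 'demo_cache' is arbitrary):

import requests
import requests_cache
from requests_cache import CachedSession

# Style 1: an explicit session with caching built in
session = CachedSession('demo_cache')
session.get('https://httpbin.org/get')

# Style 2: patch requests globally so requests.get() and friends are cached
requests_cache.install_cache('demo_cache')
requests.get('https://httpbin.org/get')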
Articles#
Some articles and blog posts that discuss requests-cache:
PyBites: Module of the Week: requests-cache for repeated API calls
Real Python: Caching External API Requests
Thomas Gorham: Faster Backtesting with requests-cache
Tim O’Hearn: Pragmatic Usage of requests-cache
Valdir Stumm Jr: Tips for boosting your Python scripts
Python Web Scraping (2nd Edition): Exploring requests-cache
Scripts#
The following scripts can also be found in the examples/ folder on GitHub.
Basic usage (with sessions)#
A simple example using requests-cache with httpbin
Example: basic_sessions.py
#!/usr/bin/env python
import time

from requests_cache import CachedSession


def main():
    session = CachedSession('example_cache', backend='sqlite')

    # The real request will only be made once; afterward, the cached response is used
    for i in range(5):
        response = session.get('http://httpbin.org/get')

    # This is more obvious when calling a slow endpoint
    for i in range(5):
        response = session.get('http://httpbin.org/delay/2')

    # Caching can be disabled if we want to get a fresh page and not cache it
    with session.cache_disabled():
        print(session.get('http://httpbin.org/ip').text)

    # Get some debugging info about the cache
    print(session.cache)
    print('Cached URLS:')
    print('\n'.join(session.cache.urls))


if __name__ == "__main__":
    t = time.time()
    main()
    print('Elapsed: %.3f seconds' % (time.time() - t))
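A quick way to confirm that the cache is actually being used is to check the from_cache attribute on each response (the later examples rely on it as well); a small sketch using the same cache:

from requests_cache import CachedSession

session = CachedSession('example_cache', backend='sqlite')
session.cache.clear()  # Start from an empty cache so the first request is sent for real
assert session.get('http://httpbin.org/get').from_cache is False
assert session.get('http://httpbin.org/get').from_cache is True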
Basic usage (with patching)#
The same as basic_sessions.py, but using patching
Example: basic_patching.py
#!/usr/bin/env python
import time

import requests

import requests_cache

# After installation, all requests functions and Session methods will be cached
requests_cache.install_cache('example_cache', backend='sqlite')


def main():
    # The real request will only be made once; afterward, the cached response is used
    for i in range(5):
        response = requests.get('http://httpbin.org/get')

    # This is more obvious when calling a slow endpoint
    for i in range(5):
        response = requests.get('http://httpbin.org/delay/2')

    # Caching can be disabled if we want to get a fresh page and not cache it
    with requests_cache.disabled():
        print(requests.get('http://httpbin.org/ip').text)

    # Get some debugging info about the cache
    print(requests_cache.get_cache())
    print('Cached URLS:')
    print('\n'.join(requests_cache.get_cache().urls))

    # Uninstall to remove caching from all requests functions
    requests_cache.uninstall_cache()


if __name__ == "__main__":
    t = time.time()
    main()
    print('Elapsed: %.3f seconds' % (time.time() - t))
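The counterpart to requests_cache.disabled() is requests_cache.enabled(): instead of installing the cache globally, it applies caching only inside a with-block. A brief sketch, assuming it accepts the same cache name and backend arguments as install_cache():

import requests

import requests_cache

# Only requests made inside this block are cached
with requests_cache.enabled('example_cache', backend='sqlite'):
    requests.get('http://httpbin.org/get')

# Outside the block, requests are sent and received as usual, without caching
requests.get('http://httpbin.org/ip')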
Cache expiration#
An example of setting expiration for a session and for individual requests
Example: expiration.py
#!/usr/bin/env python
import time

from requests_cache import CachedSession


def main():
    session = CachedSession('example_cache', backend='sqlite')

    # By default, cached responses never expire
    response = session.get('https://httpbin.org/get')
    assert not response.from_cache
    response = session.get('https://httpbin.org/get')
    assert response.from_cache
    assert not response.expires

    # We can set default expiration for the session using expire_after
    session = CachedSession('example_cache', backend='sqlite', expire_after=60)
    session.cache.clear()
    response = session.get('https://httpbin.org/get')
    response = session.get('https://httpbin.org/get')
    print('Expiration time:', response.expires)

    # This can also be overridden for individual requests
    session.cache.clear()
    response = session.get('https://httpbin.org/get', expire_after=1)
    response = session.get('https://httpbin.org/get')
    assert response.from_cache
    print('Expiration time:', response.expires)

    # After 1 second, the cached value will be expired
    time.sleep(1.2)
    assert response.is_expired
    response = session.get('https://httpbin.org/get')
    assert not response.from_cache


if __name__ == "__main__":
    t = time.perf_counter()
    main()
    print('Elapsed: %.3f seconds' % (time.perf_counter() - t))
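Expiration values do not have to be a number of seconds; a timedelta also works, which can be easier to read (the next example uses the same idea for per-URL expiration). A minimal sketch:

from datetime import timedelta

from requests_cache import CachedSession

# Equivalent to expire_after=3600 (one hour, in seconds)
session = CachedSession('example_cache', backend='sqlite', expire_after=timedelta(hours=1))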
URL patterns#
An example of expiration with URL patterns
Example: url_patterns.py
#!/usr/bin/env python
from datetime import timedelta

from requests_cache import CachedSession

default_expire_after = 60 * 60  # By default, cached responses expire in an hour
urls_expire_after = {
    'httpbin.org/image': timedelta(days=7),  # Requests for this base URL will expire in a week
    '*.fillmurray.com': -1,  # Requests matching this pattern will never expire
    '*.placeholder.com/*': 0,  # Requests matching this pattern will not be cached
}
urls = [
    'https://httpbin.org/get',  # Will expire in an hour
    'https://httpbin.org/image/jpeg',  # Will expire in a week
    'http://www.fillmurray.com/460/300',  # Will never expire
    'https://via.placeholder.com/350x150',  # Will not be cached
]


def main():
    session = CachedSession(
        cache_name='example_cache',
        expire_after=default_expire_after,
        urls_expire_after=urls_expire_after,
    )
    return [session.get(url) for url in urls]


def _expires_str(response):
    if not response.from_cache:
        return 'N/A'
    elif response.expires is None:
        return 'Never'
    else:
        return response.expires.isoformat()


if __name__ == "__main__":
    original_responses = main()
    cached_responses = main()
    for response in cached_responses:
        print(
            f'{response.url:40} From cache: {response.from_cache:}'
            f'\tExpires: {_expires_str(response)}'
        )
Multi-threaded requests#
An example of making multi-threaded cached requests, adapted from the Python docs for ThreadPoolExecutor.
Example: threads.py
#!/usr/bin/env python
from concurrent.futures import ThreadPoolExecutor, as_completed
from time import perf_counter as time

from requests_cache import CachedSession

URLS = [
    'https://en.wikipedia.org/wiki/Python_(programming_language)',
    'https://en.wikipedia.org/wiki/Requests_(software)',
    'https://en.wikipedia.org/wiki/Cache_(computing)',
    'https://en.wikipedia.org/wiki/SQLite',
    'https://en.wikipedia.org/wiki/Redis',
    'https://en.wikipedia.org/wiki/MongoDB',
]


def send_requests():
    session = CachedSession('example_cache')
    start = time()

    with ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {executor.submit(session.get, url): url for url in URLS}
        for future in as_completed(future_to_url):
            url = future_to_url[future]
            response = future.result()
            from_cache = 'hit' if response.from_cache else 'miss'
            print(f'{url} is {len(response.content)} bytes (cache {from_cache})')

    print(f'Elapsed: {time() - start:.3f} seconds')


if __name__ == '__main__':
    send_requests()
    send_requests()
Logging requests#
An example of testing the cache to prove that it’s not making more requests than expected.
Example: log_requests.py
#!/usr/bin/env python3
from contextlib import contextmanager
from logging import basicConfig, getLogger
from unittest.mock import patch

import requests

from requests_cache import CachedSession
from requests_cache.session import OriginalSession, set_response_defaults

basicConfig(level='INFO')
logger = getLogger('requests_cache.examples')
# Uncomment for more verbose debug output
# getLogger('requests_cache').setLevel('DEBUG')


@contextmanager
def log_requests():
    """Context manager that mocks and logs all non-cached requests"""
    real_response = set_response_defaults(requests.get('http://httpbin.org/get'))
    with patch.object(OriginalSession, 'send', return_value=real_response) as mock_send:
        session = CachedSession('cache-test', backend='sqlite')
        session.cache.clear()
        yield session
        cached_responses = session.cache.responses.values()

    logger.debug('All calls to Session._request():')
    logger.debug(mock_send.mock_calls)
    logger.info(f'Responses cached: {len(cached_responses)}')
    logger.info(f'Requests sent: {mock_send.call_count}')


def main():
    """Example usage; replace with any other requests you want to test"""
    with log_requests() as session:
        for i in range(10):
            response = session.get('http://httpbin.org/get')
            logger.debug(f'Response {i}: {type(response).__name__}')


if __name__ == '__main__':
    main()
Cache speed test#
An example of benchmarking cache read and write speeds with semi-randomized response content
Usage (optionally for a specific backend and/or serializer):
python benchmark.py -b <backend> -s <serializer>
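For example, to benchmark the in-memory SQLite variant that the script handles via the sqlite-memory option (no extra services required):
python benchmark.py -b sqlite-memory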
Example: benchmark.py
#!/usr/bin/env python
from argparse import ArgumentParser
from os import urandom
from random import random
from time import perf_counter as time

import requests
from rich import print
from rich.progress import Progress

from requests_cache import CachedResponse, CachedSession

BASE_RESPONSE = requests.get('https://httpbin.org/get')
CACHE_NAME = 'rubbish_bin'
WARMUP_ITERATIONS = 100
ITERATIONS = 5000
MAX_RESPONSE_SIZE = 1024 * 350

# Defaults for DynamoDB
AWS_OPTIONS = {
    'endpoint_url': 'http://localhost:8000',
    'region_name': 'us-east-1',
    'aws_access_key_id': 'placeholder',
    'aws_secret_access_key': 'placeholder',
}


def test_write_speed(session, max_size):
    for i in range(WARMUP_ITERATIONS):
        new_response = get_randomized_response(i, max_size)
        session.cache.save_response(new_response)

    with Progress() as progress:
        task = progress.add_task('[cyan]Testing write speed...', total=ITERATIONS)
        start = time()

        for i in range(ITERATIONS):
            new_response = get_randomized_response(i, max_size)
            session.cache.save_response(new_response)
            progress.update(task, advance=1)

    elapsed = time() - start
    avg = (elapsed / ITERATIONS) * 1000
    print(f'[cyan]Elapsed: [green]{elapsed:.3f}[/] seconds (avg [green]{avg:.3f}[/] ms per write)')


def test_read_speed(session):
    keys = list(session.cache.responses.keys())
    for i in range(WARMUP_ITERATIONS):
        key = keys[i % len(keys)]
        session.cache.get_response(key)

    with Progress() as progress:
        task = progress.add_task('[cyan]Testing read speed...', total=ITERATIONS)
        start = time()

        for i in range(ITERATIONS):
            key = keys[i % len(keys)]
            session.cache.get_response(key)
            progress.update(task, advance=1)

    elapsed = time() - start
    avg = (elapsed / ITERATIONS) * 1000
    print(f'[cyan]Elapsed: [green]{elapsed:.3f}[/] seconds (avg [green]{avg:.3f}[/] ms per read)')


def get_randomized_response(i=0, max_size=MAX_RESPONSE_SIZE):
    """Get a response with randomized content"""
    new_response = CachedResponse.from_response(BASE_RESPONSE)
    n_bytes = int(random() * max_size)
    new_response._content = urandom(n_bytes)
    new_response.request.url += f'/response_{i}'
    return new_response


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('-b', '--backend', default='sqlite')
    parser.add_argument('-s', '--serializer', default='pickle')
    parser.add_argument('-m', '--max-size', default=MAX_RESPONSE_SIZE, type=float)
    args = parser.parse_args()
    print(f'[cyan]Benchmarking {args.backend} backend with {args.serializer} serializer')

    kwargs = {}
    if args.backend == 'dynamodb':
        kwargs = AWS_OPTIONS
    elif args.backend == 'sqlite-memory':
        args.backend = 'sqlite'
        kwargs = {'use_memory': True}

    session = CachedSession(
        CACHE_NAME,
        backend=args.backend,
        serializer=args.serializer,
        **kwargs,
    )
    test_write_speed(session, args.max_size)
    test_read_speed(session)
Converting an old cache#
Example of converting data cached in older versions of requests-cache (<=0.5.2) into the current format
Example: convert_cache.py
#!/usr/bin/env python
from requests import Response

import requests_cache.backends.base
from requests_cache import CachedResponse, CachedSession


class _Store:
    pass


# Add placeholder object used by pickle to deserialize old responses
requests_cache.backends.base._Store = _Store
requests_cache.backends.base._RawStore = _Store


def convert_old_response(cached_response, timestamp):
    temp_response = Response()
    for field in Response.__attrs__:
        setattr(temp_response, field, getattr(cached_response, field, None))

    new_response = CachedResponse(temp_response)
    new_response.created_at = timestamp
    return new_response


def convert_cache(*args, **kwargs):
    session = CachedSession(*args, **kwargs)
    print(f'Checking {len(session.cache.responses)} cached responses')

    with session.cache.responses.bulk_commit():
        for key, response in session.cache.responses.items():
            if isinstance(response, tuple):
                print(f'Converting response {key}')
                session.cache.responses[key] = convert_old_response(*response)

    print('Conversion complete')


# Example: convert a cache named 'demo_cache.sqlite' in the current directory
if __name__ == '__main__':
    convert_cache('demo_cache', backend='sqlite')
Custom request matcher#
Example of a custom request matcher that caches a new response if the version of requests-cache, requests, or urllib3 changes.
This generally isn’t needed, since anything that causes a deserialization error will simply result in a new request being sent and cached. But you might want to include a library version in your cache key if, for example, you suspect a change in the library does not cause errors but results in different response content.
This uses info from requests.help.info(). You can also preview this info from the command line to see what else is available:
python -m requests.help
Example: custom_request_matcher.py
#!/usr/bin/env python
from hashlib import sha256
from unittest.mock import patch

from requests import PreparedRequest
from requests.help import info as get_requests_info

import requests_cache
from requests_cache import CachedSession
from requests_cache.cache_keys import create_key


def create_custom_key(request: PreparedRequest, **kwargs) -> str:
    """Make a custom cache key that includes library versions"""
    # Start with the default key created by requests-cache
    base_key = create_key(request, **kwargs)
    key = sha256()
    key.update(base_key.encode('utf-8'))

    # Add versions of requests-cache, requests, and urllib3
    requests_info = get_requests_info()
    for lib in ['requests', 'urllib3']:
        key.update(requests_info[lib]['version'].encode('utf-8'))
    key.update(requests_cache.__version__.encode('utf-8'))
    return key.hexdigest()


def test_cache_key():
    """Test that the custom cache keys are working as expected"""
    session = CachedSession('key-test', key_fn=create_custom_key)
    session.cache.clear()
    session.get('https://httpbin.org/get')
    response = session.get('https://httpbin.org/get')
    assert response.from_cache is True

    # Simulate a major version change
    new_versions = {
        'requests': {'version': '3.0.0'},
        'urllib3': {'version': '2.0.0'},
    }
    with patch('__main__.get_requests_info', return_value=new_versions):
        # A new request will be sent since the cache key no longer matches
        response = session.get('https://httpbin.org/get')
        assert response.from_cache is False


if __name__ == '__main__':
    test_cache_key()
Backtesting with time-machine#
An example of using the time-machine library for backtesting, e.g., testing with cached responses that were available at an arbitrary time in the past.
Example: time_machine_backtesting.py
#!/usr/bin/env python3
from datetime import datetime

import requests
import time_machine

from requests_cache import CachedSession, set_response_defaults


class BacktestCachedSession(CachedSession):
    def request(self, method: str, url: str, **kwargs):
        response = super().request(method, url, **kwargs)

        # Response was cached after the (simulated) current time, so ignore it and send a new request
        if response.created_at and response.created_at > datetime.utcnow():
            new_response = requests.request(method, url, **kwargs)
            return set_response_defaults(new_response)
        else:
            return response


def demo():
    session = BacktestCachedSession()
    response = session.get('https://httpbin.org/get')
    response = session.get('https://httpbin.org/get')
    assert response.from_cache is True

    # Response was not cached yet at this point, so we should get a fresh one
    with time_machine.travel(datetime(2020, 1, 1)):
        response = session.get('https://httpbin.org/get')
        assert response.from_cache is False


if __name__ == '__main__':
    demo()
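To backtest over a range of dates rather than a single point in time, the class above can be driven from a loop. A rough sketch (the date range and URL are illustrative, and BacktestCachedSession refers to the class defined in the example above):

from datetime import datetime, timedelta

import time_machine


def backtest(session, start, days):
    """Replay the same request as of each day in the given range"""
    for offset in range(days):
        with time_machine.travel(start + timedelta(days=offset)):
            response = session.get('https://httpbin.org/get')
            print(response.created_at, response.from_cache)


backtest(BacktestCachedSession(), datetime(2021, 1, 1), days=7)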