Examples#
This section contains some complete examples that demonstrate the main features of requests-cache.
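For orientation, the two usage styles demonstrated in the scripts below boil down to just a few lines each. A minimal sketch (the cache name 'demo_cache' is arbitrary):

import requests
import requests_cache
from requests_cache import CachedSession

# Style 1: an explicit session with caching built in
session = CachedSession('demo_cache')
session.get('https://httpbin.org/get')

# Style 2: patch requests globally so requests.get() and friends are cached
requests_cache.install_cache('demo_cache')
requests.get('https://httpbin.org/get')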
Articles#
Some articles and blog posts that discuss requests-cache:
PyBites: Module of the Week: requests-cache for repeated API calls
Real Python: Caching External API Requests
Thomas Gorham: Faster Backtesting with requests-cache
Tim O’Hearn: Pragmatic Usage of requests-cache
Valdir Stumm Jr: Tips for boosting your Python scripts
Python Web Scraping (2nd Edition): Exploring requests-cache
Scripts#
The following scripts can also be found in the examples/ folder on GitHub.
Basic usage (with sessions)#
A simple example using requests-cache with httpbin
Example: basic_sessions.py
#!/usr/bin/env python
import time

from requests_cache import CachedSession


def main():
    session = CachedSession('example_cache', backend='sqlite')

    # The real request will only be made once; afterward, the cached response is used
    for i in range(5):
        response = session.get('http://httpbin.org/get')

    # This is more obvious when calling a slow endpoint
    for i in range(5):
        response = session.get('http://httpbin.org/delay/2')

    # Caching can be disabled if we want to get a fresh page and not cache it
    with session.cache_disabled():
        print(session.get('http://httpbin.org/ip').text)

    # Get some debugging info about the cache
    print(session.cache)
    print('Cached URLS:')
    print('\n'.join(session.cache.urls))


if __name__ == "__main__":
    t = time.time()
    main()
    print('Elapsed: %.3f seconds' % (time.time() - t))
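A quick way to confirm that the cache is actually being used is to check the from_cache attribute on each response (the later examples rely on it as well); a small sketch using the same cache:

from requests_cache import CachedSession

session = CachedSession('example_cache', backend='sqlite')
session.cache.clear()  # Start from an empty cache so the first request is sent for real
assert session.get('http://httpbin.org/get').from_cache is False
assert session.get('http://httpbin.org/get').from_cache is True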
Basic usage (with patching)#
The same as basic_sessions.py, but using patching
Example: basic_patching.py
#!/usr/bin/env python
import time

import requests

import requests_cache

# After installation, all requests functions and Session methods will be cached
requests_cache.install_cache('example_cache', backend='sqlite')


def main():
    # The real request will only be made once; afterward, the cached response is used
    for i in range(5):
        response = requests.get('http://httpbin.org/get')

    # This is more obvious when calling a slow endpoint
    for i in range(5):
        response = requests.get('http://httpbin.org/delay/2')

    # Caching can be disabled if we want to get a fresh page and not cache it
    with requests_cache.disabled():
        print(requests.get('http://httpbin.org/ip').text)

    # Get some debugging info about the cache
    print(requests_cache.get_cache())
    print('Cached URLS:')
    print('\n'.join(requests_cache.get_cache().urls))

    # Uninstall to remove caching from all requests functions
    requests_cache.uninstall_cache()


if __name__ == "__main__":
    t = time.time()
    main()
    print('Elapsed: %.3f seconds' % (time.time() - t))
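The counterpart to requests_cache.disabled() is requests_cache.enabled(): instead of installing the cache globally, it applies caching only inside a with-block. A brief sketch, assuming it accepts the same cache name and backend arguments as install_cache():

import requests

import requests_cache

# Only requests made inside this block are cached
with requests_cache.enabled('example_cache', backend='sqlite'):
    requests.get('http://httpbin.org/get')

# Outside the block, requests are sent and received as usual, without caching
requests.get('http://httpbin.org/ip')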
Cache expiration#
An example of setting expiration for a session and for individual requests
Example: expiration.py
#!/usr/bin/env python
import time

from requests_cache import CachedSession


def main():
    session = CachedSession('example_cache', backend='sqlite')

    # By default, cached responses never expire
    response = session.get('https://httpbin.org/get')
    assert not response.from_cache
    response = session.get('https://httpbin.org/get')
    assert response.from_cache
    assert not response.expires

    # We can set default expiration for the session using expire_after
    session = CachedSession('example_cache', backend='sqlite', expire_after=60)
    session.cache.clear()
    response = session.get('https://httpbin.org/get')
    response = session.get('https://httpbin.org/get')
    print('Expiration time:', response.expires)

    # This can also be overridden for individual requests
    session.cache.clear()
    response = session.get('https://httpbin.org/get', expire_after=1)
    response = session.get('https://httpbin.org/get')
    assert response.from_cache
    print('Expiration time:', response.expires)

    # After 1 second, the cached value will be expired
    time.sleep(1.2)
    assert response.is_expired
    response = session.get('https://httpbin.org/get')
    assert not response.from_cache


if __name__ == "__main__":
    t = time.perf_counter()
    main()
    print('Elapsed: %.3f seconds' % (time.perf_counter() - t))
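Expiration values do not have to be a number of seconds; a timedelta also works, which can be easier to read (the next example uses the same idea for per-URL expiration). A minimal sketch:

from datetime import timedelta

from requests_cache import CachedSession

# Equivalent to expire_after=3600 (one hour, in seconds)
session = CachedSession('example_cache', backend='sqlite', expire_after=timedelta(hours=1))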
URL patterns#
An example of expiration with URL patterns
Example: url_patterns.py
#!/usr/bin/env python
from datetime import timedelta

from requests_cache import CachedSession

default_expire_after = 60 * 60  # By default, cached responses expire in an hour
urls_expire_after = {
    'httpbin.org/image': timedelta(days=7),  # Requests for this base URL will expire in a week
    '*.fillmurray.com': -1,  # Requests matching this pattern will never expire
    '*.placeholder.com/*': 0,  # Requests matching this pattern will not be cached
}
urls = [
    'https://httpbin.org/get',  # Will expire in an hour
    'https://httpbin.org/image/jpeg',  # Will expire in a week
    'http://www.fillmurray.com/460/300',  # Will never expire
    'https://via.placeholder.com/350x150',  # Will not be cached
]


def main():
    session = CachedSession(
        cache_name='example_cache',
        expire_after=default_expire_after,
        urls_expire_after=urls_expire_after,
    )
    return [session.get(url) for url in urls]


def _expires_str(response):
    if not response.from_cache:
        return 'N/A'
    elif response.expires is None:
        return 'Never'
    else:
        return response.expires.isoformat()


if __name__ == "__main__":
    original_responses = main()
    cached_responses = main()
    for response in cached_responses:
        print(
            f'{response.url:40} From cache: {response.from_cache:}'
            f'\tExpires: {_expires_str(response)}'
        )
Multi-threaded requests#
An example of making multi-threaded cached requests, adapted from the Python docs for ThreadPoolExecutor.
Example: threads.py
#!/usr/bin/env python
from concurrent.futures import ThreadPoolExecutor, as_completed
from time import perf_counter as time

from requests_cache import CachedSession

URLS = [
    'https://en.wikipedia.org/wiki/Python_(programming_language)',
    'https://en.wikipedia.org/wiki/Requests_(software)',
    'https://en.wikipedia.org/wiki/Cache_(computing)',
    'https://en.wikipedia.org/wiki/SQLite',
    'https://en.wikipedia.org/wiki/Redis',
    'https://en.wikipedia.org/wiki/MongoDB',
]


def send_requests():
    session = CachedSession('example_cache')
    start = time()

    with ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {executor.submit(session.get, url): url for url in URLS}
        for future in as_completed(future_to_url):
            url = future_to_url[future]
            response = future.result()
            from_cache = 'hit' if response.from_cache else 'miss'
            print(f'{url} is {len(response.content)} bytes (cache {from_cache})')

    print(f'Elapsed: {time() - start:.3f} seconds')


if __name__ == '__main__':
    send_requests()
    send_requests()
Logging requests#
An example of testing the cache to prove that it’s not making more requests than expected.
Example: log_requests.py
#!/usr/bin/env python3
from contextlib import contextmanager
from logging import basicConfig, getLogger
from unittest.mock import patch

import requests

from requests_cache import CachedSession
from requests_cache.session import OriginalSession, set_response_defaults

basicConfig(level='INFO')
logger = getLogger('requests_cache.examples')
# Uncomment for more verbose debug output
# getLogger('requests_cache').setLevel('DEBUG')


@contextmanager
def log_requests():
    """Context manager that mocks and logs all non-cached requests"""
    real_response = set_response_defaults(requests.get('http://httpbin.org/get'))
    with patch.object(OriginalSession, 'send', return_value=real_response) as mock_send:
        session = CachedSession('cache-test', backend='sqlite')
        session.cache.clear()
        yield session
        cached_responses = session.cache.responses.values()

    logger.debug('All calls to Session._request():')
    logger.debug(mock_send.mock_calls)
    logger.info(f'Responses cached: {len(cached_responses)}')
    logger.info(f'Requests sent: {mock_send.call_count}')


def main():
    """Example usage; replace with any other requests you want to test"""
    with log_requests() as session:
        for i in range(10):
            response = session.get('http://httpbin.org/get')
            logger.debug(f'Response {i}: {type(response).__name__}')


if __name__ == '__main__':
    main()
Cache speed test#
An example of benchmarking cache read and write speeds with semi-randomized response content
Usage (optionally for a specific backend and/or serializer):
python benchmark.py -b <backend> -s <serializer>
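For example, to benchmark the in-memory SQLite variant that the script handles via the sqlite-memory option (no extra services required):
python benchmark.py -b sqlite-memory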
Example: benchmark.py
#!/usr/bin/env python
from argparse import ArgumentParser
from os import urandom
from random import random
from time import perf_counter as time

import requests
from rich import print
from rich.progress import Progress

from requests_cache import CachedResponse, CachedSession

BASE_RESPONSE = requests.get('https://httpbin.org/get')
CACHE_NAME = 'rubbish_bin'
WARMUP_ITERATIONS = 100
ITERATIONS = 5000
MAX_RESPONSE_SIZE = 1024 * 350

# Defaults for DynamoDB
AWS_OPTIONS = {
    'endpoint_url': 'http://localhost:8000',
    'region_name': 'us-east-1',
    'aws_access_key_id': 'placeholder',
    'aws_secret_access_key': 'placeholder',
}


def test_write_speed(session, max_size):
    for i in range(WARMUP_ITERATIONS):
        new_response = get_randomized_response(i, max_size)
        session.cache.save_response(new_response)

    with Progress() as progress:
        task = progress.add_task('[cyan]Testing write speed...', total=ITERATIONS)
        start = time()

        for i in range(ITERATIONS):
            new_response = get_randomized_response(i, max_size)
            session.cache.save_response(new_response)
            progress.update(task, advance=1)

    elapsed = time() - start
    avg = (elapsed / ITERATIONS) * 1000
    print(f'[cyan]Elapsed: [green]{elapsed:.3f}[/] seconds (avg [green]{avg:.3f}[/] ms per write)')


def test_read_speed(session):
    keys = list(session.cache.responses.keys())
    for i in range(WARMUP_ITERATIONS):
        key = keys[i % len(keys)]
        session.cache.get_response(key)

    with Progress() as progress:
        task = progress.add_task('[cyan]Testing read speed...', total=ITERATIONS)
        start = time()

        for i in range(ITERATIONS):
            key = keys[i % len(keys)]
            session.cache.get_response(key)
            progress.update(task, advance=1)

    elapsed = time() - start
    avg = (elapsed / ITERATIONS) * 1000
    print(f'[cyan]Elapsed: [green]{elapsed:.3f}[/] seconds (avg [green]{avg:.3f}[/] ms per read)')


def get_randomized_response(i=0, max_size=MAX_RESPONSE_SIZE):
    """Get a response with randomized content"""
    new_response = CachedResponse.from_response(BASE_RESPONSE)
    n_bytes = int(random() * max_size)
    new_response._content = urandom(n_bytes)
    new_response.request.url += f'/response_{i}'
    return new_response


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('-b', '--backend', default='sqlite')
    parser.add_argument('-s', '--serializer', default='pickle')
    parser.add_argument('-m', '--max-size', default=MAX_RESPONSE_SIZE, type=float)
    args = parser.parse_args()
    print(f'[cyan]Benchmarking {args.backend} backend with {args.serializer} serializer')

    kwargs = {}
    if args.backend == 'dynamodb':
        kwargs = AWS_OPTIONS
    elif args.backend == 'sqlite-memory':
        args.backend = 'sqlite'
        kwargs = {'use_memory': True}

    session = CachedSession(
        CACHE_NAME,
        backend=args.backend,
        serializer=args.serializer,
        **kwargs,
    )
    test_write_speed(session, args.max_size)
    test_read_speed(session)
Converting an old cache#
Example of converting data cached in older versions of requests-cache (<=0.5.2) into the current format
Example: convert_cache.py
#!/usr/bin/env python
from requests import Response

import requests_cache.backends.base
from requests_cache import CachedResponse, CachedSession


class _Store:
    pass


# Add placeholder object used by pickle to deserialize old responses
requests_cache.backends.base._Store = _Store
requests_cache.backends.base._RawStore = _Store


def convert_old_response(cached_response, timestamp):
    temp_response = Response()
    for field in Response.__attrs__:
        setattr(temp_response, field, getattr(cached_response, field, None))

    new_response = CachedResponse(temp_response)
    new_response.created_at = timestamp
    return new_response


def convert_cache(*args, **kwargs):
    session = CachedSession(*args, **kwargs)
    print(f'Checking {len(session.cache.responses)} cached responses')

    with session.cache.responses.bulk_commit():
        for key, response in session.cache.responses.items():
            if isinstance(response, tuple):
                print(f'Converting response {key}')
                session.cache.responses[key] = convert_old_response(*response)

    print('Conversion complete')


# Example: convert a cache named 'demo_cache.sqlite' in the current directory
if __name__ == '__main__':
    convert_cache('demo_cache', backend='sqlite')
Custom request matcher#
Example of a custom request matcher that caches a new response if the version of requests-cache, requests, or urllib3 changes.
This generally isn’t needed, since anything that causes a deserialization error will simply result in a new request being sent and cached. But you might want to include a library version in your cache key if, for example, you suspect a change in the library does not cause errors but results in different response content.
This uses info from requests.help.info(). You can also preview this info from the command line to see what else is available:
python -m requests.help
Example: custom_request_matcher.py
#!/usr/bin/env python
from hashlib import sha256
from unittest.mock import patch

from requests import PreparedRequest
from requests.help import info as get_requests_info

import requests_cache
from requests_cache import CachedSession
from requests_cache.cache_keys import create_key


def create_custom_key(request: PreparedRequest, **kwargs) -> str:
    """Make a custom cache key that includes library versions"""
    # Start with the default key created by requests-cache
    base_key = create_key(request, **kwargs)
    key = sha256()
    key.update(base_key.encode('utf-8'))

    # Add versions of requests-cache, requests, and urllib3
    requests_info = get_requests_info()
    for lib in ['requests', 'urllib3']:
        key.update(requests_info[lib]['version'].encode('utf-8'))
    key.update(requests_cache.__version__.encode('utf-8'))
    return key.hexdigest()


def test_cache_key():
    """Test that the custom cache keys are working as expected"""
    session = CachedSession('key-test', key_fn=create_custom_key)
    session.cache.clear()
    session.get('https://httpbin.org/get')
    response = session.get('https://httpbin.org/get')
    assert response.from_cache is True

    # Simulate a major version change
    new_versions = {
        'requests': {'version': '3.0.0'},
        'urllib3': {'version': '2.0.0'},
    }
    with patch('__main__.get_requests_info', return_value=new_versions):
        # A new request will be sent since the cache key no longer matches
        response = session.get('https://httpbin.org/get')
        assert response.from_cache is False


if __name__ == '__main__':
    test_cache_key()
Backtesting with time-machine#
An example of using the time-machine library for backtesting, e.g., testing with cached responses that were available at an arbitrary time in the past.
Example: time_machine_backtesting.py
#!/usr/bin/env python3
from datetime import datetime

import requests
import time_machine

from requests_cache import CachedSession, set_response_defaults


class BacktestCachedSession(CachedSession):
    def request(self, method: str, url: str, **kwargs):
        response = super().request(method, url, **kwargs)

        # Response was cached after the (simulated) current time, so ignore it and send a new request
        if response.created_at and response.created_at > datetime.utcnow():
            new_response = requests.request(method, url, **kwargs)
            return set_response_defaults(new_response)
        else:
            return response


def demo():
    session = BacktestCachedSession()
    response = session.get('https://httpbin.org/get')
    response = session.get('https://httpbin.org/get')
    assert response.from_cache is True

    # Response was not cached yet at this point, so we should get a fresh one
    with time_machine.travel(datetime(2020, 1, 1)):
        response = session.get('https://httpbin.org/get')
        assert response.from_cache is False


if __name__ == '__main__':
    demo()
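To backtest over a range of dates rather than a single point in time, the class above can be driven from a loop. A rough sketch (the date range and URL are illustrative, and BacktestCachedSession refers to the class defined in the example above):

from datetime import datetime, timedelta

import time_machine


def backtest(session, start, days):
    """Replay the same request as of each day in the given range"""
    for offset in range(days):
        with time_machine.travel(start + timedelta(days=offset)):
            response = session.get('https://httpbin.org/get')
            print(response.created_at, response.from_cache)


backtest(BacktestCachedSession(), datetime(2021, 1, 1), days=7)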