import re
import sys
from functools import wraps
from inspect import getmembers
from types import CoroutineType
from typing import AsyncGenerator, Dict, Type
from unittest import TestCase

from scrapy.http import Request
from scrapy.utils.python import get_spec
from scrapy.utils.spider import iterate_spider_output
class Contract:
    """Abstract base class for spider contracts.

    Subclasses are expected to provide a ``name`` class attribute (it is read
    in ``__init__``) and may define ``pre_process(response)`` and/or
    ``post_process(output)``.  When present, those hooks are wrapped around
    the request callback and their outcome is recorded on a
    ``unittest``-style result object instead of being propagated.
    """

    # Request class this contract wants the test request built with;
    # ``None`` means the contract does not override the request class.
    request_cls = None

    def __init__(self, method, *args):
        """Bind the contract to *method* and remember its arguments.

        :param method: bound spider callback the contract was declared on
        :param args: contract arguments, stored as-is in ``self.args``
        """
        self.testcase_pre = _create_testcase(method, f"@{self.name} pre-hook")
        self.testcase_post = _create_testcase(method, f"@{self.name} post-hook")
        self.args = args

    def add_pre_hook(self, request, results):
        """Wrap ``request.callback`` so ``pre_process`` runs before it.

        Nothing is changed when the contract defines no ``pre_process``.

        :param request: request whose callback is wrapped in place
        :param results: result object providing ``startTest``, ``stopTest``,
            ``addSuccess``, ``addFailure`` and ``addError``
        :returns: the same *request* object
        """
        if hasattr(self, "pre_process"):
            cb = request.callback

            @wraps(cb)
            def wrapper(response, **cb_kwargs):
                try:
                    results.startTest(self.testcase_pre)
                    self.pre_process(response)
                except AssertionError:
                    results.addFailure(self.testcase_pre, sys.exc_info())
                except Exception:
                    results.addError(self.testcase_pre, sys.exc_info())
                else:
                    results.addSuccess(self.testcase_pre)
                finally:
                    # Always balance startTest(), including when the hook fails.
                    results.stopTest(self.testcase_pre)
                # The callback still runs after a failed hook.  It is invoked
                # outside the try statement (not via ``return`` in ``finally``)
                # so KeyboardInterrupt/SystemExit raised by the hook propagate.
                cb_result = cb(response, **cb_kwargs)
                if isinstance(cb_result, (AsyncGenerator, CoroutineType)):
                    raise TypeError("Contracts don't support async callbacks")
                return list(iterate_spider_output(cb_result))

            request.callback = wrapper

        return request

    def add_post_hook(self, request, results):
        """Wrap ``request.callback`` so ``post_process`` checks its output.

        Nothing is changed when the contract defines no ``post_process``.

        :param request: request whose callback is wrapped in place
        :param results: result object providing ``startTest``, ``stopTest``,
            ``addSuccess``, ``addFailure`` and ``addError``
        :returns: the same *request* object
        """
        if hasattr(self, "post_process"):
            cb = request.callback

            @wraps(cb)
            def wrapper(response, **cb_kwargs):
                cb_result = cb(response, **cb_kwargs)
                if isinstance(cb_result, (AsyncGenerator, CoroutineType)):
                    raise TypeError("Contracts don't support async callbacks")
                # Materialize the output so the hook and the caller both see
                # the complete list.
                output = list(iterate_spider_output(cb_result))
                try:
                    results.startTest(self.testcase_post)
                    self.post_process(output)
                except AssertionError:
                    results.addFailure(self.testcase_post, sys.exc_info())
                except Exception:
                    results.addError(self.testcase_post, sys.exc_info())
                else:
                    results.addSuccess(self.testcase_post)
                finally:
                    # Always balance startTest(), including when the hook fails.
                    results.stopTest(self.testcase_post)
                # Returned outside ``finally`` so KeyboardInterrupt/SystemExit
                # raised by the hook are not silently discarded.
                return output

            request.callback = wrapper

        return request

    def adjust_request_args(self, args):
        """Return the keyword arguments used to build the test request.

        The base implementation returns *args* unchanged.
        """
        return args
class ContractsManager:
    """Registry of contract classes that builds test requests for spiders.

    Contracts are looked up by name from ``@<name> <args>`` lines found in
    callback docstrings.
    """

    # Class-level defaults (contract name -> contract class).  Instances never
    # mutate this dict; each one works on its own copy made in ``__init__``.
    contracts: Dict[str, "Type[Contract]"] = {}

    def __init__(self, contracts):
        """Register *contracts*, an iterable of classes exposing ``name``."""
        # Copy first: mutating the shared class attribute would leak the
        # registrations of one manager into every other manager.
        self.contracts = dict(self.contracts)
        for contract in contracts:
            self.contracts[contract.name] = contract

    def tested_methods_from_spidercls(self, spidercls):
        """Return names of callables on *spidercls* whose docstring has a line starting with ``@``."""
        is_method = re.compile(r"^\s*@", re.MULTILINE).search
        return [
            key
            for key, value in getmembers(spidercls)
            if callable(value) and value.__doc__ and is_method(value.__doc__)
        ]

    def extract_contracts(self, method):
        """Instantiate the contracts declared in the docstring of *method*.

        :returns: contract instances in declaration order; an empty list when
            the method has no docstring or declares no contracts
        :raises KeyError: if a declared contract name is not registered
        """
        contracts = []
        # A missing docstring simply means "no contracts".
        for line in (method.__doc__ or "").split("\n"):
            line = line.strip()
            if line.startswith("@"):
                name, args = re.match(r"@(\w+)\s*(.*)", line).groups()
                # NOTE: a contract declared without arguments receives a single
                # empty-string argument, because re.split on "" yields [""].
                args = re.split(r"\s+", args)
                contracts.append(self.contracts[name](method, *args))
        return contracts

    def from_spider(self, spider, results):
        """Build one test request per contract-documented method of *spider*.

        An exception raised while building a request is recorded on *results*
        as an error and does not stop the remaining methods.  The returned
        list holds whatever ``from_method`` returned, which may be ``None``.
        """
        requests = []
        for method in self.tested_methods_from_spidercls(type(spider)):
            bound_method = getattr(spider, method)
            try:
                requests.append(self.from_method(bound_method, results))
            except Exception:
                case = _create_testcase(bound_method, "contract")
                results.addError(case, sys.exc_info())
        return requests

    def from_method(self, method, results):
        """Build the test request for *method* from its contracts.

        :returns: the request with all hooks installed, or ``None`` when the
            method declares no contracts or the contracts do not supply every
            positional argument of the request class
        """
        contracts = self.extract_contracts(method)
        if contracts:
            # The last contract that sets ``request_cls`` wins.
            request_cls = Request
            for contract in contracts:
                if contract.request_cls is not None:
                    request_cls = contract.request_cls

            # calculate request args
            # presumably get_spec returns (positional names, keyword defaults);
            # here ``args`` is used as a list containing "self" and ``kwargs``
            # as a dict -- confirm against scrapy.utils.python.get_spec.
            args, kwargs = get_spec(request_cls.__init__)

            # Don't filter requests to allow
            # testing different callbacks on the same URL.
            kwargs["dont_filter"] = True
            kwargs["callback"] = method

            for contract in contracts:
                kwargs = contract.adjust_request_args(kwargs)

            args.remove("self")

            # check if all positional arguments are defined in kwargs
            if set(args).issubset(set(kwargs)):
                request = request_cls(**kwargs)

                # execute pre and post hooks in order: every hook wraps the
                # current callback, so pre-hooks are added in reverse to make
                # the first declared contract the outermost (first to run).
                for contract in reversed(contracts):
                    request = contract.add_pre_hook(request, results)
                for contract in contracts:
                    request = contract.add_post_hook(request, results)

                self._clean_req(request, method, results)
                return request

    def _clean_req(self, request, method, results):
        """Stop the request from returning objects and record any errors."""
        cb = request.callback

        @wraps(cb)
        def cb_wrapper(response, **cb_kwargs):
            try:
                # Consume the output fully so lazy callbacks actually run;
                # the result is discarded on purpose (the wrapper returns None).
                list(iterate_spider_output(cb(response, **cb_kwargs)))
            except Exception:
                case = _create_testcase(method, "callback")
                results.addError(case, sys.exc_info())

        def eb_wrapper(failure):
            case = _create_testcase(method, "errback")
            exc_info = failure.type, failure.value, failure.getTracebackObject()
            results.addError(case, exc_info)

        request.callback = cb_wrapper
        request.errback = eb_wrapper
def _create_testcase(method, desc):
    """Build a ``TestCase`` instance that labels a contract check of *method*.

    :param method: bound method; ``method.__self__.name`` supplies the label prefix
    :param desc: short description shown in parentheses by ``str()``
    :returns: an instance of a freshly created ``TestCase`` subclass whose
        test method is named ``<owner name>_<method name>``
    """
    owner_name = method.__self__.name

    class ContractTestCase(TestCase):
        def __str__(_self):
            return f"[{owner_name}] {method.__name__} ({desc})"

    case_name = f"{owner_name}_{method.__name__}"
    # TestCase(methodName) requires the attribute to exist, so install a
    # placeholder under that name before instantiating.
    setattr(ContractTestCase, case_name, lambda x: x)
    return ContractTestCase(case_name)