"""
:mod:`websockets.http` module provides basic HTTP/1.1 support. It is merely
adequate for WebSocket handshake messages.
These APIs cannot be imported from :mod:`websockets`. They must be imported
from :mod:`websockets.http`.
"""
import asyncio
import re
import sys
from typing import (
Any,
Dict,
Iterable,
Iterator,
List,
Mapping,
MutableMapping,
Tuple,
Union,
)
from .version import version as websockets_version
__all__ = [
    "read_request",
    "read_response",
    "Headers",
    "MultipleValuesError",
    "USER_AGENT",
]

# Upper bound on the number of header lines accepted by read_headers().
MAX_HEADERS = 256
# Upper bound, in bytes, on a single line (CRLF included) accepted by read_line().
MAX_LINE = 4096

# Build "major.minor" from sys.version_info rather than slicing sys.version:
# sys.version[:3] truncates two-digit minor versions ("3.10.4" -> "3.1").
USER_AGENT = (
    f"Python/{sys.version_info[0]}.{sys.version_info[1]} "
    f"websockets/{websockets_version}"
)
def d(value: bytes) -> str:
    """
    Turn a bytestring into text that is safe to embed in an error message.

    Bytes that aren't valid UTF-8 are rendered as backslash escapes instead
    of raising a decoding error.

    """
    text = value.decode("utf-8", "backslashreplace")
    return text
# See https://tools.ietf.org/html/rfc7230#appendix-B.

# Regex for validating header names: one or more characters taken from the
# set listed in the character class below.
_token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")

# Regex for validating header values.
# We don't attempt to support obsolete line folding.
# Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff).
# The ABNF is complicated because it attempts to express that optional
# whitespace is ignored. We strip whitespace and don't revalidate that.
# See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
# Both patterns are applied with ``fullmatch`` in this module, so the whole
# byte string must conform. Unlike ``_token_re``, this pattern accepts the
# empty string.
_value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")
async def read_request(stream: asyncio.StreamReader) -> Tuple[str, "Headers"]:
    """
    Read an HTTP/1.1 GET request and return ``(path, headers)``.

    ``path`` isn't URL-decoded or validated in any way.

    ``path`` and ``headers`` are expected to contain only ASCII characters.
    Other characters are represented with surrogate escapes.

    :func:`read_request` doesn't attempt to read the request body because
    WebSocket handshake requests don't have one. If the request contains a
    body, it may be read from ``stream`` after this coroutine returns.

    :param stream: input to read the request from
    :raises EOFError: if the connection is closed without a full HTTP request
    :raises SecurityError: if the request exceeds a security limit
    :raises ValueError: if the request isn't well formatted

    """
    # https://tools.ietf.org/html/rfc7230#section-3.1.1

    # Parsing is simple because fixed values are expected for method and
    # version and because path isn't checked. Since WebSocket software tends
    # to implement HTTP/1.1 strictly, there's little need for lenient parsing.

    try:
        request_line = await read_line(stream)
    except EOFError as exc:
        raise EOFError("connection closed while reading HTTP request line") from exc

    # maxsplit=2 yields exactly three fields when the line is well formed;
    # any extra spaces end up inside the version field and fail the check below.
    try:
        method, raw_path, version = request_line.split(b" ", 2)
    except ValueError:  # not enough values to unpack (expected 3, got 1-2)
        raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None

    if method != b"GET":
        raise ValueError(f"unsupported HTTP method: {d(method)}")
    if version != b"HTTP/1.1":
        raise ValueError(f"unsupported HTTP version: {d(version)}")

    # Non-ASCII bytes are preserved as surrogate escapes rather than rejected.
    path = raw_path.decode("ascii", "surrogateescape")

    headers = await read_headers(stream)

    return path, headers
async def read_response(stream: asyncio.StreamReader) -> Tuple[int, str, "Headers"]:
    """
    Read an HTTP/1.1 response and return ``(status_code, reason, headers)``.

    ``reason`` and ``headers`` are expected to contain only ASCII characters.
    Other characters are represented with surrogate escapes.

    :func:`read_response` doesn't attempt to read the response body because
    WebSocket handshake responses don't have one. If the response contains a
    body, it may be read from ``stream`` after this coroutine returns.

    :param stream: input to read the response from
    :raises EOFError: if the connection is closed without a full HTTP response
    :raises SecurityError: if the response exceeds a security limit
    :raises ValueError: if the response isn't well formatted

    """
    # https://tools.ietf.org/html/rfc7230#section-3.1.2

    # As in read_request, parsing is simple because a fixed value is expected
    # for version, status_code is a 3-digit number, and reason can be ignored.

    try:
        status_line = await read_line(stream)
    except EOFError as exc:
        raise EOFError("connection closed while reading HTTP status line") from exc

    # maxsplit=2 keeps any spaces inside the reason phrase intact.
    try:
        version, raw_status_code, raw_reason = status_line.split(b" ", 2)
    except ValueError:  # not enough values to unpack (expected 3, got 1-2)
        raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None

    if version != b"HTTP/1.1":
        raise ValueError(f"unsupported HTTP version: {d(version)}")
    try:
        status_code = int(raw_status_code)
    except ValueError:  # invalid literal for int() with base 10
        raise ValueError(f"invalid HTTP status code: {d(raw_status_code)}") from None
    if not 100 <= status_code < 1000:
        raise ValueError(f"unsupported HTTP status code: {d(raw_status_code)}")
    if not _value_re.fullmatch(raw_reason):
        raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
    # _value_re admits obs-text bytes (\x80-\xff), so a strict UTF-8 decode
    # could fail here. Decode like header values do: ASCII with surrogate
    # escapes, which is what the docstring promises.
    reason = raw_reason.decode("ascii", "surrogateescape")

    headers = await read_headers(stream)

    return status_code, reason, headers
async def read_headers(stream: asyncio.StreamReader) -> "Headers":
    """
    Parse HTTP header lines from ``stream`` until the blank line that ends them.

    Header names must be ASCII tokens. Non-ASCII bytes in header values are
    kept as surrogate escapes.

    :raises EOFError: if the connection is closed before the blank line
    :raises SecurityError: if no blank line is found within ``MAX_HEADERS + 1``
        lines
    :raises ValueError: if a header line, name, or value is malformed

    """
    # https://tools.ietf.org/html/rfc7230#section-3.2

    # We don't attempt to support obsolete line folding.

    collected = Headers()
    # One extra read is allowed so the terminating blank line may follow
    # exactly MAX_HEADERS header lines.
    attempts_left = MAX_HEADERS + 1
    while attempts_left > 0:
        attempts_left -= 1

        try:
            header_line = await read_line(stream)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP headers") from exc

        # An empty line marks the end of the header section.
        if not header_line:
            return collected

        try:
            name_bytes, value_bytes = header_line.split(b":", 1)
        except ValueError:  # not enough values to unpack (expected 2, got 1)
            raise ValueError(f"invalid HTTP header line: {d(header_line)}") from None

        if _token_re.fullmatch(name_bytes) is None:
            raise ValueError(f"invalid HTTP header name: {d(name_bytes)}")

        # Optional whitespace around the value is not significant.
        value_bytes = value_bytes.strip(b" \t")
        if _value_re.fullmatch(value_bytes) is None:
            raise ValueError(f"invalid HTTP header value: {d(value_bytes)}")

        # The name matched _token_re, so it is guaranteed to be ASCII here.
        collected[name_bytes.decode("ascii")] = value_bytes.decode(
            "ascii", "surrogateescape"
        )

    # The budget ran out without reaching the blank line.
    raise websockets.exceptions.SecurityError("too many HTTP headers")
async def read_line(stream: asyncio.StreamReader) -> bytes:
    """
    Fetch one CRLF-terminated line from ``stream``.

    The trailing CRLF is removed from the returned bytes.

    :raises SecurityError: if the line, CRLF included, is longer than
        ``MAX_LINE`` bytes
    :raises EOFError: if the data read doesn't end with CRLF

    """
    # Security: readline() is itself bounded by the StreamReader's limit.
    raw = await stream.readline()

    # Security: this guarantees header values are small (hard-coded = 4 KiB)
    if len(raw) > MAX_LINE:
        raise websockets.exceptions.SecurityError("line too long")

    # Not mandatory but safe - https://tools.ietf.org/html/rfc7230#section-3.5
    if raw[-2:] != b"\r\n":
        raise EOFError("line without CRLF")

    return raw[:-2]
class MultipleValuesError(LookupError):
    """
    Exception raised when :class:`Headers` has more than one value for a key.

    """

    def __str__(self) -> str:
        """
        Return the ``repr`` of the sole argument, or the default rendering.

        With exactly one argument the result is its ``repr`` (so a string key
        is shown quoted); otherwise formatting is delegated to the base class.

        """
        # Implement the same logic as KeyError_str in Objects/exceptions.c.
        if len(self.args) == 1:
            return repr(self.args[0])
        return super().__str__()
# Type alias covering three shapes: a ``Headers`` instance, a mapping from
# str to str, or an iterable of ``(str, str)`` tuples.
HeadersLike = Union[Headers, Mapping[str, str], Iterable[Tuple[str, str]]]
# Imported at the bottom of the module to allow a circular import:
# AbortHandshake depends on HeadersLike, which must be defined first.
import websockets.exceptions # isort:skip # noqa