diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/websockets/http11.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/websockets/http11.py | 423 |
1 files changed, 423 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/websockets/http11.py b/.venv/lib/python3.12/site-packages/websockets/http11.py new file mode 100644 index 00000000..49d7b9a4 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/websockets/http11.py @@ -0,0 +1,423 @@ +from __future__ import annotations + +import dataclasses +import os +import re +import sys +import warnings +from collections.abc import Generator +from typing import Callable + +from .datastructures import Headers +from .exceptions import SecurityError +from .version import version as websockets_version + + +__all__ = [ + "SERVER", + "USER_AGENT", + "Request", + "Response", +] + + +PYTHON_VERSION = "{}.{}".format(*sys.version_info) + +# User-Agent header for HTTP requests. +USER_AGENT = os.environ.get( + "WEBSOCKETS_USER_AGENT", + f"Python/{PYTHON_VERSION} websockets/{websockets_version}", +) + +# Server header for HTTP responses. +SERVER = os.environ.get( + "WEBSOCKETS_SERVER", + f"Python/{PYTHON_VERSION} websockets/{websockets_version}", +) + +# Maximum total size of headers is around 128 * 8 KiB = 1 MiB. +MAX_NUM_HEADERS = int(os.environ.get("WEBSOCKETS_MAX_NUM_HEADERS", "128")) + +# Limit request line and header lines. 8KiB is the most common default +# configuration of popular HTTP servers. +MAX_LINE_LENGTH = int(os.environ.get("WEBSOCKETS_MAX_LINE_LENGTH", "8192")) + +# Support for HTTP response bodies is intended to read an error message +# returned by a server. It isn't designed to perform large file transfers. +MAX_BODY_SIZE = int(os.environ.get("WEBSOCKETS_MAX_BODY_SIZE", "1_048_576")) # 1 MiB + + +def d(value: bytes) -> str: + """ + Decode a bytestring for interpolating into an error message. + + """ + return value.decode(errors="backslashreplace") + + +# See https://datatracker.ietf.org/doc/html/rfc7230#appendix-B. + +# Regex for validating header names. + +_token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+") + +# Regex for validating header values. + +# We don't attempt to support obsolete line folding. + +# Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff). + +# The ABNF is complicated because it attempts to express that optional +# whitespace is ignored. We strip whitespace and don't revalidate that. + +# See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189 + +_value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*") + + +@dataclasses.dataclass +class Request: + """ + WebSocket handshake request. + + Attributes: + path: Request path, including optional query. + headers: Request headers. + """ + + path: str + headers: Headers + # body isn't useful is the context of this library. + + _exception: Exception | None = None + + @property + def exception(self) -> Exception | None: # pragma: no cover + warnings.warn( # deprecated in 10.3 - 2022-04-17 + "Request.exception is deprecated; use ServerProtocol.handshake_exc instead", + DeprecationWarning, + ) + return self._exception + + @classmethod + def parse( + cls, + read_line: Callable[[int], Generator[None, None, bytes]], + ) -> Generator[None, None, Request]: + """ + Parse a WebSocket handshake request. + + This is a generator-based coroutine. + + The request path isn't URL-decoded or validated in any way. + + The request path and headers are expected to contain only ASCII + characters. Other characters are represented with surrogate escapes. + + :meth:`parse` doesn't attempt to read the request body because + WebSocket handshake requests don't have one. If the request contains a + body, it may be read from the data stream after :meth:`parse` returns. + + Args: + read_line: Generator-based coroutine that reads a LF-terminated + line or raises an exception if there isn't enough data + + Raises: + EOFError: If the connection is closed without a full HTTP request. + SecurityError: If the request exceeds a security limit. + ValueError: If the request isn't well formatted. + + """ + # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1 + + # Parsing is simple because fixed values are expected for method and + # version and because path isn't checked. Since WebSocket software tends + # to implement HTTP/1.1 strictly, there's little need for lenient parsing. + + try: + request_line = yield from parse_line(read_line) + except EOFError as exc: + raise EOFError("connection closed while reading HTTP request line") from exc + + try: + method, raw_path, protocol = request_line.split(b" ", 2) + except ValueError: # not enough values to unpack (expected 3, got 1-2) + raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None + if protocol != b"HTTP/1.1": + raise ValueError( + f"unsupported protocol; expected HTTP/1.1: {d(request_line)}" + ) + if method != b"GET": + raise ValueError(f"unsupported HTTP method; expected GET; got {d(method)}") + path = raw_path.decode("ascii", "surrogateescape") + + headers = yield from parse_headers(read_line) + + # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3 + + if "Transfer-Encoding" in headers: + raise NotImplementedError("transfer codings aren't supported") + + if "Content-Length" in headers: + raise ValueError("unsupported request body") + + return cls(path, headers) + + def serialize(self) -> bytes: + """ + Serialize a WebSocket handshake request. + + """ + # Since the request line and headers only contain ASCII characters, + # we can keep this simple. + request = f"GET {self.path} HTTP/1.1\r\n".encode() + request += self.headers.serialize() + return request + + +@dataclasses.dataclass +class Response: + """ + WebSocket handshake response. + + Attributes: + status_code: Response code. + reason_phrase: Response reason. + headers: Response headers. + body: Response body. + + """ + + status_code: int + reason_phrase: str + headers: Headers + body: bytes = b"" + + _exception: Exception | None = None + + @property + def exception(self) -> Exception | None: # pragma: no cover + warnings.warn( # deprecated in 10.3 - 2022-04-17 + "Response.exception is deprecated; " + "use ClientProtocol.handshake_exc instead", + DeprecationWarning, + ) + return self._exception + + @classmethod + def parse( + cls, + read_line: Callable[[int], Generator[None, None, bytes]], + read_exact: Callable[[int], Generator[None, None, bytes]], + read_to_eof: Callable[[int], Generator[None, None, bytes]], + ) -> Generator[None, None, Response]: + """ + Parse a WebSocket handshake response. + + This is a generator-based coroutine. + + The reason phrase and headers are expected to contain only ASCII + characters. Other characters are represented with surrogate escapes. + + Args: + read_line: Generator-based coroutine that reads a LF-terminated + line or raises an exception if there isn't enough data. + read_exact: Generator-based coroutine that reads the requested + bytes or raises an exception if there isn't enough data. + read_to_eof: Generator-based coroutine that reads until the end + of the stream. + + Raises: + EOFError: If the connection is closed without a full HTTP response. + SecurityError: If the response exceeds a security limit. + LookupError: If the response isn't well formatted. + ValueError: If the response isn't well formatted. + + """ + # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.2 + + try: + status_line = yield from parse_line(read_line) + except EOFError as exc: + raise EOFError("connection closed while reading HTTP status line") from exc + + try: + protocol, raw_status_code, raw_reason = status_line.split(b" ", 2) + except ValueError: # not enough values to unpack (expected 3, got 1-2) + raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None + if protocol != b"HTTP/1.1": + raise ValueError( + f"unsupported protocol; expected HTTP/1.1: {d(status_line)}" + ) + try: + status_code = int(raw_status_code) + except ValueError: # invalid literal for int() with base 10 + raise ValueError( + f"invalid status code; expected integer; got {d(raw_status_code)}" + ) from None + if not 100 <= status_code < 600: + raise ValueError( + f"invalid status code; expected 100–599; got {d(raw_status_code)}" + ) + if not _value_re.fullmatch(raw_reason): + raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}") + reason = raw_reason.decode("ascii", "surrogateescape") + + headers = yield from parse_headers(read_line) + + body = yield from read_body( + status_code, headers, read_line, read_exact, read_to_eof + ) + + return cls(status_code, reason, headers, body) + + def serialize(self) -> bytes: + """ + Serialize a WebSocket handshake response. + + """ + # Since the status line and headers only contain ASCII characters, + # we can keep this simple. + response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode() + response += self.headers.serialize() + response += self.body + return response + + +def parse_line( + read_line: Callable[[int], Generator[None, None, bytes]], +) -> Generator[None, None, bytes]: + """ + Parse a single line. + + CRLF is stripped from the return value. + + Args: + read_line: Generator-based coroutine that reads a LF-terminated line + or raises an exception if there isn't enough data. + + Raises: + EOFError: If the connection is closed without a CRLF. + SecurityError: If the response exceeds a security limit. + + """ + try: + line = yield from read_line(MAX_LINE_LENGTH) + except RuntimeError: + raise SecurityError("line too long") + # Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5 + if not line.endswith(b"\r\n"): + raise EOFError("line without CRLF") + return line[:-2] + + +def parse_headers( + read_line: Callable[[int], Generator[None, None, bytes]], +) -> Generator[None, None, Headers]: + """ + Parse HTTP headers. + + Non-ASCII characters are represented with surrogate escapes. + + Args: + read_line: Generator-based coroutine that reads a LF-terminated line + or raises an exception if there isn't enough data. + + Raises: + EOFError: If the connection is closed without complete headers. + SecurityError: If the request exceeds a security limit. + ValueError: If the request isn't well formatted. + + """ + # https://datatracker.ietf.org/doc/html/rfc7230#section-3.2 + + # We don't attempt to support obsolete line folding. + + headers = Headers() + for _ in range(MAX_NUM_HEADERS + 1): + try: + line = yield from parse_line(read_line) + except EOFError as exc: + raise EOFError("connection closed while reading HTTP headers") from exc + if line == b"": + break + + try: + raw_name, raw_value = line.split(b":", 1) + except ValueError: # not enough values to unpack (expected 2, got 1) + raise ValueError(f"invalid HTTP header line: {d(line)}") from None + if not _token_re.fullmatch(raw_name): + raise ValueError(f"invalid HTTP header name: {d(raw_name)}") + raw_value = raw_value.strip(b" \t") + if not _value_re.fullmatch(raw_value): + raise ValueError(f"invalid HTTP header value: {d(raw_value)}") + + name = raw_name.decode("ascii") # guaranteed to be ASCII at this point + value = raw_value.decode("ascii", "surrogateescape") + headers[name] = value + + else: + raise SecurityError("too many HTTP headers") + + return headers + + +def read_body( + status_code: int, + headers: Headers, + read_line: Callable[[int], Generator[None, None, bytes]], + read_exact: Callable[[int], Generator[None, None, bytes]], + read_to_eof: Callable[[int], Generator[None, None, bytes]], +) -> Generator[None, None, bytes]: + # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3 + + # Since websockets only does GET requests (no HEAD, no CONNECT), all + # responses except 1xx, 204, and 304 include a message body. + if 100 <= status_code < 200 or status_code == 204 or status_code == 304: + return b"" + + # MultipleValuesError is sufficiently unlikely that we don't attempt to + # handle it when accessing headers. Instead we document that its parent + # class, LookupError, may be raised. + # Conversions from str to int are protected by sys.set_int_max_str_digits.. + + elif (coding := headers.get("Transfer-Encoding")) is not None: + if coding != "chunked": + raise NotImplementedError(f"transfer coding {coding} isn't supported") + + body = b"" + while True: + chunk_size_line = yield from parse_line(read_line) + raw_chunk_size = chunk_size_line.split(b";", 1)[0] + # Set a lower limit than default_max_str_digits; 1 EB is plenty. + if len(raw_chunk_size) > 15: + str_chunk_size = raw_chunk_size.decode(errors="backslashreplace") + raise SecurityError(f"chunk too large: 0x{str_chunk_size} bytes") + chunk_size = int(raw_chunk_size, 16) + if chunk_size == 0: + break + if len(body) + chunk_size > MAX_BODY_SIZE: + raise SecurityError( + f"chunk too large: {chunk_size} bytes after {len(body)} bytes" + ) + body += yield from read_exact(chunk_size) + if (yield from read_exact(2)) != b"\r\n": + raise ValueError("chunk without CRLF") + # Read the trailer. + yield from parse_headers(read_line) + return body + + elif (raw_content_length := headers.get("Content-Length")) is not None: + # Set a lower limit than default_max_str_digits; 1 EiB is plenty. + if len(raw_content_length) > 18: + raise SecurityError(f"body too large: {raw_content_length} bytes") + content_length = int(raw_content_length) + if content_length > MAX_BODY_SIZE: + raise SecurityError(f"body too large: {content_length} bytes") + return (yield from read_exact(content_length)) + + else: + try: + return (yield from read_to_eof(MAX_BODY_SIZE)) + except RuntimeError: + raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes") |