summaryrefslogtreecommitdiff
path: root/venv/lib/python3.11/site-packages/websockets/legacy/http.py
diff options
context:
space:
mode:
Diffstat (limited to 'venv/lib/python3.11/site-packages/websockets/legacy/http.py')
-rw-r--r--venv/lib/python3.11/site-packages/websockets/legacy/http.py201
1 files changed, 201 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/websockets/legacy/http.py b/venv/lib/python3.11/site-packages/websockets/legacy/http.py
new file mode 100644
index 0000000..2ac7f70
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/websockets/legacy/http.py
@@ -0,0 +1,201 @@
+from __future__ import annotations
+
+import asyncio
+import re
+from typing import Tuple
+
+from ..datastructures import Headers
+from ..exceptions import SecurityError
+
+
+__all__ = ["read_request", "read_response"]
+
+# Upper bound on the number of header lines read_headers() accepts before it
+# raises SecurityError.
+MAX_HEADERS = 128
+# Upper bound, in bytes, on one line as returned by the stream (trailing CRLF
+# included); read_line() raises SecurityError for anything longer.
+MAX_LINE = 8192
+
+
+def d(value: bytes) -> str:
+    """
+    Render a bytestring as text that is safe to embed in an error message.
+
+    Bytes that are not valid UTF-8 are shown as ``\\xNN`` escapes instead of
+    raising, so this never fails on arbitrary input.
+
+    """
+    printable = value.decode("utf-8", "backslashreplace")
+    return printable
+
+
+# See https://www.rfc-editor.org/rfc/rfc7230.html#appendix-B.
+
+# Regex for validating header names.
+# Applied with fullmatch(), so every byte of the name must be a token
+# character and an empty name is rejected.
+
+_token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")
+
+# Regex for validating header values.
+# Also applied with fullmatch(); it is reused for the reason phrase of a
+# status line, and it accepts the empty string.
+
+# We don't attempt to support obsolete line folding.
+
+# Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff).
+
+# The ABNF is complicated because it attempts to express that optional
+# whitespace is ignored. We strip whitespace and don't revalidate that.
+
+# See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
+
+_value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")
+
+
+async def read_request(stream: asyncio.StreamReader) -> Tuple[str, Headers]:
+    """
+    Parse the start of an HTTP/1.1 GET request into ``(path, headers)``.
+
+    No URL-decoding or validation is applied to ``path``.
+
+    Both ``path`` and ``headers`` should be pure ASCII; any other byte is
+    carried through as a surrogate escape.
+
+    The request body is never consumed, since a WebSocket handshake request
+    has none. Should one be present, it remains available on ``stream`` once
+    this coroutine has returned.
+
+    Args:
+        stream: Input to read the request from.
+
+    Raises:
+        EOFError: If the connection is closed without a full HTTP request.
+        SecurityError: If the request exceeds a security limit.
+        ValueError: If the request isn't well formatted.
+
+    """
+    # Request line grammar:
+    # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.1.1
+
+    # Method and version must equal fixed values and the path is passed
+    # through untouched, so splitting on spaces is all the parsing required.
+
+    try:
+        start_line = await read_line(stream)
+    except EOFError as exc:
+        raise EOFError("connection closed while reading HTTP request line") from exc
+
+    # maxsplit=2 yields at most three fields; fewer means a malformed line.
+    fields = start_line.split(b" ", 2)
+    if len(fields) != 3:
+        raise ValueError(f"invalid HTTP request line: {d(start_line)}")
+    method, target, version = fields
+
+    if method != b"GET":
+        raise ValueError(f"unsupported HTTP method: {d(method)}")
+    if version != b"HTTP/1.1":
+        raise ValueError(f"unsupported HTTP version: {d(version)}")
+
+    # Decoding with surrogateescape cannot fail, whatever bytes the path holds.
+    path = target.decode("ascii", "surrogateescape")
+    return path, await read_headers(stream)
+
+
+async def read_response(stream: asyncio.StreamReader) -> Tuple[int, str, Headers]:
+    """
+    Read an HTTP/1.1 response and return ``(status_code, reason, headers)``.
+
+    ``reason`` and ``headers`` are expected to contain only ASCII characters.
+    Other characters are represented with surrogate escapes.
+
+    :func:`read_response` doesn't attempt to read the response body because
+    WebSocket handshake responses don't have one. If the response contains a
+    body, it may be read from ``stream`` after this coroutine returns.
+
+    Args:
+        stream: Input to read the response from.
+
+    Raises:
+        EOFError: If the connection is closed without a full HTTP response.
+        SecurityError: If the response exceeds a security limit.
+        ValueError: If the response isn't well formatted.
+
+    """
+    # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.1.2
+
+    # As in read_request, parsing is simple because a fixed value is expected
+    # for version, status_code is a 3-digit number, and reason can be ignored.
+
+    try:
+        status_line = await read_line(stream)
+    except EOFError as exc:
+        raise EOFError("connection closed while reading HTTP status line") from exc
+
+    try:
+        version, raw_status_code, raw_reason = status_line.split(b" ", 2)
+    except ValueError:  # not enough values to unpack (expected 3, got 1-2)
+        raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None
+
+    if version != b"HTTP/1.1":
+        raise ValueError(f"unsupported HTTP version: {d(version)}")
+    try:
+        status_code = int(raw_status_code)
+    except ValueError:  # invalid literal for int() with base 10
+        raise ValueError(f"invalid HTTP status code: {d(raw_status_code)}") from None
+    if not 100 <= status_code < 1000:
+        raise ValueError(f"unsupported HTTP status code: {d(raw_status_code)}")
+    if not _value_re.fullmatch(raw_reason):
+        raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
+    # _value_re admits obs-text bytes (\x80-\xff), which a strict UTF-8 decode
+    # would reject with a bare UnicodeDecodeError. Decode the same way header
+    # values are decoded so such bytes become surrogate escapes, as documented.
+    reason = raw_reason.decode("ascii", "surrogateescape")
+
+    headers = await read_headers(stream)
+
+    return status_code, reason, headers
+
+
+async def read_headers(stream: asyncio.StreamReader) -> Headers:
+    """
+    Collect the HTTP header block from ``stream`` into a ``Headers`` object.
+
+    Reading stops at the first empty line. Bytes outside ASCII in a header
+    value are kept as surrogate escapes.
+
+    Raises:
+        EOFError: If the connection is closed before the empty line.
+        SecurityError: If there are too many header lines.
+        ValueError: If a line has no colon, or a name or value is invalid.
+
+    """
+    # Header field grammar:
+    # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.2
+
+    # Obsolete line folding is deliberately unsupported.
+
+    collected = Headers()
+    # The extra iteration leaves room for the empty line that ends the block.
+    for _ in range(MAX_HEADERS + 1):
+        try:
+            line = await read_line(stream)
+        except EOFError as exc:
+            raise EOFError("connection closed while reading HTTP headers") from exc
+        if not line:
+            return collected
+
+        # An empty separator means the line contained no colon at all.
+        raw_name, colon, raw_value = line.partition(b":")
+        if not colon:
+            raise ValueError(f"invalid HTTP header line: {d(line)}")
+        if _token_re.fullmatch(raw_name) is None:
+            raise ValueError(f"invalid HTTP header name: {d(raw_name)}")
+        raw_value = raw_value.strip(b" \t")
+        if _value_re.fullmatch(raw_value) is None:
+            raise ValueError(f"invalid HTTP header value: {d(raw_value)}")
+
+        # The name passed _token_re, so a strict ASCII decode cannot fail.
+        key = raw_name.decode("ascii")
+        collected[key] = raw_value.decode("ascii", "surrogateescape")
+
+    # Every permitted line was consumed without reaching the empty line.
+    raise SecurityError("too many HTTP headers")
+
+
+async def read_line(stream: asyncio.StreamReader) -> bytes:
+    """
+    Fetch one CRLF-terminated line from ``stream``, without its CRLF.
+
+    Raises:
+        SecurityError: If the line, CRLF included, is longer than MAX_LINE.
+        EOFError: If the data read doesn't end with CRLF.
+
+    """
+    # Security: readline() itself is capped by the StreamReader's limit.
+    raw = await stream.readline()
+    # Security: a hard-coded cap of 8 KiB keeps header values small.
+    if len(raw) > MAX_LINE:
+        raise SecurityError("line too long")
+    # Requiring CRLF isn't mandatory but is safe; see
+    # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.5
+    if raw[-2:] != b"\r\n":
+        raise EOFError("line without CRLF")
+    return raw[:-2]