summaryrefslogtreecommitdiff
path: root/venv/lib/python3.11/site-packages/h11/_abnf.py
diff options
context:
space:
mode:
Diffstat (limited to 'venv/lib/python3.11/site-packages/h11/_abnf.py')
-rw-r--r--venv/lib/python3.11/site-packages/h11/_abnf.py132
1 files changed, 132 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/h11/_abnf.py b/venv/lib/python3.11/site-packages/h11/_abnf.py
new file mode 100644
index 0000000..933587f
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/h11/_abnf.py
@@ -0,0 +1,132 @@
+# We use native strings for all the re patterns, to take advantage of string
+# formatting, and then convert to bytestrings when compiling the final re
+# objects.
+
+# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#whitespace
+# OWS = *( SP / HTAB )
+# ; optional whitespace
+OWS = r"[ \t]*"
+
+# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.token.separators
+# token = 1*tchar
+#
+# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+# / DIGIT / ALPHA
+# ; any VCHAR, except delimiters
+token = r"[-!#$%&'*+.^_`|~0-9a-zA-Z]+"
+
+# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#header.fields
+# field-name = token
+field_name = token
+
+# The standard says:
+#
+# field-value = *( field-content / obs-fold )
+# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+# field-vchar = VCHAR / obs-text
+# obs-fold = CRLF 1*( SP / HTAB )
+# ; obsolete line folding
+# ; see Section 3.2.4
+#
+# https://tools.ietf.org/html/rfc5234#appendix-B.1
+#
+# VCHAR = %x21-7E
+# ; visible (printing) characters
+#
+# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.quoted-string
+# obs-text = %x80-FF
+#
+# However, the standard definition of field-content is WRONG! It disallows
+# fields containing a single visible character surrounded by whitespace,
+# e.g. "foo a bar".
+#
+# See: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
+#
+# So our definition of field_content attempts to fix it up...
+#
+# Also, we allow lots of control characters, because apparently people assume
+# that they're legal in practice (e.g., google analytics makes cookies with
+# \x01 in them!):
+# https://github.com/python-hyper/h11/issues/57
+# We still don't allow NUL or whitespace, because those are often treated as
+# meta-characters and letting them through can lead to nasty issues like SSRF.
+vchar = r"[\x21-\x7e]"
+vchar_or_obs_text = r"[^\x00\s]"
+field_vchar = vchar_or_obs_text
+field_content = r"{field_vchar}+(?:[ \t]+{field_vchar}+)*".format(**globals())
+
+# We handle obs-fold at a different level, and our fixed-up field_content
+# already grows to swallow the whole value, so ? instead of *
+field_value = r"({field_content})?".format(**globals())
+
+# header-field = field-name ":" OWS field-value OWS
+header_field = (
+ r"(?P<field_name>{field_name})"
+ r":"
+ r"{OWS}"
+ r"(?P<field_value>{field_value})"
+ r"{OWS}".format(**globals())
+)
+
+# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#request.line
+#
+# request-line = method SP request-target SP HTTP-version CRLF
+# method = token
+# HTTP-version = HTTP-name "/" DIGIT "." DIGIT
+# HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive
+#
+# request-target is complicated (see RFC 7230 sec 5.3) -- could be path, full
+# URL, host+port (for connect), or even "*", but in any case we are guaranteed
+# that it contists of the visible printing characters.
+method = token
+request_target = r"{vchar}+".format(**globals())
+http_version = r"HTTP/(?P<http_version>[0-9]\.[0-9])"
+request_line = (
+ r"(?P<method>{method})"
+ r" "
+ r"(?P<target>{request_target})"
+ r" "
+ r"{http_version}".format(**globals())
+)
+
+# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#status.line
+#
+# status-line = HTTP-version SP status-code SP reason-phrase CRLF
+# status-code = 3DIGIT
+# reason-phrase = *( HTAB / SP / VCHAR / obs-text )
+status_code = r"[0-9]{3}"
+reason_phrase = r"([ \t]|{vchar_or_obs_text})*".format(**globals())
+status_line = (
+ r"{http_version}"
+ r" "
+ r"(?P<status_code>{status_code})"
+ # However, there are apparently a few too many servers out there that just
+ # leave out the reason phrase:
+ # https://github.com/scrapy/scrapy/issues/345#issuecomment-281756036
+ # https://github.com/seanmonstar/httparse/issues/29
+ # so make it optional. ?: is a non-capturing group.
+ r"(?: (?P<reason>{reason_phrase}))?".format(**globals())
+)
+
+HEXDIG = r"[0-9A-Fa-f]"
+# Actually
+#
+# chunk-size = 1*HEXDIG
+#
+# but we impose an upper-limit to avoid ridiculosity. len(str(2**64)) == 20
+chunk_size = r"({HEXDIG}){{1,20}}".format(**globals())
+# Actually
+#
+# chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
+#
+# but we aren't parsing the things so we don't really care.
+chunk_ext = r";.*"
+chunk_header = (
+ r"(?P<chunk_size>{chunk_size})"
+ r"(?P<chunk_ext>{chunk_ext})?"
+ r"{OWS}\r\n".format(
+ **globals()
+ ) # Even though the specification does not allow for extra whitespaces,
+ # we are lenient with trailing whitespaces because some servers on the wild use it.
+)