diff options
Diffstat (limited to 'venv/lib/python3.11/site-packages/h11/_abnf.py')
-rw-r--r-- | venv/lib/python3.11/site-packages/h11/_abnf.py | 132 |
1 files changed, 132 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/h11/_abnf.py b/venv/lib/python3.11/site-packages/h11/_abnf.py new file mode 100644 index 0000000..933587f --- /dev/null +++ b/venv/lib/python3.11/site-packages/h11/_abnf.py @@ -0,0 +1,132 @@ +# We use native strings for all the re patterns, to take advantage of string +# formatting, and then convert to bytestrings when compiling the final re +# objects. + +# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#whitespace +# OWS = *( SP / HTAB ) +# ; optional whitespace +OWS = r"[ \t]*" + +# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.token.separators +# token = 1*tchar +# +# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" +# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" +# / DIGIT / ALPHA +# ; any VCHAR, except delimiters +token = r"[-!#$%&'*+.^_`|~0-9a-zA-Z]+" + +# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#header.fields +# field-name = token +field_name = token + +# The standard says: +# +# field-value = *( field-content / obs-fold ) +# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] +# field-vchar = VCHAR / obs-text +# obs-fold = CRLF 1*( SP / HTAB ) +# ; obsolete line folding +# ; see Section 3.2.4 +# +# https://tools.ietf.org/html/rfc5234#appendix-B.1 +# +# VCHAR = %x21-7E +# ; visible (printing) characters +# +# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.quoted-string +# obs-text = %x80-FF +# +# However, the standard definition of field-content is WRONG! It disallows +# fields containing a single visible character surrounded by whitespace, +# e.g. "foo a bar". +# +# See: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189 +# +# So our definition of field_content attempts to fix it up... +# +# Also, we allow lots of control characters, because apparently people assume +# that they're legal in practice (e.g., google analytics makes cookies with +# \x01 in them!): +# https://github.com/python-hyper/h11/issues/57 +# We still don't allow NUL or whitespace, because those are often treated as +# meta-characters and letting them through can lead to nasty issues like SSRF. +vchar = r"[\x21-\x7e]" +vchar_or_obs_text = r"[^\x00\s]" +field_vchar = vchar_or_obs_text +field_content = r"{field_vchar}+(?:[ \t]+{field_vchar}+)*".format(**globals()) + +# We handle obs-fold at a different level, and our fixed-up field_content +# already grows to swallow the whole value, so ? instead of * +field_value = r"({field_content})?".format(**globals()) + +# header-field = field-name ":" OWS field-value OWS +header_field = ( + r"(?P<field_name>{field_name})" + r":" + r"{OWS}" + r"(?P<field_value>{field_value})" + r"{OWS}".format(**globals()) +) + +# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#request.line +# +# request-line = method SP request-target SP HTTP-version CRLF +# method = token +# HTTP-version = HTTP-name "/" DIGIT "." DIGIT +# HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive +# +# request-target is complicated (see RFC 7230 sec 5.3) -- could be path, full +# URL, host+port (for connect), or even "*", but in any case we are guaranteed +# that it contists of the visible printing characters. +method = token +request_target = r"{vchar}+".format(**globals()) +http_version = r"HTTP/(?P<http_version>[0-9]\.[0-9])" +request_line = ( + r"(?P<method>{method})" + r" " + r"(?P<target>{request_target})" + r" " + r"{http_version}".format(**globals()) +) + +# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#status.line +# +# status-line = HTTP-version SP status-code SP reason-phrase CRLF +# status-code = 3DIGIT +# reason-phrase = *( HTAB / SP / VCHAR / obs-text ) +status_code = r"[0-9]{3}" +reason_phrase = r"([ \t]|{vchar_or_obs_text})*".format(**globals()) +status_line = ( + r"{http_version}" + r" " + r"(?P<status_code>{status_code})" + # However, there are apparently a few too many servers out there that just + # leave out the reason phrase: + # https://github.com/scrapy/scrapy/issues/345#issuecomment-281756036 + # https://github.com/seanmonstar/httparse/issues/29 + # so make it optional. ?: is a non-capturing group. + r"(?: (?P<reason>{reason_phrase}))?".format(**globals()) +) + +HEXDIG = r"[0-9A-Fa-f]" +# Actually +# +# chunk-size = 1*HEXDIG +# +# but we impose an upper-limit to avoid ridiculosity. len(str(2**64)) == 20 +chunk_size = r"({HEXDIG}){{1,20}}".format(**globals()) +# Actually +# +# chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) +# +# but we aren't parsing the things so we don't really care. +chunk_ext = r";.*" +chunk_header = ( + r"(?P<chunk_size>{chunk_size})" + r"(?P<chunk_ext>{chunk_ext})?" + r"{OWS}\r\n".format( + **globals() + ) # Even though the specification does not allow for extra whitespaces, + # we are lenient with trailing whitespaces because some servers on the wild use it. +) |