diff options
author | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:17:55 -0400 |
---|---|---|
committer | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:17:55 -0400 |
commit | 12cf076118570eebbff08c6b3090e0d4798447a1 (patch) | |
tree | 3ba25e17e3c3a5e82316558ba3864b955919ff72 /venv/lib/python3.11/site-packages/mdurl | |
parent | c45662ff3923b34614ddcc8feb9195541166dcc5 (diff) |
no venv
Diffstat (limited to 'venv/lib/python3.11/site-packages/mdurl')
13 files changed, 0 insertions, 553 deletions
diff --git a/venv/lib/python3.11/site-packages/mdurl/__init__.py b/venv/lib/python3.11/site-packages/mdurl/__init__.py deleted file mode 100644 index cdbb640..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -__all__ = ( - "decode", - "DECODE_DEFAULT_CHARS", - "DECODE_COMPONENT_CHARS", - "encode", - "ENCODE_DEFAULT_CHARS", - "ENCODE_COMPONENT_CHARS", - "format", - "parse", - "URL", -) -__version__ = "0.1.2" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT - -from mdurl._decode import DECODE_COMPONENT_CHARS, DECODE_DEFAULT_CHARS, decode -from mdurl._encode import ENCODE_COMPONENT_CHARS, ENCODE_DEFAULT_CHARS, encode -from mdurl._format import format -from mdurl._parse import url_parse as parse -from mdurl._url import URL diff --git a/venv/lib/python3.11/site-packages/mdurl/__pycache__/__init__.cpython-311.pyc b/venv/lib/python3.11/site-packages/mdurl/__pycache__/__init__.cpython-311.pyc Binary files differdeleted file mode 100644 index da00954..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/__pycache__/__init__.cpython-311.pyc +++ /dev/null diff --git a/venv/lib/python3.11/site-packages/mdurl/__pycache__/_decode.cpython-311.pyc b/venv/lib/python3.11/site-packages/mdurl/__pycache__/_decode.cpython-311.pyc Binary files differdeleted file mode 100644 index 0a2afd4..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/__pycache__/_decode.cpython-311.pyc +++ /dev/null diff --git a/venv/lib/python3.11/site-packages/mdurl/__pycache__/_encode.cpython-311.pyc b/venv/lib/python3.11/site-packages/mdurl/__pycache__/_encode.cpython-311.pyc Binary files differdeleted file mode 100644 index 0e78e0e..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/__pycache__/_encode.cpython-311.pyc +++ /dev/null diff --git a/venv/lib/python3.11/site-packages/mdurl/__pycache__/_format.cpython-311.pyc b/venv/lib/python3.11/site-packages/mdurl/__pycache__/_format.cpython-311.pyc Binary files differdeleted file mode 100644 index 8f93088..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/__pycache__/_format.cpython-311.pyc +++ /dev/null diff --git a/venv/lib/python3.11/site-packages/mdurl/__pycache__/_parse.cpython-311.pyc b/venv/lib/python3.11/site-packages/mdurl/__pycache__/_parse.cpython-311.pyc Binary files differdeleted file mode 100644 index d455485..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/__pycache__/_parse.cpython-311.pyc +++ /dev/null diff --git a/venv/lib/python3.11/site-packages/mdurl/__pycache__/_url.cpython-311.pyc b/venv/lib/python3.11/site-packages/mdurl/__pycache__/_url.cpython-311.pyc Binary files differdeleted file mode 100644 index 7679d66..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/__pycache__/_url.cpython-311.pyc +++ /dev/null diff --git a/venv/lib/python3.11/site-packages/mdurl/_decode.py b/venv/lib/python3.11/site-packages/mdurl/_decode.py deleted file mode 100644 index 9b50a2d..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/_decode.py +++ /dev/null @@ -1,104 +0,0 @@ -from __future__ import annotations - -from collections.abc import Sequence -import functools -import re - -DECODE_DEFAULT_CHARS = ";/?:@&=+$,#" -DECODE_COMPONENT_CHARS = "" - -decode_cache: dict[str, list[str]] = {} - - -def get_decode_cache(exclude: str) -> Sequence[str]: - if exclude in decode_cache: - return decode_cache[exclude] - - cache: list[str] = [] - decode_cache[exclude] = cache - - for i in range(128): - ch = chr(i) - cache.append(ch) - - for i in range(len(exclude)): - ch_code = ord(exclude[i]) - cache[ch_code] = "%" + ("0" + hex(ch_code)[2:].upper())[-2:] - - return cache - - -# Decode percent-encoded string. -# -def decode(string: str, exclude: str = DECODE_DEFAULT_CHARS) -> str: - cache = get_decode_cache(exclude) - repl_func = functools.partial(repl_func_with_cache, cache=cache) - return re.sub(r"(%[a-f0-9]{2})+", repl_func, string, flags=re.IGNORECASE) - - -def repl_func_with_cache(match: re.Match, cache: Sequence[str]) -> str: - seq = match.group() - result = "" - - i = 0 - l = len(seq) # noqa: E741 - while i < l: - b1 = int(seq[i + 1 : i + 3], 16) - - if b1 < 0x80: - result += cache[b1] - i += 3 # emulate JS for loop statement3 - continue - - if (b1 & 0xE0) == 0xC0 and (i + 3 < l): - # 110xxxxx 10xxxxxx - b2 = int(seq[i + 4 : i + 6], 16) - - if (b2 & 0xC0) == 0x80: - all_bytes = bytes((b1, b2)) - try: - result += all_bytes.decode() - except UnicodeDecodeError: - result += "\ufffd" * 2 - - i += 3 - i += 3 # emulate JS for loop statement3 - continue - - if (b1 & 0xF0) == 0xE0 and (i + 6 < l): - # 1110xxxx 10xxxxxx 10xxxxxx - b2 = int(seq[i + 4 : i + 6], 16) - b3 = int(seq[i + 7 : i + 9], 16) - - if (b2 & 0xC0) == 0x80 and (b3 & 0xC0) == 0x80: - all_bytes = bytes((b1, b2, b3)) - try: - result += all_bytes.decode() - except UnicodeDecodeError: - result += "\ufffd" * 3 - - i += 6 - i += 3 # emulate JS for loop statement3 - continue - - if (b1 & 0xF8) == 0xF0 and (i + 9 < l): - # 111110xx 10xxxxxx 10xxxxxx 10xxxxxx - b2 = int(seq[i + 4 : i + 6], 16) - b3 = int(seq[i + 7 : i + 9], 16) - b4 = int(seq[i + 10 : i + 12], 16) - - if (b2 & 0xC0) == 0x80 and (b3 & 0xC0) == 0x80 and (b4 & 0xC0) == 0x80: - all_bytes = bytes((b1, b2, b3, b4)) - try: - result += all_bytes.decode() - except UnicodeDecodeError: - result += "\ufffd" * 4 - - i += 9 - i += 3 # emulate JS for loop statement3 - continue - - result += "\ufffd" - i += 3 # emulate JS for loop statement3 - - return result diff --git a/venv/lib/python3.11/site-packages/mdurl/_encode.py b/venv/lib/python3.11/site-packages/mdurl/_encode.py deleted file mode 100644 index bc2e5b9..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/_encode.py +++ /dev/null @@ -1,85 +0,0 @@ -from __future__ import annotations - -from collections.abc import Sequence -from string import ascii_letters, digits, hexdigits -from urllib.parse import quote as encode_uri_component - -ASCII_LETTERS_AND_DIGITS = ascii_letters + digits - -ENCODE_DEFAULT_CHARS = ";/?:@&=+$,-_.!~*'()#" -ENCODE_COMPONENT_CHARS = "-_.!~*'()" - -encode_cache: dict[str, list[str]] = {} - - -# Create a lookup array where anything but characters in `chars` string -# and alphanumeric chars is percent-encoded. -def get_encode_cache(exclude: str) -> Sequence[str]: - if exclude in encode_cache: - return encode_cache[exclude] - - cache: list[str] = [] - encode_cache[exclude] = cache - - for i in range(128): - ch = chr(i) - - if ch in ASCII_LETTERS_AND_DIGITS: - # always allow unencoded alphanumeric characters - cache.append(ch) - else: - cache.append("%" + ("0" + hex(i)[2:].upper())[-2:]) - - for i in range(len(exclude)): - cache[ord(exclude[i])] = exclude[i] - - return cache - - -# Encode unsafe characters with percent-encoding, skipping already -# encoded sequences. -# -# - string - string to encode -# - exclude - list of characters to ignore (in addition to a-zA-Z0-9) -# - keepEscaped - don't encode '%' in a correct escape sequence (default: true) -def encode( - string: str, exclude: str = ENCODE_DEFAULT_CHARS, *, keep_escaped: bool = True -) -> str: - result = "" - - cache = get_encode_cache(exclude) - - l = len(string) # noqa: E741 - i = 0 - while i < l: - code = ord(string[i]) - - # % - if keep_escaped and code == 0x25 and i + 2 < l: - if all(c in hexdigits for c in string[i + 1 : i + 3]): - result += string[i : i + 3] - i += 2 - i += 1 # JS for loop statement3 - continue - - if code < 128: - result += cache[code] - i += 1 # JS for loop statement3 - continue - - if code >= 0xD800 and code <= 0xDFFF: - if code >= 0xD800 and code <= 0xDBFF and i + 1 < l: - next_code = ord(string[i + 1]) - if next_code >= 0xDC00 and next_code <= 0xDFFF: - result += encode_uri_component(string[i] + string[i + 1]) - i += 1 - i += 1 # JS for loop statement3 - continue - result += "%EF%BF%BD" - i += 1 # JS for loop statement3 - continue - - result += encode_uri_component(string[i]) - i += 1 # JS for loop statement3 - - return result diff --git a/venv/lib/python3.11/site-packages/mdurl/_format.py b/venv/lib/python3.11/site-packages/mdurl/_format.py deleted file mode 100644 index 12524ca..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/_format.py +++ /dev/null @@ -1,27 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from mdurl._url import URL - - -def format(url: URL) -> str: # noqa: A001 - result = "" - - result += url.protocol or "" - result += "//" if url.slashes else "" - result += url.auth + "@" if url.auth else "" - - if url.hostname and ":" in url.hostname: - # ipv6 address - result += "[" + url.hostname + "]" - else: - result += url.hostname or "" - - result += ":" + url.port if url.port else "" - result += url.pathname or "" - result += url.search or "" - result += url.hash or "" - - return result diff --git a/venv/lib/python3.11/site-packages/mdurl/_parse.py b/venv/lib/python3.11/site-packages/mdurl/_parse.py deleted file mode 100644 index ffeeac7..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/_parse.py +++ /dev/null @@ -1,304 +0,0 @@ -# Copyright Joyent, Inc. and other Node contributors. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to permit -# persons to whom the Software is furnished to do so, subject to the -# following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN -# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -# USE OR OTHER DEALINGS IN THE SOFTWARE. - - -# Changes from joyent/node: -# -# 1. No leading slash in paths, -# e.g. in `url.parse('http://foo?bar')` pathname is ``, not `/` -# -# 2. Backslashes are not replaced with slashes, -# so `http:\\example.org\` is treated like a relative path -# -# 3. Trailing colon is treated like a part of the path, -# i.e. in `http://example.org:foo` pathname is `:foo` -# -# 4. Nothing is URL-encoded in the resulting object, -# (in joyent/node some chars in auth and paths are encoded) -# -# 5. `url.parse()` does not have `parseQueryString` argument -# -# 6. Removed extraneous result properties: `host`, `path`, `query`, etc., -# which can be constructed using other parts of the url. - -from __future__ import annotations - -from collections import defaultdict -import re - -from mdurl._url import URL - -# Reference: RFC 3986, RFC 1808, RFC 2396 - -# define these here so at least they only have to be -# compiled once on the first module load. -PROTOCOL_PATTERN = re.compile(r"^([a-z0-9.+-]+:)", flags=re.IGNORECASE) -PORT_PATTERN = re.compile(r":[0-9]*$") - -# Special case for a simple path URL -SIMPLE_PATH_PATTERN = re.compile(r"^(//?(?!/)[^?\s]*)(\?[^\s]*)?$") - -# RFC 2396: characters reserved for delimiting URLs. -# We actually just auto-escape these. -DELIMS = ("<", ">", '"', "`", " ", "\r", "\n", "\t") - -# RFC 2396: characters not allowed for various reasons. -UNWISE = ("{", "}", "|", "\\", "^", "`") + DELIMS - -# Allowed by RFCs, but cause of XSS attacks. Always escape these. -AUTO_ESCAPE = ("'",) + UNWISE -# Characters that are never ever allowed in a hostname. -# Note that any invalid chars are also handled, but these -# are the ones that are *expected* to be seen, so we fast-path -# them. -NON_HOST_CHARS = ("%", "/", "?", ";", "#") + AUTO_ESCAPE -HOST_ENDING_CHARS = ("/", "?", "#") -HOSTNAME_MAX_LEN = 255 -HOSTNAME_PART_PATTERN = re.compile(r"^[+a-z0-9A-Z_-]{0,63}$") -HOSTNAME_PART_START = re.compile(r"^([+a-z0-9A-Z_-]{0,63})(.*)$") -# protocols that can allow "unsafe" and "unwise" chars. - -# protocols that never have a hostname. -HOSTLESS_PROTOCOL = defaultdict( - bool, - { - "javascript": True, - "javascript:": True, - }, -) -# protocols that always contain a // bit. -SLASHED_PROTOCOL = defaultdict( - bool, - { - "http": True, - "https": True, - "ftp": True, - "gopher": True, - "file": True, - "http:": True, - "https:": True, - "ftp:": True, - "gopher:": True, - "file:": True, - }, -) - - -class MutableURL: - def __init__(self) -> None: - self.protocol: str | None = None - self.slashes: bool = False - self.auth: str | None = None - self.port: str | None = None - self.hostname: str | None = None - self.hash: str | None = None - self.search: str | None = None - self.pathname: str | None = None - - def parse(self, url: str, slashes_denote_host: bool) -> "MutableURL": - lower_proto = "" - slashes = False - rest = url - - # trim before proceeding. - # This is to support parse stuff like " http://foo.com \n" - rest = rest.strip() - - if not slashes_denote_host and len(url.split("#")) == 1: - # Try fast path regexp - simple_path = SIMPLE_PATH_PATTERN.match(rest) - if simple_path: - self.pathname = simple_path.group(1) - if simple_path.group(2): - self.search = simple_path.group(2) - return self - - proto = "" - proto_match = PROTOCOL_PATTERN.match(rest) - if proto_match: - proto = proto_match.group() - lower_proto = proto.lower() - self.protocol = proto - rest = rest[len(proto) :] - - # figure out if it's got a host - # user@server is *always* interpreted as a hostname, and url - # resolution will treat //foo/bar as host=foo,path=bar because that's - # how the browser resolves relative URLs. - if slashes_denote_host or proto or re.search(r"^//[^@/]+@[^@/]+", rest): - slashes = rest.startswith("//") - if slashes and not (proto and HOSTLESS_PROTOCOL[proto]): - rest = rest[2:] - self.slashes = True - - if not HOSTLESS_PROTOCOL[proto] and ( - slashes or (proto and not SLASHED_PROTOCOL[proto]) - ): - - # there's a hostname. - # the first instance of /, ?, ;, or # ends the host. - # - # If there is an @ in the hostname, then non-host chars *are* allowed - # to the left of the last @ sign, unless some host-ending character - # comes *before* the @-sign. - # URLs are obnoxious. - # - # ex: - # http://a@b@c/ => user:a@b host:c - # http://a@b?@c => user:a host:c path:/?@c - - # v0.12 TODO(isaacs): This is not quite how Chrome does things. - # Review our test case against browsers more comprehensively. - - # find the first instance of any hostEndingChars - host_end = -1 - for i in range(len(HOST_ENDING_CHARS)): - hec = rest.find(HOST_ENDING_CHARS[i]) - if hec != -1 and (host_end == -1 or hec < host_end): - host_end = hec - - # at this point, either we have an explicit point where the - # auth portion cannot go past, or the last @ char is the decider. - if host_end == -1: - # atSign can be anywhere. - at_sign = rest.rfind("@") - else: - # atSign must be in auth portion. - # http://a@b/c@d => host:b auth:a path:/c@d - at_sign = rest.rfind("@", 0, host_end + 1) - - # Now we have a portion which is definitely the auth. - # Pull that off. - if at_sign != -1: - auth = rest[:at_sign] - rest = rest[at_sign + 1 :] - self.auth = auth - - # the host is the remaining to the left of the first non-host char - host_end = -1 - for i in range(len(NON_HOST_CHARS)): - hec = rest.find(NON_HOST_CHARS[i]) - if hec != -1 and (host_end == -1 or hec < host_end): - host_end = hec - # if we still have not hit it, then the entire thing is a host. - if host_end == -1: - host_end = len(rest) - - if host_end > 0 and rest[host_end - 1] == ":": - host_end -= 1 - host = rest[:host_end] - rest = rest[host_end:] - - # pull out port. - self.parse_host(host) - - # we've indicated that there is a hostname, - # so even if it's empty, it has to be present. - self.hostname = self.hostname or "" - - # if hostname begins with [ and ends with ] - # assume that it's an IPv6 address. - ipv6_hostname = self.hostname.startswith("[") and self.hostname.endswith( - "]" - ) - - # validate a little. - if not ipv6_hostname: - hostparts = self.hostname.split(".") - l = len(hostparts) # noqa: E741 - i = 0 - while i < l: - part = hostparts[i] - if not part: - i += 1 # emulate statement3 in JS for loop - continue - if not HOSTNAME_PART_PATTERN.search(part): - newpart = "" - k = len(part) - j = 0 - while j < k: - if ord(part[j]) > 127: - # we replace non-ASCII char with a temporary placeholder - # we need this to make sure size of hostname is not - # broken by replacing non-ASCII by nothing - newpart += "x" - else: - newpart += part[j] - j += 1 # emulate statement3 in JS for loop - - # we test again with ASCII char only - if not HOSTNAME_PART_PATTERN.search(newpart): - valid_parts = hostparts[:i] - not_host = hostparts[i + 1 :] - bit = HOSTNAME_PART_START.search(part) - if bit: - valid_parts.append(bit.group(1)) - not_host.insert(0, bit.group(2)) - if not_host: - rest = ".".join(not_host) + rest - self.hostname = ".".join(valid_parts) - break - i += 1 # emulate statement3 in JS for loop - - if len(self.hostname) > HOSTNAME_MAX_LEN: - self.hostname = "" - - # strip [ and ] from the hostname - # the host field still retains them, though - if ipv6_hostname: - self.hostname = self.hostname[1:-1] - - # chop off from the tail first. - hash = rest.find("#") # noqa: A001 - if hash != -1: - # got a fragment string. - self.hash = rest[hash:] - rest = rest[:hash] - qm = rest.find("?") - if qm != -1: - self.search = rest[qm:] - rest = rest[:qm] - if rest: - self.pathname = rest - if SLASHED_PROTOCOL[lower_proto] and self.hostname and not self.pathname: - self.pathname = "" - - return self - - def parse_host(self, host: str) -> None: - port_match = PORT_PATTERN.search(host) - if port_match: - port = port_match.group() - if port != ":": - self.port = port[1:] - host = host[: -len(port)] - if host: - self.hostname = host - - -def url_parse(url: URL | str, *, slashes_denote_host: bool = False) -> URL: - if isinstance(url, URL): - return url - u = MutableURL() - u.parse(url, slashes_denote_host) - return URL( - u.protocol, u.slashes, u.auth, u.port, u.hostname, u.hash, u.search, u.pathname - ) diff --git a/venv/lib/python3.11/site-packages/mdurl/_url.py b/venv/lib/python3.11/site-packages/mdurl/_url.py deleted file mode 100644 index f866e7a..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/_url.py +++ /dev/null @@ -1,14 +0,0 @@ -from __future__ import annotations - -from typing import NamedTuple - - -class URL(NamedTuple): - protocol: str | None - slashes: bool - auth: str | None - port: str | None - hostname: str | None - hash: str | None # noqa: A003 - search: str | None - pathname: str | None diff --git a/venv/lib/python3.11/site-packages/mdurl/py.typed b/venv/lib/python3.11/site-packages/mdurl/py.typed deleted file mode 100644 index 7632ecf..0000000 --- a/venv/lib/python3.11/site-packages/mdurl/py.typed +++ /dev/null @@ -1 +0,0 @@ -# Marker file for PEP 561 |