diff options
author | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:10:44 -0400 |
---|---|---|
committer | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:10:44 -0400 |
commit | 6d7ba58f880be618ade07f8ea080fe8c4bf8a896 (patch) | |
tree | b1c931051ffcebd2bd9d61d98d6233ffa289bbce /venv/lib/python3.11/site-packages/mdurl/_encode.py | |
parent | 4f884c9abc32990b4061a1bb6997b4b37e58ea0b (diff) |
venv
Diffstat (limited to 'venv/lib/python3.11/site-packages/mdurl/_encode.py')
-rw-r--r-- | venv/lib/python3.11/site-packages/mdurl/_encode.py | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/mdurl/_encode.py b/venv/lib/python3.11/site-packages/mdurl/_encode.py new file mode 100644 index 0000000..bc2e5b9 --- /dev/null +++ b/venv/lib/python3.11/site-packages/mdurl/_encode.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +from collections.abc import Sequence +from string import ascii_letters, digits, hexdigits +from urllib.parse import quote as encode_uri_component + +ASCII_LETTERS_AND_DIGITS = ascii_letters + digits + +ENCODE_DEFAULT_CHARS = ";/?:@&=+$,-_.!~*'()#" +ENCODE_COMPONENT_CHARS = "-_.!~*'()" + +encode_cache: dict[str, list[str]] = {} + + +# Create a lookup array where anything but characters in `chars` string +# and alphanumeric chars is percent-encoded. +def get_encode_cache(exclude: str) -> Sequence[str]: + if exclude in encode_cache: + return encode_cache[exclude] + + cache: list[str] = [] + encode_cache[exclude] = cache + + for i in range(128): + ch = chr(i) + + if ch in ASCII_LETTERS_AND_DIGITS: + # always allow unencoded alphanumeric characters + cache.append(ch) + else: + cache.append("%" + ("0" + hex(i)[2:].upper())[-2:]) + + for i in range(len(exclude)): + cache[ord(exclude[i])] = exclude[i] + + return cache + + +# Encode unsafe characters with percent-encoding, skipping already +# encoded sequences. +# +# - string - string to encode +# - exclude - list of characters to ignore (in addition to a-zA-Z0-9) +# - keepEscaped - don't encode '%' in a correct escape sequence (default: true) +def encode( + string: str, exclude: str = ENCODE_DEFAULT_CHARS, *, keep_escaped: bool = True +) -> str: + result = "" + + cache = get_encode_cache(exclude) + + l = len(string) # noqa: E741 + i = 0 + while i < l: + code = ord(string[i]) + + # % + if keep_escaped and code == 0x25 and i + 2 < l: + if all(c in hexdigits for c in string[i + 1 : i + 3]): + result += string[i : i + 3] + i += 2 + i += 1 # JS for loop statement3 + continue + + if code < 128: + result += cache[code] + i += 1 # JS for loop statement3 + continue + + if code >= 0xD800 and code <= 0xDFFF: + if code >= 0xD800 and code <= 0xDBFF and i + 1 < l: + next_code = ord(string[i + 1]) + if next_code >= 0xDC00 and next_code <= 0xDFFF: + result += encode_uri_component(string[i] + string[i + 1]) + i += 1 + i += 1 # JS for loop statement3 + continue + result += "%EF%BF%BD" + i += 1 # JS for loop statement3 + continue + + result += encode_uri_component(string[i]) + i += 1 # JS for loop statement3 + + return result |