summaryrefslogtreecommitdiff
path: root/venv/lib/python3.11/site-packages/mdurl/_decode.py
diff options
context:
space:
mode:
authorcyfraeviolae <cyfraeviolae>2024-04-03 03:10:44 -0400
committercyfraeviolae <cyfraeviolae>2024-04-03 03:10:44 -0400
commit6d7ba58f880be618ade07f8ea080fe8c4bf8a896 (patch)
treeb1c931051ffcebd2bd9d61d98d6233ffa289bbce /venv/lib/python3.11/site-packages/mdurl/_decode.py
parent4f884c9abc32990b4061a1bb6997b4b37e58ea0b (diff)
venv
Diffstat (limited to 'venv/lib/python3.11/site-packages/mdurl/_decode.py')
-rw-r--r--venv/lib/python3.11/site-packages/mdurl/_decode.py104
1 files changed, 104 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/mdurl/_decode.py b/venv/lib/python3.11/site-packages/mdurl/_decode.py
new file mode 100644
index 0000000..9b50a2d
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/mdurl/_decode.py
@@ -0,0 +1,104 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+import functools
+import re
+
+DECODE_DEFAULT_CHARS = ";/?:@&=+$,#"  # default `exclude` of decode(): escapes of these characters are kept percent-encoded
+DECODE_COMPONENT_CHARS = ""  # empty `exclude`: nothing is kept encoded; presumably meant for decoding a single URL component (not used in this file)
+
# Memo of the lookup tables built by get_decode_cache(), keyed by the
# ``exclude`` string they were built for.
decode_cache: dict[str, list[str]] = {}


def get_decode_cache(exclude: str) -> Sequence[str]:
    """Return the ASCII lookup table used to decode ``%XX`` escapes.

    The table has 128 entries indexed by byte value.  Entry ``n`` is
    ``chr(n)``, except for characters listed in *exclude*, whose entry is
    their upper-case percent-encoded form (e.g. ``"%2F"`` for ``"/"``) so
    that decoding leaves them escaped.

    Tables are memoised in ``decode_cache``; repeated calls with the same
    *exclude* return the same list object.

    Non-ASCII characters in *exclude* are ignored: the table only covers
    code points 0-127, and ``repl_func_with_cache`` only consults it for
    byte values below 0x80.
    """
    try:
        return decode_cache[exclude]
    except KeyError:
        pass

    table = [chr(code) for code in range(128)]

    for ch in exclude:
        code = ord(ch)
        # Skip anything outside the table instead of raising IndexError.
        if code < 128:
            table[code] = f"%{code:02X}"

    # Publish only a fully built table, so a failure above can never leave
    # a half-initialised entry behind for later callers.
    decode_cache[exclude] = table
    return table
+
+
def decode(string: str, exclude: str = DECODE_DEFAULT_CHARS) -> str:
    """Decode the percent-encoded escapes in *string*.

    Every run of consecutive ``%XX`` escapes (hex digits matched
    case-insensitively) is handed to ``repl_func_with_cache`` together with
    the lookup table that ``get_decode_cache`` returns for *exclude*; text
    outside such runs is returned unchanged.

    *exclude* lists the characters whose escapes must stay encoded.
    """
    lookup = get_decode_cache(exclude)

    def _replace_run(found: re.Match) -> str:
        return repl_func_with_cache(found, cache=lookup)

    escape_runs = re.compile(r"(%[a-f0-9]{2})+", re.IGNORECASE)
    return escape_runs.sub(_replace_run, string)
+
+
def repl_func_with_cache(match: re.Match, cache: Sequence[str]) -> str:
    """Decode one matched run of ``%XX`` escapes into text.

    The matched text is walked three characters (one escape) at a time:

    * a byte below 0x80 is replaced by ``cache[byte]``;
    * a UTF-8 lead byte followed by enough continuation bytes (``10xxxxxx``)
      is decoded as a 2-, 3- or 4-byte sequence; if strict UTF-8 decoding
      rejects it, one U+FFFD is emitted per byte of the sequence;
    * any other byte (stray continuation byte, invalid lead byte, or a lead
      byte whose sequence is truncated or malformed) becomes one U+FFFD and
      scanning resumes at the next escape.

    Parameters:
        match: regex match whose full text is the run of escapes.
        cache: lookup table indexed by byte value for bytes below 0x80.

    Returns:
        The decoded replacement string for the whole run.
    """
    # (mask applied to the lead byte, expected masked value, sequence length)
    multibyte_forms = (
        (0xE0, 0xC0, 2),  # 110xxxxx 10xxxxxx
        (0xF0, 0xE0, 3),  # 1110xxxx 10xxxxxx 10xxxxxx
        (0xF8, 0xF0, 4),  # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    )

    escaped = match.group()
    end = len(escaped)
    pieces: list[str] = []
    pos = 0

    while pos < end:
        lead = int(escaped[pos + 1 : pos + 3], 16)

        # Plain ASCII: the table decides whether it is decoded or kept escaped.
        if lead < 0x80:
            pieces.append(cache[lead])
            pos += 3
            continue

        consumed = 0
        for mask, marker, width in multibyte_forms:
            # Offset of the "%" that starts the last escape of the sequence.
            last_escape = pos + 3 * (width - 1)
            if (lead & mask) != marker or last_escape >= end:
                continue

            tail = [
                int(escaped[k + 1 : k + 3], 16)
                for k in range(pos + 3, last_escape + 1, 3)
            ]
            if any((octet & 0xC0) != 0x80 for octet in tail):
                continue

            try:
                pieces.append(bytes((lead, *tail)).decode())
            except UnicodeDecodeError:
                # Well-framed but invalid (overlong, surrogate, out of range).
                pieces.append("\ufffd" * width)
            consumed = width
            break

        if consumed:
            pos += 3 * consumed
        else:
            # No usable sequence starts here: replace just this one byte.
            pieces.append("\ufffd")
            pos += 3

    return "".join(pieces)