summaryrefslogtreecommitdiff
path: root/venv/lib/python3.11/site-packages/mdurl/_encode.py
diff options
context:
space:
mode:
authorcyfraeviolae <cyfraeviolae>2024-04-03 03:10:44 -0400
committercyfraeviolae <cyfraeviolae>2024-04-03 03:10:44 -0400
commit6d7ba58f880be618ade07f8ea080fe8c4bf8a896 (patch)
treeb1c931051ffcebd2bd9d61d98d6233ffa289bbce /venv/lib/python3.11/site-packages/mdurl/_encode.py
parent4f884c9abc32990b4061a1bb6997b4b37e58ea0b (diff)
venv
Diffstat (limited to 'venv/lib/python3.11/site-packages/mdurl/_encode.py')
-rw-r--r--venv/lib/python3.11/site-packages/mdurl/_encode.py85
1 files changed, 85 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/mdurl/_encode.py b/venv/lib/python3.11/site-packages/mdurl/_encode.py
new file mode 100644
index 0000000..bc2e5b9
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/mdurl/_encode.py
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from string import ascii_letters, digits, hexdigits
+from urllib.parse import quote as encode_uri_component
+
ASCII_LETTERS_AND_DIGITS = ascii_letters + digits

# Characters left unencoded by default (in addition to a-zA-Z0-9).
ENCODE_DEFAULT_CHARS = ";/?:@&=+$,-_.!~*'()#"
# A narrower exclusion set that callers can pass as `exclude`.
ENCODE_COMPONENT_CHARS = "-_.!~*'()"

# Memoized lookup tables, keyed by the `exclude` string they were built for.
encode_cache: dict[str, list[str]] = {}


def get_encode_cache(exclude: str) -> Sequence[str]:
    """Return the ASCII lookup table used by `encode` for a given `exclude`.

    The table has 128 entries, indexed by code point. Entry ``i`` is the
    character ``chr(i)`` itself when it is an ASCII letter, a digit, or
    listed in `exclude`; otherwise it is the percent-encoded form
    (``"%XX"``, upper-case hex).

    Characters in `exclude` outside the ASCII range are ignored: the table
    only covers code points below 128, and `encode` never consults it for
    anything else.

    Tables are memoized in the module-level `encode_cache`.
    """
    cached = encode_cache.get(exclude)
    if cached is not None:
        return cached

    table = [
        ch if ch in ASCII_LETTERS_AND_DIGITS else f"%{code:02X}"
        for code, ch in enumerate(map(chr, range(128)))
    ]

    for ch in exclude:
        code = ord(ch)
        # Guard against non-ASCII characters, which would index past the
        # end of the 128-entry table.
        if code < 128:
            table[code] = ch

    # Publish only once the table is complete, so a partially built table
    # is never handed out from the cache.
    encode_cache[exclude] = table
    return table


def encode(
    string: str, exclude: str = ENCODE_DEFAULT_CHARS, *, keep_escaped: bool = True
) -> str:
    """Percent-encode unsafe characters, skipping already encoded sequences.

    - string - string to encode
    - exclude - characters to leave as-is (in addition to a-zA-Z0-9);
      only ASCII characters in this string have an effect
    - keep_escaped - don't encode '%' when it starts a valid escape
      sequence, i.e. is followed by two hex digits (default: True)

    Non-ASCII characters are encoded as percent-escaped UTF-8. A high
    surrogate immediately followed by a low surrogate is treated as the
    single supplementary character it represents; any other surrogate is
    replaced by the encoded U+FFFD replacement character ("%EF%BF%BD").
    """
    table = get_encode_cache(exclude)
    pieces: list[str] = []

    length = len(string)
    pos = 0
    while pos < length:
        ch = string[pos]
        code = ord(ch)

        # '%' followed by two hex digits: keep the existing escape verbatim.
        if keep_escaped and ch == "%" and pos + 2 < length:
            if all(c in hexdigits for c in string[pos + 1 : pos + 3]):
                pieces.append(string[pos : pos + 3])
                pos += 3
                continue

        if code < 128:
            pieces.append(table[code])
            pos += 1
            continue

        if 0xD800 <= code <= 0xDFFF:
            if code <= 0xDBFF and pos + 1 < length:
                low = ord(string[pos + 1])
                if 0xDC00 <= low <= 0xDFFF:
                    # Surrogate code points cannot be UTF-8 encoded on their
                    # own, so merge the pair into the code point it stands
                    # for before percent-encoding.
                    combined = 0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00)
                    pieces.append(encode_uri_component(chr(combined)))
                    pos += 2
                    continue
            # Unpaired surrogate: emit the encoded replacement character.
            pieces.append("%EF%BF%BD")
            pos += 1
            continue

        pieces.append(encode_uri_component(ch))
        pos += 1

    return "".join(pieces)