summaryrefslogtreecommitdiff
path: root/venv/lib/python3.11/site-packages/mdurl/_encode.py
blob: bc2e5b917afe9e9ecaa6f11af7a9ac82704d3914 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from __future__ import annotations

from collections.abc import Sequence
from string import ascii_letters, digits, hexdigits
from urllib.parse import quote as encode_uri_component

# Characters that are always emitted unencoded, whatever `exclude` set is used.
ASCII_LETTERS_AND_DIGITS = ascii_letters + digits

# Default `exclude` argument of `encode()`: these characters are left
# unencoded in addition to the alphanumerics.
ENCODE_DEFAULT_CHARS = ";/?:@&=+$,-_.!~*'()#"
# Narrower exclude set. Not referenced by the code visible here; presumably
# meant for encoding a single URI component -- verify against callers.
ENCODE_COMPONENT_CHARS = "-_.!~*'()"

# Memo of the lookup tables built by `get_encode_cache()`, keyed by the
# `exclude` string each table was built for.
encode_cache: dict[str, list[str]] = {}


# Create a lookup array where anything but characters in the `exclude`
# string and alphanumeric chars is percent-encoded.
def get_encode_cache(exclude: str) -> Sequence[str]:
    """Return the ASCII lookup table used by ``encode`` for ``exclude``.

    The table has 128 entries, indexed by code point.  An entry is the
    character itself when the character is an ASCII letter, a digit, or
    listed in ``exclude``; otherwise it is the character's ``%XX``
    percent-encoding (upper-case hex).

    Tables are memoised in the module-level ``encode_cache`` dict, keyed by
    the ``exclude`` string, so repeated calls return the same list object.

    Non-ASCII characters in ``exclude`` are ignored: the table only covers
    code points below 128, and ``encode`` never consults it for anything
    else.  (Indexing the table with them used to raise ``IndexError`` and
    leave a half-built table behind in ``encode_cache``.)
    """
    cached = encode_cache.get(exclude)
    if cached is not None:
        return cached

    # Start from "encode everything except alphanumerics" ...
    table = [
        ch if ch in ASCII_LETTERS_AND_DIGITS else f"%{code:02X}"
        for code, ch in enumerate(map(chr, range(128)))
    ]
    # ... then punch holes for the explicitly excluded ASCII characters.
    for ch in exclude:
        code = ord(ch)
        if code < 128:
            table[code] = ch

    # Publish only a fully built table so a failure can never be memoised.
    encode_cache[exclude] = table
    return table


# Encode unsafe characters with percent-encoding, skipping already
# encoded sequences.
#
#  - string        - string to encode
#  - exclude       - string of characters to leave unencoded (in addition to a-zA-Z0-9)
#  - keep_escaped  - don't encode '%' in a correct escape sequence (default: True)
def encode(
    string: str, exclude: str = ENCODE_DEFAULT_CHARS, *, keep_escaped: bool = True
) -> str:
    """Percent-encode the unsafe characters of ``string``.

    Parameters:
        string: The text to encode.
        exclude: Characters to leave unencoded, in addition to ASCII
            letters and digits.
        keep_escaped: When true, a ``%`` that starts a well-formed ``%XX``
            escape (two hex digits) is copied through unchanged instead of
            being encoded again.

    Returns:
        The encoded string.  ASCII characters are mapped through the table
        from ``get_encode_cache(exclude)``; non-ASCII characters are
        percent-encoded as UTF-8.  A UTF-16 surrogate pair stored as two
        separate code points is encoded as the single character it
        represents, and an unpaired surrogate is replaced by the encoding
        of U+FFFD (``%EF%BF%BD``).
    """
    cache = get_encode_cache(exclude)
    # Collect the pieces and join once; repeated ``+=`` on a str may be quadratic.
    parts: list[str] = []

    length = len(string)
    i = 0
    while i < length:
        ch = string[i]
        code = ord(ch)

        # An existing "%XX" escape is passed through untouched.  The bounds
        # check guarantees both hex digits are present before slicing.
        if keep_escaped and ch == "%" and i + 2 < length:
            escape = string[i : i + 3]
            if all(c in hexdigits for c in escape[1:]):
                parts.append(escape)
                i += 3
                continue

        if code < 128:
            parts.append(cache[code])
            i += 1
            continue

        if 0xD800 <= code <= 0xDFFF:
            # High surrogate followed by a low surrogate: a valid pair.
            if code <= 0xDBFF and i + 1 < length:
                low = ord(string[i + 1])
                if 0xDC00 <= low <= 0xDFFF:
                    # Surrogates cannot be encoded to UTF-8 directly (the
                    # quote call would raise UnicodeEncodeError), so rebuild
                    # the supplementary code point the pair stands for.
                    combined = chr(
                        0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00)
                    )
                    parts.append(encode_uri_component(combined))
                    i += 2
                    continue
            # Unpaired surrogate: emit the encoded replacement character.
            parts.append("%EF%BF%BD")
            i += 1
            continue

        parts.append(encode_uri_component(ch))
        i += 1

    return "".join(parts)