diff options
author | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:10:44 -0400 |
---|---|---|
committer | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:10:44 -0400 |
commit | 6d7ba58f880be618ade07f8ea080fe8c4bf8a896 (patch) | |
tree | b1c931051ffcebd2bd9d61d98d6233ffa289bbce /venv/lib/python3.11/site-packages/pip/_internal/utils/encoding.py | |
parent | 4f884c9abc32990b4061a1bb6997b4b37e58ea0b (diff) |
venv
Diffstat (limited to 'venv/lib/python3.11/site-packages/pip/_internal/utils/encoding.py')
-rw-r--r-- | venv/lib/python3.11/site-packages/pip/_internal/utils/encoding.py | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/pip/_internal/utils/encoding.py b/venv/lib/python3.11/site-packages/pip/_internal/utils/encoding.py new file mode 100644 index 0000000..008f06a --- /dev/null +++ b/venv/lib/python3.11/site-packages/pip/_internal/utils/encoding.py @@ -0,0 +1,36 @@ +import codecs +import locale +import re +import sys +from typing import List, Tuple + +BOMS: List[Tuple[bytes, str]] = [ + (codecs.BOM_UTF8, "utf-8"), + (codecs.BOM_UTF16, "utf-16"), + (codecs.BOM_UTF16_BE, "utf-16-be"), + (codecs.BOM_UTF16_LE, "utf-16-le"), + (codecs.BOM_UTF32, "utf-32"), + (codecs.BOM_UTF32_BE, "utf-32-be"), + (codecs.BOM_UTF32_LE, "utf-32-le"), +] + +ENCODING_RE = re.compile(rb"coding[:=]\s*([-\w.]+)") + + +def auto_decode(data: bytes) -> str: + """Check a bytes string for a BOM to correctly detect the encoding + + Fallback to locale.getpreferredencoding(False) like open() on Python3""" + for bom, encoding in BOMS: + if data.startswith(bom): + return data[len(bom) :].decode(encoding) + # Lets check the first two lines as in PEP263 + for line in data.split(b"\n")[:2]: + if line[0:1] == b"#" and ENCODING_RE.search(line): + result = ENCODING_RE.search(line) + assert result is not None + encoding = result.groups()[0].decode("ascii") + return data.decode(encoding) + return data.decode( + locale.getpreferredencoding(False) or sys.getdefaultencoding(), + ) |