diff options
Diffstat (limited to 'venv/lib/python3.11/site-packages/pip/_vendor/webencodings')
10 files changed, 1110 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__init__.py b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__init__.py new file mode 100644 index 0000000..d21d697 --- /dev/null +++ b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__init__.py @@ -0,0 +1,342 @@ +# coding: utf-8 +""" + +    webencodings +    ~~~~~~~~~~~~ + +    This is a Python implementation of the `WHATWG Encoding standard +    <http://encoding.spec.whatwg.org/>`. See README for details. + +    :copyright: Copyright 2012 by Simon Sapin +    :license: BSD, see LICENSE for details. + +""" + +from __future__ import unicode_literals + +import codecs + +from .labels import LABELS + + +VERSION = '0.5.1' + + +# Some names in Encoding are not valid Python aliases. Remap these. +PYTHON_NAMES = { +    'iso-8859-8-i': 'iso-8859-8', +    'x-mac-cyrillic': 'mac-cyrillic', +    'macintosh': 'mac-roman', +    'windows-874': 'cp874'} + +CACHE = {} + + +def ascii_lower(string): +    r"""Transform (only) ASCII letters to lower case: A-Z is mapped to a-z. + +    :param string: An Unicode string. +    :returns: A new Unicode string. + +    This is used for `ASCII case-insensitive +    <http://encoding.spec.whatwg.org/#ascii-case-insensitive>`_ +    matching of encoding labels. +    The same matching is also used, among other things, +    for `CSS keywords <http://dev.w3.org/csswg/css-values/#keywords>`_. + +    This is different from the :meth:`~py:str.lower` method of Unicode strings +    which also affect non-ASCII characters, +    sometimes mapping them into the ASCII range: + +        >>> keyword = u'Bac\N{KELVIN SIGN}ground' +        >>> assert keyword.lower() == u'background' +        >>> assert ascii_lower(keyword) != keyword.lower() +        >>> assert ascii_lower(keyword) == u'bac\N{KELVIN SIGN}ground' + +    """ +    # This turns out to be faster than unicode.translate() +    return string.encode('utf8').lower().decode('utf8') + + +def lookup(label): +    """ +    Look for an encoding by its label. +    This is the spec’s `get an encoding +    <http://encoding.spec.whatwg.org/#concept-encoding-get>`_ algorithm. +    Supported labels are listed there. + +    :param label: A string. +    :returns: +        An :class:`Encoding` object, or :obj:`None` for an unknown label. + +    """ +    # Only strip ASCII whitespace: U+0009, U+000A, U+000C, U+000D, and U+0020. +    label = ascii_lower(label.strip('\t\n\f\r ')) +    name = LABELS.get(label) +    if name is None: +        return None +    encoding = CACHE.get(name) +    if encoding is None: +        if name == 'x-user-defined': +            from .x_user_defined import codec_info +        else: +            python_name = PYTHON_NAMES.get(name, name) +            # Any python_name value that gets to here should be valid. +            codec_info = codecs.lookup(python_name) +        encoding = Encoding(name, codec_info) +        CACHE[name] = encoding +    return encoding + + +def _get_encoding(encoding_or_label): +    """ +    Accept either an encoding object or label. + +    :param encoding: An :class:`Encoding` object or a label string. +    :returns: An :class:`Encoding` object. +    :raises: :exc:`~exceptions.LookupError` for an unknown label. + +    """ +    if hasattr(encoding_or_label, 'codec_info'): +        return encoding_or_label + +    encoding = lookup(encoding_or_label) +    if encoding is None: +        raise LookupError('Unknown encoding label: %r' % encoding_or_label) +    return encoding + + +class Encoding(object): +    """Reresents a character encoding such as UTF-8, +    that can be used for decoding or encoding. + +    .. attribute:: name + +        Canonical name of the encoding + +    .. attribute:: codec_info + +        The actual implementation of the encoding, +        a stdlib :class:`~codecs.CodecInfo` object. +        See :func:`codecs.register`. + +    """ +    def __init__(self, name, codec_info): +        self.name = name +        self.codec_info = codec_info + +    def __repr__(self): +        return '<Encoding %s>' % self.name + + +#: The UTF-8 encoding. Should be used for new content and formats. +UTF8 = lookup('utf-8') + +_UTF16LE = lookup('utf-16le') +_UTF16BE = lookup('utf-16be') + + +def decode(input, fallback_encoding, errors='replace'): +    """ +    Decode a single string. + +    :param input: A byte string +    :param fallback_encoding: +        An :class:`Encoding` object or a label string. +        The encoding to use if :obj:`input` does note have a BOM. +    :param errors: Type of error handling. See :func:`codecs.register`. +    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label. +    :return: +        A ``(output, encoding)`` tuple of an Unicode string +        and an :obj:`Encoding`. + +    """ +    # Fail early if `encoding` is an invalid label. +    fallback_encoding = _get_encoding(fallback_encoding) +    bom_encoding, input = _detect_bom(input) +    encoding = bom_encoding or fallback_encoding +    return encoding.codec_info.decode(input, errors)[0], encoding + + +def _detect_bom(input): +    """Return (bom_encoding, input), with any BOM removed from the input.""" +    if input.startswith(b'\xFF\xFE'): +        return _UTF16LE, input[2:] +    if input.startswith(b'\xFE\xFF'): +        return _UTF16BE, input[2:] +    if input.startswith(b'\xEF\xBB\xBF'): +        return UTF8, input[3:] +    return None, input + + +def encode(input, encoding=UTF8, errors='strict'): +    """ +    Encode a single string. + +    :param input: An Unicode string. +    :param encoding: An :class:`Encoding` object or a label string. +    :param errors: Type of error handling. See :func:`codecs.register`. +    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label. +    :return: A byte string. + +    """ +    return _get_encoding(encoding).codec_info.encode(input, errors)[0] + + +def iter_decode(input, fallback_encoding, errors='replace'): +    """ +    "Pull"-based decoder. + +    :param input: +        An iterable of byte strings. + +        The input is first consumed just enough to determine the encoding +        based on the precense of a BOM, +        then consumed on demand when the return value is. +    :param fallback_encoding: +        An :class:`Encoding` object or a label string. +        The encoding to use if :obj:`input` does note have a BOM. +    :param errors: Type of error handling. See :func:`codecs.register`. +    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label. +    :returns: +        An ``(output, encoding)`` tuple. +        :obj:`output` is an iterable of Unicode strings, +        :obj:`encoding` is the :obj:`Encoding` that is being used. + +    """ + +    decoder = IncrementalDecoder(fallback_encoding, errors) +    generator = _iter_decode_generator(input, decoder) +    encoding = next(generator) +    return generator, encoding + + +def _iter_decode_generator(input, decoder): +    """Return a generator that first yields the :obj:`Encoding`, +    then yields output chukns as Unicode strings. + +    """ +    decode = decoder.decode +    input = iter(input) +    for chunck in input: +        output = decode(chunck) +        if output: +            assert decoder.encoding is not None +            yield decoder.encoding +            yield output +            break +    else: +        # Input exhausted without determining the encoding +        output = decode(b'', final=True) +        assert decoder.encoding is not None +        yield decoder.encoding +        if output: +            yield output +        return + +    for chunck in input: +        output = decode(chunck) +        if output: +            yield output +    output = decode(b'', final=True) +    if output: +        yield output + + +def iter_encode(input, encoding=UTF8, errors='strict'): +    """ +    “Pull”-based encoder. + +    :param input: An iterable of Unicode strings. +    :param encoding: An :class:`Encoding` object or a label string. +    :param errors: Type of error handling. See :func:`codecs.register`. +    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label. +    :returns: An iterable of byte strings. + +    """ +    # Fail early if `encoding` is an invalid label. +    encode = IncrementalEncoder(encoding, errors).encode +    return _iter_encode_generator(input, encode) + + +def _iter_encode_generator(input, encode): +    for chunck in input: +        output = encode(chunck) +        if output: +            yield output +    output = encode('', final=True) +    if output: +        yield output + + +class IncrementalDecoder(object): +    """ +    “Push”-based decoder. + +    :param fallback_encoding: +        An :class:`Encoding` object or a label string. +        The encoding to use if :obj:`input` does note have a BOM. +    :param errors: Type of error handling. See :func:`codecs.register`. +    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label. + +    """ +    def __init__(self, fallback_encoding, errors='replace'): +        # Fail early if `encoding` is an invalid label. +        self._fallback_encoding = _get_encoding(fallback_encoding) +        self._errors = errors +        self._buffer = b'' +        self._decoder = None +        #: The actual :class:`Encoding` that is being used, +        #: or :obj:`None` if that is not determined yet. +        #: (Ie. if there is not enough input yet to determine +        #: if there is a BOM.) +        self.encoding = None  # Not known yet. + +    def decode(self, input, final=False): +        """Decode one chunk of the input. + +        :param input: A byte string. +        :param final: +            Indicate that no more input is available. +            Must be :obj:`True` if this is the last call. +        :returns: An Unicode string. + +        """ +        decoder = self._decoder +        if decoder is not None: +            return decoder(input, final) + +        input = self._buffer + input +        encoding, input = _detect_bom(input) +        if encoding is None: +            if len(input) < 3 and not final:  # Not enough data yet. +                self._buffer = input +                return '' +            else:  # No BOM +                encoding = self._fallback_encoding +        decoder = encoding.codec_info.incrementaldecoder(self._errors).decode +        self._decoder = decoder +        self.encoding = encoding +        return decoder(input, final) + + +class IncrementalEncoder(object): +    """ +    “Push”-based encoder. + +    :param encoding: An :class:`Encoding` object or a label string. +    :param errors: Type of error handling. See :func:`codecs.register`. +    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label. + +    .. method:: encode(input, final=False) + +        :param input: An Unicode string. +        :param final: +            Indicate that no more input is available. +            Must be :obj:`True` if this is the last call. +        :returns: A byte string. + +    """ +    def __init__(self, encoding=UTF8, errors='strict'): +        encoding = _get_encoding(encoding) +        self.encode = encoding.codec_info.incrementalencoder(errors).encode diff --git a/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/__init__.cpython-311.pyc b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/__init__.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..7c00abe --- /dev/null +++ b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/__init__.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/labels.cpython-311.pyc b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/labels.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..9e8b1b5 --- /dev/null +++ b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/labels.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/mklabels.cpython-311.pyc b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/mklabels.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..4426839 --- /dev/null +++ b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/mklabels.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/tests.cpython-311.pyc b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/tests.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..db59a75 --- /dev/null +++ b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/tests.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/x_user_defined.cpython-311.pyc b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/x_user_defined.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..c509c80 --- /dev/null +++ b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/__pycache__/x_user_defined.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/labels.py b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/labels.py new file mode 100644 index 0000000..29cbf91 --- /dev/null +++ b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/labels.py @@ -0,0 +1,231 @@ +""" + +    webencodings.labels +    ~~~~~~~~~~~~~~~~~~~ + +    Map encoding labels to their name. + +    :copyright: Copyright 2012 by Simon Sapin +    :license: BSD, see LICENSE for details. + +""" + +# XXX Do not edit! +# This file is automatically generated by mklabels.py + +LABELS = { +    'unicode-1-1-utf-8':   'utf-8', +    'utf-8':               'utf-8', +    'utf8':                'utf-8', +    '866':                 'ibm866', +    'cp866':               'ibm866', +    'csibm866':            'ibm866', +    'ibm866':              'ibm866', +    'csisolatin2':         'iso-8859-2', +    'iso-8859-2':          'iso-8859-2', +    'iso-ir-101':          'iso-8859-2', +    'iso8859-2':           'iso-8859-2', +    'iso88592':            'iso-8859-2', +    'iso_8859-2':          'iso-8859-2', +    'iso_8859-2:1987':     'iso-8859-2', +    'l2':                  'iso-8859-2', +    'latin2':              'iso-8859-2', +    'csisolatin3':         'iso-8859-3', +    'iso-8859-3':          'iso-8859-3', +    'iso-ir-109':          'iso-8859-3', +    'iso8859-3':           'iso-8859-3', +    'iso88593':            'iso-8859-3', +    'iso_8859-3':          'iso-8859-3', +    'iso_8859-3:1988':     'iso-8859-3', +    'l3':                  'iso-8859-3', +    'latin3':              'iso-8859-3', +    'csisolatin4':         'iso-8859-4', +    'iso-8859-4':          'iso-8859-4', +    'iso-ir-110':          'iso-8859-4', +    'iso8859-4':           'iso-8859-4', +    'iso88594':            'iso-8859-4', +    'iso_8859-4':          'iso-8859-4', +    'iso_8859-4:1988':     'iso-8859-4', +    'l4':                  'iso-8859-4', +    'latin4':              'iso-8859-4', +    'csisolatincyrillic':  'iso-8859-5', +    'cyrillic':            'iso-8859-5', +    'iso-8859-5':          'iso-8859-5', +    'iso-ir-144':          'iso-8859-5', +    'iso8859-5':           'iso-8859-5', +    'iso88595':            'iso-8859-5', +    'iso_8859-5':          'iso-8859-5', +    'iso_8859-5:1988':     'iso-8859-5', +    'arabic':              'iso-8859-6', +    'asmo-708':            'iso-8859-6', +    'csiso88596e':         'iso-8859-6', +    'csiso88596i':         'iso-8859-6', +    'csisolatinarabic':    'iso-8859-6', +    'ecma-114':            'iso-8859-6', +    'iso-8859-6':          'iso-8859-6', +    'iso-8859-6-e':        'iso-8859-6', +    'iso-8859-6-i':        'iso-8859-6', +    'iso-ir-127':          'iso-8859-6', +    'iso8859-6':           'iso-8859-6', +    'iso88596':            'iso-8859-6', +    'iso_8859-6':          'iso-8859-6', +    'iso_8859-6:1987':     'iso-8859-6', +    'csisolatingreek':     'iso-8859-7', +    'ecma-118':            'iso-8859-7', +    'elot_928':            'iso-8859-7', +    'greek':               'iso-8859-7', +    'greek8':              'iso-8859-7', +    'iso-8859-7':          'iso-8859-7', +    'iso-ir-126':          'iso-8859-7', +    'iso8859-7':           'iso-8859-7', +    'iso88597':            'iso-8859-7', +    'iso_8859-7':          'iso-8859-7', +    'iso_8859-7:1987':     'iso-8859-7', +    'sun_eu_greek':        'iso-8859-7', +    'csiso88598e':         'iso-8859-8', +    'csisolatinhebrew':    'iso-8859-8', +    'hebrew':              'iso-8859-8', +    'iso-8859-8':          'iso-8859-8', +    'iso-8859-8-e':        'iso-8859-8', +    'iso-ir-138':          'iso-8859-8', +    'iso8859-8':           'iso-8859-8', +    'iso88598':            'iso-8859-8', +    'iso_8859-8':          'iso-8859-8', +    'iso_8859-8:1988':     'iso-8859-8', +    'visual':              'iso-8859-8', +    'csiso88598i':         'iso-8859-8-i', +    'iso-8859-8-i':        'iso-8859-8-i', +    'logical':             'iso-8859-8-i', +    'csisolatin6':         'iso-8859-10', +    'iso-8859-10':         'iso-8859-10', +    'iso-ir-157':          'iso-8859-10', +    'iso8859-10':          'iso-8859-10', +    'iso885910':           'iso-8859-10', +    'l6':                  'iso-8859-10', +    'latin6':              'iso-8859-10', +    'iso-8859-13':         'iso-8859-13', +    'iso8859-13':          'iso-8859-13', +    'iso885913':           'iso-8859-13', +    'iso-8859-14':         'iso-8859-14', +    'iso8859-14':          'iso-8859-14', +    'iso885914':           'iso-8859-14', +    'csisolatin9':         'iso-8859-15', +    'iso-8859-15':         'iso-8859-15', +    'iso8859-15':          'iso-8859-15', +    'iso885915':           'iso-8859-15', +    'iso_8859-15':         'iso-8859-15', +    'l9':                  'iso-8859-15', +    'iso-8859-16':         'iso-8859-16', +    'cskoi8r':             'koi8-r', +    'koi':                 'koi8-r', +    'koi8':                'koi8-r', +    'koi8-r':              'koi8-r', +    'koi8_r':              'koi8-r', +    'koi8-u':              'koi8-u', +    'csmacintosh':         'macintosh', +    'mac':                 'macintosh', +    'macintosh':           'macintosh', +    'x-mac-roman':         'macintosh', +    'dos-874':             'windows-874', +    'iso-8859-11':         'windows-874', +    'iso8859-11':          'windows-874', +    'iso885911':           'windows-874', +    'tis-620':             'windows-874', +    'windows-874':         'windows-874', +    'cp1250':              'windows-1250', +    'windows-1250':        'windows-1250', +    'x-cp1250':            'windows-1250', +    'cp1251':              'windows-1251', +    'windows-1251':        'windows-1251', +    'x-cp1251':            'windows-1251', +    'ansi_x3.4-1968':      'windows-1252', +    'ascii':               'windows-1252', +    'cp1252':              'windows-1252', +    'cp819':               'windows-1252', +    'csisolatin1':         'windows-1252', +    'ibm819':              'windows-1252', +    'iso-8859-1':          'windows-1252', +    'iso-ir-100':          'windows-1252', +    'iso8859-1':           'windows-1252', +    'iso88591':            'windows-1252', +    'iso_8859-1':          'windows-1252', +    'iso_8859-1:1987':     'windows-1252', +    'l1':                  'windows-1252', +    'latin1':              'windows-1252', +    'us-ascii':            'windows-1252', +    'windows-1252':        'windows-1252', +    'x-cp1252':            'windows-1252', +    'cp1253':              'windows-1253', +    'windows-1253':        'windows-1253', +    'x-cp1253':            'windows-1253', +    'cp1254':              'windows-1254', +    'csisolatin5':         'windows-1254', +    'iso-8859-9':          'windows-1254', +    'iso-ir-148':          'windows-1254', +    'iso8859-9':           'windows-1254', +    'iso88599':            'windows-1254', +    'iso_8859-9':          'windows-1254', +    'iso_8859-9:1989':     'windows-1254', +    'l5':                  'windows-1254', +    'latin5':              'windows-1254', +    'windows-1254':        'windows-1254', +    'x-cp1254':            'windows-1254', +    'cp1255':              'windows-1255', +    'windows-1255':        'windows-1255', +    'x-cp1255':            'windows-1255', +    'cp1256':              'windows-1256', +    'windows-1256':        'windows-1256', +    'x-cp1256':            'windows-1256', +    'cp1257':              'windows-1257', +    'windows-1257':        'windows-1257', +    'x-cp1257':            'windows-1257', +    'cp1258':              'windows-1258', +    'windows-1258':        'windows-1258', +    'x-cp1258':            'windows-1258', +    'x-mac-cyrillic':      'x-mac-cyrillic', +    'x-mac-ukrainian':     'x-mac-cyrillic', +    'chinese':             'gbk', +    'csgb2312':            'gbk', +    'csiso58gb231280':     'gbk', +    'gb2312':              'gbk', +    'gb_2312':             'gbk', +    'gb_2312-80':          'gbk', +    'gbk':                 'gbk', +    'iso-ir-58':           'gbk', +    'x-gbk':               'gbk', +    'gb18030':             'gb18030', +    'hz-gb-2312':          'hz-gb-2312', +    'big5':                'big5', +    'big5-hkscs':          'big5', +    'cn-big5':             'big5', +    'csbig5':              'big5', +    'x-x-big5':            'big5', +    'cseucpkdfmtjapanese': 'euc-jp', +    'euc-jp':              'euc-jp', +    'x-euc-jp':            'euc-jp', +    'csiso2022jp':         'iso-2022-jp', +    'iso-2022-jp':         'iso-2022-jp', +    'csshiftjis':          'shift_jis', +    'ms_kanji':            'shift_jis', +    'shift-jis':           'shift_jis', +    'shift_jis':           'shift_jis', +    'sjis':                'shift_jis', +    'windows-31j':         'shift_jis', +    'x-sjis':              'shift_jis', +    'cseuckr':             'euc-kr', +    'csksc56011987':       'euc-kr', +    'euc-kr':              'euc-kr', +    'iso-ir-149':          'euc-kr', +    'korean':              'euc-kr', +    'ks_c_5601-1987':      'euc-kr', +    'ks_c_5601-1989':      'euc-kr', +    'ksc5601':             'euc-kr', +    'ksc_5601':            'euc-kr', +    'windows-949':         'euc-kr', +    'csiso2022kr':         'iso-2022-kr', +    'iso-2022-kr':         'iso-2022-kr', +    'utf-16be':            'utf-16be', +    'utf-16':              'utf-16le', +    'utf-16le':            'utf-16le', +    'x-user-defined':      'x-user-defined', +} diff --git a/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/mklabels.py b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/mklabels.py new file mode 100644 index 0000000..295dc92 --- /dev/null +++ b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/mklabels.py @@ -0,0 +1,59 @@ +""" + +    webencodings.mklabels +    ~~~~~~~~~~~~~~~~~~~~~ + +    Regenarate the webencodings.labels module. + +    :copyright: Copyright 2012 by Simon Sapin +    :license: BSD, see LICENSE for details. + +""" + +import json +try: +    from urllib import urlopen +except ImportError: +    from urllib.request import urlopen + + +def assert_lower(string): +    assert string == string.lower() +    return string + + +def generate(url): +    parts = ['''\ +""" + +    webencodings.labels +    ~~~~~~~~~~~~~~~~~~~ + +    Map encoding labels to their name. + +    :copyright: Copyright 2012 by Simon Sapin +    :license: BSD, see LICENSE for details. + +""" + +# XXX Do not edit! +# This file is automatically generated by mklabels.py + +LABELS = { +'''] +    labels = [ +        (repr(assert_lower(label)).lstrip('u'), +         repr(encoding['name']).lstrip('u')) +        for category in json.loads(urlopen(url).read().decode('ascii')) +        for encoding in category['encodings'] +        for label in encoding['labels']] +    max_len = max(len(label) for label, name in labels) +    parts.extend( +        '    %s:%s %s,\n' % (label, ' ' * (max_len - len(label)), name) +        for label, name in labels) +    parts.append('}') +    return ''.join(parts) + + +if __name__ == '__main__': +    print(generate('http://encoding.spec.whatwg.org/encodings.json')) diff --git a/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/tests.py b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/tests.py new file mode 100644 index 0000000..e12c10d --- /dev/null +++ b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/tests.py @@ -0,0 +1,153 @@ +# coding: utf-8 +""" + +    webencodings.tests +    ~~~~~~~~~~~~~~~~~~ + +    A basic test suite for Encoding. + +    :copyright: Copyright 2012 by Simon Sapin +    :license: BSD, see LICENSE for details. + +""" + +from __future__ import unicode_literals + +from . import (lookup, LABELS, decode, encode, iter_decode, iter_encode, +               IncrementalDecoder, IncrementalEncoder, UTF8) + + +def assert_raises(exception, function, *args, **kwargs): +    try: +        function(*args, **kwargs) +    except exception: +        return +    else:  # pragma: no cover +        raise AssertionError('Did not raise %s.' % exception) + + +def test_labels(): +    assert lookup('utf-8').name == 'utf-8' +    assert lookup('Utf-8').name == 'utf-8' +    assert lookup('UTF-8').name == 'utf-8' +    assert lookup('utf8').name == 'utf-8' +    assert lookup('utf8').name == 'utf-8' +    assert lookup('utf8 ').name == 'utf-8' +    assert lookup(' \r\nutf8\t').name == 'utf-8' +    assert lookup('u8') is None  # Python label. +    assert lookup('utf-8 ') is None  # Non-ASCII white space. + +    assert lookup('US-ASCII').name == 'windows-1252' +    assert lookup('iso-8859-1').name == 'windows-1252' +    assert lookup('latin1').name == 'windows-1252' +    assert lookup('LATIN1').name == 'windows-1252' +    assert lookup('latin-1') is None +    assert lookup('LATİN1') is None  # ASCII-only case insensitivity. + + +def test_all_labels(): +    for label in LABELS: +        assert decode(b'', label) == ('', lookup(label)) +        assert encode('', label) == b'' +        for repeat in [0, 1, 12]: +            output, _ = iter_decode([b''] * repeat, label) +            assert list(output) == [] +            assert list(iter_encode([''] * repeat, label)) == [] +        decoder = IncrementalDecoder(label) +        assert decoder.decode(b'') == '' +        assert decoder.decode(b'', final=True) == '' +        encoder = IncrementalEncoder(label) +        assert encoder.encode('') == b'' +        assert encoder.encode('', final=True) == b'' +    # All encoding names are valid labels too: +    for name in set(LABELS.values()): +        assert lookup(name).name == name + + +def test_invalid_label(): +    assert_raises(LookupError, decode, b'\xEF\xBB\xBF\xc3\xa9', 'invalid') +    assert_raises(LookupError, encode, 'é', 'invalid') +    assert_raises(LookupError, iter_decode, [], 'invalid') +    assert_raises(LookupError, iter_encode, [], 'invalid') +    assert_raises(LookupError, IncrementalDecoder, 'invalid') +    assert_raises(LookupError, IncrementalEncoder, 'invalid') + + +def test_decode(): +    assert decode(b'\x80', 'latin1') == ('€', lookup('latin1')) +    assert decode(b'\x80', lookup('latin1')) == ('€', lookup('latin1')) +    assert decode(b'\xc3\xa9', 'utf8') == ('é', lookup('utf8')) +    assert decode(b'\xc3\xa9', UTF8) == ('é', lookup('utf8')) +    assert decode(b'\xc3\xa9', 'ascii') == ('é', lookup('ascii')) +    assert decode(b'\xEF\xBB\xBF\xc3\xa9', 'ascii') == ('é', lookup('utf8'))  # UTF-8 with BOM + +    assert decode(b'\xFE\xFF\x00\xe9', 'ascii') == ('é', lookup('utf-16be'))  # UTF-16-BE with BOM +    assert decode(b'\xFF\xFE\xe9\x00', 'ascii') == ('é', lookup('utf-16le'))  # UTF-16-LE with BOM +    assert decode(b'\xFE\xFF\xe9\x00', 'ascii') == ('\ue900', lookup('utf-16be')) +    assert decode(b'\xFF\xFE\x00\xe9', 'ascii') == ('\ue900', lookup('utf-16le')) + +    assert decode(b'\x00\xe9', 'UTF-16BE') == ('é', lookup('utf-16be')) +    assert decode(b'\xe9\x00', 'UTF-16LE') == ('é', lookup('utf-16le')) +    assert decode(b'\xe9\x00', 'UTF-16') == ('é', lookup('utf-16le')) + +    assert decode(b'\xe9\x00', 'UTF-16BE') == ('\ue900', lookup('utf-16be')) +    assert decode(b'\x00\xe9', 'UTF-16LE') == ('\ue900', lookup('utf-16le')) +    assert decode(b'\x00\xe9', 'UTF-16') == ('\ue900', lookup('utf-16le')) + + +def test_encode(): +    assert encode('é', 'latin1') == b'\xe9' +    assert encode('é', 'utf8') == b'\xc3\xa9' +    assert encode('é', 'utf8') == b'\xc3\xa9' +    assert encode('é', 'utf-16') == b'\xe9\x00' +    assert encode('é', 'utf-16le') == b'\xe9\x00' +    assert encode('é', 'utf-16be') == b'\x00\xe9' + + +def test_iter_decode(): +    def iter_decode_to_string(input, fallback_encoding): +        output, _encoding = iter_decode(input, fallback_encoding) +        return ''.join(output) +    assert iter_decode_to_string([], 'latin1') == '' +    assert iter_decode_to_string([b''], 'latin1') == '' +    assert iter_decode_to_string([b'\xe9'], 'latin1') == 'é' +    assert iter_decode_to_string([b'hello'], 'latin1') == 'hello' +    assert iter_decode_to_string([b'he', b'llo'], 'latin1') == 'hello' +    assert iter_decode_to_string([b'hell', b'o'], 'latin1') == 'hello' +    assert iter_decode_to_string([b'\xc3\xa9'], 'latin1') == 'é' +    assert iter_decode_to_string([b'\xEF\xBB\xBF\xc3\xa9'], 'latin1') == 'é' +    assert iter_decode_to_string([ +        b'\xEF\xBB\xBF', b'\xc3', b'\xa9'], 'latin1') == 'é' +    assert iter_decode_to_string([ +        b'\xEF\xBB\xBF', b'a', b'\xc3'], 'latin1') == 'a\uFFFD' +    assert iter_decode_to_string([ +        b'', b'\xEF', b'', b'', b'\xBB\xBF\xc3', b'\xa9'], 'latin1') == 'é' +    assert iter_decode_to_string([b'\xEF\xBB\xBF'], 'latin1') == '' +    assert iter_decode_to_string([b'\xEF\xBB'], 'latin1') == 'ï»' +    assert iter_decode_to_string([b'\xFE\xFF\x00\xe9'], 'latin1') == 'é' +    assert iter_decode_to_string([b'\xFF\xFE\xe9\x00'], 'latin1') == 'é' +    assert iter_decode_to_string([ +        b'', b'\xFF', b'', b'', b'\xFE\xe9', b'\x00'], 'latin1') == 'é' +    assert iter_decode_to_string([ +        b'', b'h\xe9', b'llo'], 'x-user-defined') == 'h\uF7E9llo' + + +def test_iter_encode(): +    assert b''.join(iter_encode([], 'latin1')) == b'' +    assert b''.join(iter_encode([''], 'latin1')) == b'' +    assert b''.join(iter_encode(['é'], 'latin1')) == b'\xe9' +    assert b''.join(iter_encode(['', 'é', '', ''], 'latin1')) == b'\xe9' +    assert b''.join(iter_encode(['', 'é', '', ''], 'utf-16')) == b'\xe9\x00' +    assert b''.join(iter_encode(['', 'é', '', ''], 'utf-16le')) == b'\xe9\x00' +    assert b''.join(iter_encode(['', 'é', '', ''], 'utf-16be')) == b'\x00\xe9' +    assert b''.join(iter_encode([ +        '', 'h\uF7E9', '', 'llo'], 'x-user-defined')) == b'h\xe9llo' + + +def test_x_user_defined(): +    encoded = b'2,\x0c\x0b\x1aO\xd9#\xcb\x0f\xc9\xbbt\xcf\xa8\xca' +    decoded = '2,\x0c\x0b\x1aO\uf7d9#\uf7cb\x0f\uf7c9\uf7bbt\uf7cf\uf7a8\uf7ca' +    encoded = b'aa' +    decoded = 'aa' +    assert decode(encoded, 'x-user-defined') == (decoded, lookup('x-user-defined')) +    assert encode(decoded, 'x-user-defined') == encoded diff --git a/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/x_user_defined.py b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/x_user_defined.py new file mode 100644 index 0000000..d16e326 --- /dev/null +++ b/venv/lib/python3.11/site-packages/pip/_vendor/webencodings/x_user_defined.py @@ -0,0 +1,325 @@ +# coding: utf-8 +""" + +    webencodings.x_user_defined +    ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +    An implementation of the x-user-defined encoding. + +    :copyright: Copyright 2012 by Simon Sapin +    :license: BSD, see LICENSE for details. + +""" + +from __future__ import unicode_literals + +import codecs + + +### Codec APIs + +class Codec(codecs.Codec): + +    def encode(self, input, errors='strict'): +        return codecs.charmap_encode(input, errors, encoding_table) + +    def decode(self, input, errors='strict'): +        return codecs.charmap_decode(input, errors, decoding_table) + + +class IncrementalEncoder(codecs.IncrementalEncoder): +    def encode(self, input, final=False): +        return codecs.charmap_encode(input, self.errors, encoding_table)[0] + + +class IncrementalDecoder(codecs.IncrementalDecoder): +    def decode(self, input, final=False): +        return codecs.charmap_decode(input, self.errors, decoding_table)[0] + + +class StreamWriter(Codec, codecs.StreamWriter): +    pass + + +class StreamReader(Codec, codecs.StreamReader): +    pass + + +### encodings module API + +codec_info = codecs.CodecInfo( +    name='x-user-defined', +    encode=Codec().encode, +    decode=Codec().decode, +    incrementalencoder=IncrementalEncoder, +    incrementaldecoder=IncrementalDecoder, +    streamreader=StreamReader, +    streamwriter=StreamWriter, +) + + +### Decoding Table + +# Python 3: +# for c in range(256): print('    %r' % chr(c if c < 128 else c + 0xF700)) +decoding_table = ( +    '\x00' +    '\x01' +    '\x02' +    '\x03' +    '\x04' +    '\x05' +    '\x06' +    '\x07' +    '\x08' +    '\t' +    '\n' +    '\x0b' +    '\x0c' +    '\r' +    '\x0e' +    '\x0f' +    '\x10' +    '\x11' +    '\x12' +    '\x13' +    '\x14' +    '\x15' +    '\x16' +    '\x17' +    '\x18' +    '\x19' +    '\x1a' +    '\x1b' +    '\x1c' +    '\x1d' +    '\x1e' +    '\x1f' +    ' ' +    '!' +    '"' +    '#' +    '$' +    '%' +    '&' +    "'" +    '(' +    ')' +    '*' +    '+' +    ',' +    '-' +    '.' +    '/' +    '0' +    '1' +    '2' +    '3' +    '4' +    '5' +    '6' +    '7' +    '8' +    '9' +    ':' +    ';' +    '<' +    '=' +    '>' +    '?' +    '@' +    'A' +    'B' +    'C' +    'D' +    'E' +    'F' +    'G' +    'H' +    'I' +    'J' +    'K' +    'L' +    'M' +    'N' +    'O' +    'P' +    'Q' +    'R' +    'S' +    'T' +    'U' +    'V' +    'W' +    'X' +    'Y' +    'Z' +    '[' +    '\\' +    ']' +    '^' +    '_' +    '`' +    'a' +    'b' +    'c' +    'd' +    'e' +    'f' +    'g' +    'h' +    'i' +    'j' +    'k' +    'l' +    'm' +    'n' +    'o' +    'p' +    'q' +    'r' +    's' +    't' +    'u' +    'v' +    'w' +    'x' +    'y' +    'z' +    '{' +    '|' +    '}' +    '~' +    '\x7f' +    '\uf780' +    '\uf781' +    '\uf782' +    '\uf783' +    '\uf784' +    '\uf785' +    '\uf786' +    '\uf787' +    '\uf788' +    '\uf789' +    '\uf78a' +    '\uf78b' +    '\uf78c' +    '\uf78d' +    '\uf78e' +    '\uf78f' +    '\uf790' +    '\uf791' +    '\uf792' +    '\uf793' +    '\uf794' +    '\uf795' +    '\uf796' +    '\uf797' +    '\uf798' +    '\uf799' +    '\uf79a' +    '\uf79b' +    '\uf79c' +    '\uf79d' +    '\uf79e' +    '\uf79f' +    '\uf7a0' +    '\uf7a1' +    '\uf7a2' +    '\uf7a3' +    '\uf7a4' +    '\uf7a5' +    '\uf7a6' +    '\uf7a7' +    '\uf7a8' +    '\uf7a9' +    '\uf7aa' +    '\uf7ab' +    '\uf7ac' +    '\uf7ad' +    '\uf7ae' +    '\uf7af' +    '\uf7b0' +    '\uf7b1' +    '\uf7b2' +    '\uf7b3' +    '\uf7b4' +    '\uf7b5' +    '\uf7b6' +    '\uf7b7' +    '\uf7b8' +    '\uf7b9' +    '\uf7ba' +    '\uf7bb' +    '\uf7bc' +    '\uf7bd' +    '\uf7be' +    '\uf7bf' +    '\uf7c0' +    '\uf7c1' +    '\uf7c2' +    '\uf7c3' +    '\uf7c4' +    '\uf7c5' +    '\uf7c6' +    '\uf7c7' +    '\uf7c8' +    '\uf7c9' +    '\uf7ca' +    '\uf7cb' +    '\uf7cc' +    '\uf7cd' +    '\uf7ce' +    '\uf7cf' +    '\uf7d0' +    '\uf7d1' +    '\uf7d2' +    '\uf7d3' +    '\uf7d4' +    '\uf7d5' +    '\uf7d6' +    '\uf7d7' +    '\uf7d8' +    '\uf7d9' +    '\uf7da' +    '\uf7db' +    '\uf7dc' +    '\uf7dd' +    '\uf7de' +    '\uf7df' +    '\uf7e0' +    '\uf7e1' +    '\uf7e2' +    '\uf7e3' +    '\uf7e4' +    '\uf7e5' +    '\uf7e6' +    '\uf7e7' +    '\uf7e8' +    '\uf7e9' +    '\uf7ea' +    '\uf7eb' +    '\uf7ec' +    '\uf7ed' +    '\uf7ee' +    '\uf7ef' +    '\uf7f0' +    '\uf7f1' +    '\uf7f2' +    '\uf7f3' +    '\uf7f4' +    '\uf7f5' +    '\uf7f6' +    '\uf7f7' +    '\uf7f8' +    '\uf7f9' +    '\uf7fa' +    '\uf7fb' +    '\uf7fc' +    '\uf7fd' +    '\uf7fe' +    '\uf7ff' +) + +### Encoding table +encoding_table = codecs.charmap_build(decoding_table)  | 
