diff options
author | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:17:55 -0400 |
---|---|---|
committer | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:17:55 -0400 |
commit | 12cf076118570eebbff08c6b3090e0d4798447a1 (patch) | |
tree | 3ba25e17e3c3a5e82316558ba3864b955919ff72 /venv/lib/python3.11/site-packages/pygments/lexers/mime.py | |
parent | c45662ff3923b34614ddcc8feb9195541166dcc5 (diff) |
no venv
Diffstat (limited to 'venv/lib/python3.11/site-packages/pygments/lexers/mime.py')
-rw-r--r-- | venv/lib/python3.11/site-packages/pygments/lexers/mime.py | 210 |
1 files changed, 0 insertions, 210 deletions
diff --git a/venv/lib/python3.11/site-packages/pygments/lexers/mime.py b/venv/lib/python3.11/site-packages/pygments/lexers/mime.py deleted file mode 100644 index 8bf16f7..0000000 --- a/venv/lib/python3.11/site-packages/pygments/lexers/mime.py +++ /dev/null @@ -1,210 +0,0 @@ -""" - pygments.lexers.mime - ~~~~~~~~~~~~~~~~~~~~ - - Lexer for Multipurpose Internet Mail Extensions (MIME) data. - - :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. - :license: BSD, see LICENSE for details. -""" - -import re - -from pygments.lexer import RegexLexer, include -from pygments.lexers import get_lexer_for_mimetype -from pygments.token import Text, Name, String, Operator, Comment, Other -from pygments.util import get_int_opt, ClassNotFound - -__all__ = ["MIMELexer"] - - -class MIMELexer(RegexLexer): - """ - Lexer for Multipurpose Internet Mail Extensions (MIME) data. This lexer is - designed to process nested multipart data. - - It assumes that the given data contains both header and body (and is - split at an empty line). If no valid header is found, then the entire data - will be treated as body. - - Additional options accepted: - - `MIME-max-level` - Max recursion level for nested MIME structure. Any negative number - would treated as unlimited. (default: -1) - - `Content-Type` - Treat the data as a specific content type. Useful when header is - missing, or this lexer would try to parse from header. (default: - `text/plain`) - - `Multipart-Boundary` - Set the default multipart boundary delimiter. This option is only used - when `Content-Type` is `multipart` and header is missing. This lexer - would try to parse from header by default. (default: None) - - `Content-Transfer-Encoding` - Treat the data as a specific encoding. Or this lexer would try to parse - from header by default. (default: None) - - .. versionadded:: 2.5 - """ - - name = "MIME" - aliases = ["mime"] - mimetypes = ["multipart/mixed", - "multipart/related", - "multipart/alternative"] - - def __init__(self, **options): - super().__init__(**options) - self.boundary = options.get("Multipart-Boundary") - self.content_transfer_encoding = options.get("Content_Transfer_Encoding") - self.content_type = options.get("Content_Type", "text/plain") - self.max_nested_level = get_int_opt(options, "MIME-max-level", -1) - - def get_header_tokens(self, match): - field = match.group(1) - - if field.lower() in self.attention_headers: - yield match.start(1), Name.Tag, field + ":" - yield match.start(2), Text.Whitespace, match.group(2) - - pos = match.end(2) - body = match.group(3) - for i, t, v in self.get_tokens_unprocessed(body, ("root", field.lower())): - yield pos + i, t, v - - else: - yield match.start(), Comment, match.group() - - def get_body_tokens(self, match): - pos_body_start = match.start() - entire_body = match.group() - - # skip first newline - if entire_body[0] == '\n': - yield pos_body_start, Text.Whitespace, '\n' - pos_body_start = pos_body_start + 1 - entire_body = entire_body[1:] - - # if it is not a multipart - if not self.content_type.startswith("multipart") or not self.boundary: - for i, t, v in self.get_bodypart_tokens(entire_body): - yield pos_body_start + i, t, v - return - - # find boundary - bdry_pattern = r"^--%s(--)?\n" % re.escape(self.boundary) - bdry_matcher = re.compile(bdry_pattern, re.MULTILINE) - - # some data has prefix text before first boundary - m = bdry_matcher.search(entire_body) - if m: - pos_part_start = pos_body_start + m.end() - pos_iter_start = lpos_end = m.end() - yield pos_body_start, Text, entire_body[:m.start()] - yield pos_body_start + lpos_end, String.Delimiter, m.group() - else: - pos_part_start = pos_body_start - pos_iter_start = 0 - - # process tokens of each body part - for m in bdry_matcher.finditer(entire_body, pos_iter_start): - # bodypart - lpos_start = pos_part_start - pos_body_start - lpos_end = m.start() - part = entire_body[lpos_start:lpos_end] - for i, t, v in self.get_bodypart_tokens(part): - yield pos_part_start + i, t, v - - # boundary - yield pos_body_start + lpos_end, String.Delimiter, m.group() - pos_part_start = pos_body_start + m.end() - - # some data has suffix text after last boundary - lpos_start = pos_part_start - pos_body_start - if lpos_start != len(entire_body): - yield pos_part_start, Text, entire_body[lpos_start:] - - def get_bodypart_tokens(self, text): - # return if: - # * no content - # * no content type specific - # * content encoding is not readable - # * max recurrsion exceed - if not text.strip() or not self.content_type: - return [(0, Other, text)] - - cte = self.content_transfer_encoding - if cte and cte not in {"8bit", "7bit", "quoted-printable"}: - return [(0, Other, text)] - - if self.max_nested_level == 0: - return [(0, Other, text)] - - # get lexer - try: - lexer = get_lexer_for_mimetype(self.content_type) - except ClassNotFound: - return [(0, Other, text)] - - if isinstance(lexer, type(self)): - lexer.max_nested_level = self.max_nested_level - 1 - - return lexer.get_tokens_unprocessed(text) - - def store_content_type(self, match): - self.content_type = match.group(1) - - prefix_len = match.start(1) - match.start(0) - yield match.start(0), Text.Whitespace, match.group(0)[:prefix_len] - yield match.start(1), Name.Label, match.group(2) - yield match.end(2), String.Delimiter, '/' - yield match.start(3), Name.Label, match.group(3) - - def get_content_type_subtokens(self, match): - yield match.start(1), Text, match.group(1) - yield match.start(2), Text.Whitespace, match.group(2) - yield match.start(3), Name.Attribute, match.group(3) - yield match.start(4), Operator, match.group(4) - yield match.start(5), String, match.group(5) - - if match.group(3).lower() == "boundary": - boundary = match.group(5).strip() - if boundary[0] == '"' and boundary[-1] == '"': - boundary = boundary[1:-1] - self.boundary = boundary - - def store_content_transfer_encoding(self, match): - self.content_transfer_encoding = match.group(0).lower() - yield match.start(0), Name.Constant, match.group(0) - - attention_headers = {"content-type", "content-transfer-encoding"} - - tokens = { - "root": [ - (r"^([\w-]+):( *)([\s\S]*?\n)(?![ \t])", get_header_tokens), - (r"^$[\s\S]+", get_body_tokens), - ], - "header": [ - # folding - (r"\n[ \t]", Text.Whitespace), - (r"\n(?![ \t])", Text.Whitespace, "#pop"), - ], - "content-type": [ - include("header"), - ( - r"^\s*((multipart|application|audio|font|image|model|text|video" - r"|message)/([\w-]+))", - store_content_type, - ), - (r'(;)((?:[ \t]|\n[ \t])*)([\w:-]+)(=)([\s\S]*?)(?=;|\n(?![ \t]))', - get_content_type_subtokens), - (r';[ \t]*\n(?![ \t])', Text, '#pop'), - ], - "content-transfer-encoding": [ - include("header"), - (r"([\w-]+)", store_content_transfer_encoding), - ], - } |