author cyfraeviolae <cyfraeviolae> 2024-04-03 03:17:55 -0400
committer cyfraeviolae <cyfraeviolae> 2024-04-03 03:17:55 -0400
commit 12cf076118570eebbff08c6b3090e0d4798447a1 (patch)
tree 3ba25e17e3c3a5e82316558ba3864b955919ff72 /venv/lib/python3.11/site-packages/pygments/lexers/mime.py
parent c45662ff3923b34614ddcc8feb9195541166dcc5 (diff)
no venv
Diffstat (limited to 'venv/lib/python3.11/site-packages/pygments/lexers/mime.py')
-rw-r--r--  venv/lib/python3.11/site-packages/pygments/lexers/mime.py  210
1 file changed, 0 insertions(+), 210 deletions(-)
diff --git a/venv/lib/python3.11/site-packages/pygments/lexers/mime.py b/venv/lib/python3.11/site-packages/pygments/lexers/mime.py
deleted file mode 100644
index 8bf16f7..0000000
--- a/venv/lib/python3.11/site-packages/pygments/lexers/mime.py
+++ /dev/null
@@ -1,210 +0,0 @@
-"""
- pygments.lexers.mime
- ~~~~~~~~~~~~~~~~~~~~
-
- Lexer for Multipurpose Internet Mail Extensions (MIME) data.
-
- :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
-"""
-
-import re
-
-from pygments.lexer import RegexLexer, include
-from pygments.lexers import get_lexer_for_mimetype
-from pygments.token import Text, Name, String, Operator, Comment, Other
-from pygments.util import get_int_opt, ClassNotFound
-
-__all__ = ["MIMELexer"]
-
-
-class MIMELexer(RegexLexer):
- """
- Lexer for Multipurpose Internet Mail Extensions (MIME) data. This lexer is
- designed to process nested multipart data.
-
- It assumes that the given data contains both a header and a body (split
- at an empty line). If no valid header is found, the entire input is
- treated as the body.
-
- Additional options accepted:
-
- `MIME-max-level`
- Maximum recursion level for nested MIME structures. Any negative
- number is treated as unlimited. (default: -1)
-
- `Content-Type`
- Treat the data as the given content type. Useful when the header is
- missing; otherwise, this lexer tries to parse the content type from
- the header. (default: `text/plain`)
-
- `Multipart-Boundary`
- Set the default multipart boundary delimiter. This option is only
- used when `Content-Type` is `multipart` and the header is missing;
- by default, this lexer tries to parse the boundary from the header.
- (default: None)
-
- `Content-Transfer-Encoding`
- Treat the data as using the given transfer encoding; by default, this
- lexer tries to parse the encoding from the header. (default: None)
-
- .. versionadded:: 2.5
- """
-
- name = "MIME"
- aliases = ["mime"]
- mimetypes = ["multipart/mixed",
- "multipart/related",
- "multipart/alternative"]
-
- def __init__(self, **options):
- super().__init__(**options)
- self.boundary = options.get("Multipart-Boundary")
- self.content_transfer_encoding = options.get("Content_Transfer_Encoding")
- self.content_type = options.get("Content_Type", "text/plain")
- self.max_nested_level = get_int_opt(options, "MIME-max-level", -1)
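Review note: the constructor reads "Content_Type" and "Content_Transfer_Encoding" with underscores even though the docstring spells them with hyphens, while "Multipart-Boundary" and "MIME-max-level" keep their hyphens. A minimal sketch (not part of the diff) of constructing the deleted lexer with these options:

from pygments.lexers.mime import MIMELexer

# Hyphenated option keys must be passed via dict unpacking; CPython
# accepts non-identifier strings as **kwargs keys.
lexer = MIMELexer(**{
    "MIME-max-level": 2,
    "Content_Type": "multipart/mixed",
    "Multipart-Boundary": "frontier",
})
assert lexer.content_type == "multipart/mixed"
assert lexer.boundary == "frontier"
assert lexer.max_nested_level == 2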
-
- def get_header_tokens(self, match):
- field = match.group(1)
-
- if field.lower() in self.attention_headers:
- yield match.start(1), Name.Tag, field + ":"
- yield match.start(2), Text.Whitespace, match.group(2)
-
- pos = match.end(2)
- body = match.group(3)
- for i, t, v in self.get_tokens_unprocessed(body, ("root", field.lower())):
- yield pos + i, t, v
-
- else:
- yield match.start(), Comment, match.group()
-
- def get_body_tokens(self, match):
- pos_body_start = match.start()
- entire_body = match.group()
-
- # skip first newline
- if entire_body[0] == '\n':
- yield pos_body_start, Text.Whitespace, '\n'
- pos_body_start = pos_body_start + 1
- entire_body = entire_body[1:]
-
- # not multipart, or no boundary known: lex the whole body as one part
- if not self.content_type.startswith("multipart") or not self.boundary:
- for i, t, v in self.get_bodypart_tokens(entire_body):
- yield pos_body_start + i, t, v
- return
-
- # find boundary
- bdry_pattern = r"^--%s(--)?\n" % re.escape(self.boundary)
- bdry_matcher = re.compile(bdry_pattern, re.MULTILINE)
-
- # some data has prefix text before first boundary
- m = bdry_matcher.search(entire_body)
- if m:
- pos_part_start = pos_body_start + m.end()
- pos_iter_start = lpos_end = m.end()
- yield pos_body_start, Text, entire_body[:m.start()]
- yield pos_body_start + lpos_end, String.Delimiter, m.group()
- else:
- pos_part_start = pos_body_start
- pos_iter_start = 0
-
- # process tokens of each body part
- for m in bdry_matcher.finditer(entire_body, pos_iter_start):
- # bodypart
- lpos_start = pos_part_start - pos_body_start
- lpos_end = m.start()
- part = entire_body[lpos_start:lpos_end]
- for i, t, v in self.get_bodypart_tokens(part):
- yield pos_part_start + i, t, v
-
- # boundary
- yield pos_body_start + lpos_end, String.Delimiter, m.group()
- pos_part_start = pos_body_start + m.end()
-
- # some data has suffix text after last boundary
- lpos_start = pos_part_start - pos_body_start
- if lpos_start != len(entire_body):
- yield pos_part_start, Text, entire_body[lpos_start:]
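For reference, a small standalone illustration (sample data assumed, not taken from the diff) of the boundary pattern compiled above: "^--<boundary>(--)?\n" in MULTILINE mode matches both the part separators and the closing "--boundary--" delimiter.

import re

boundary = "frontier"
body = (
    "preamble\n"
    "--frontier\n"
    "part one\n"
    "--frontier\n"
    "part two\n"
    "--frontier--\n"
)
matcher = re.compile(r"^--%s(--)?\n" % re.escape(boundary), re.MULTILINE)
print([m.group().strip() for m in matcher.finditer(body)])
# ['--frontier', '--frontier', '--frontier--']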
-
- def get_bodypart_tokens(self, text):
- # return the text as plain Other tokens if:
- # * there is no content
- # * no content type is specified
- # * the content transfer encoding is not readable
- # * the max recursion level is exceeded
- if not text.strip() or not self.content_type:
- return [(0, Other, text)]
-
- cte = self.content_transfer_encoding
- if cte and cte not in {"8bit", "7bit", "quoted-printable"}:
- return [(0, Other, text)]
-
- if self.max_nested_level == 0:
- return [(0, Other, text)]
-
- # get lexer
- try:
- lexer = get_lexer_for_mimetype(self.content_type)
- except ClassNotFound:
- return [(0, Other, text)]
-
- if isinstance(lexer, type(self)):
- lexer.max_nested_level = self.max_nested_level - 1
-
- return lexer.get_tokens_unprocessed(text)
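The delegation above leans on get_lexer_for_mimetype raising ClassNotFound for unknown types; a quick sketch (mimetypes chosen for illustration):

from pygments.lexers import get_lexer_for_mimetype
from pygments.util import ClassNotFound

for mimetype in ("text/x-python", "application/x-unknown"):
    try:
        print(mimetype, "->", get_lexer_for_mimetype(mimetype).name)
    except ClassNotFound:
        # this is the branch that leaves a body part as Other tokens
        print(mimetype, "-> no lexer; part falls back to Other tokens")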
-
- def store_content_type(self, match):
- self.content_type = match.group(1)
-
- prefix_len = match.start(1) - match.start(0)
- yield match.start(0), Text.Whitespace, match.group(0)[:prefix_len]
- yield match.start(1), Name.Label, match.group(2)
- yield match.end(2), String.Delimiter, '/'
- yield match.start(3), Name.Label, match.group(3)
-
- def get_content_type_subtokens(self, match):
- yield match.start(1), Text, match.group(1)
- yield match.start(2), Text.Whitespace, match.group(2)
- yield match.start(3), Name.Attribute, match.group(3)
- yield match.start(4), Operator, match.group(4)
- yield match.start(5), String, match.group(5)
-
- if match.group(3).lower() == "boundary":
- boundary = match.group(5).strip()
- if boundary[0] == '"' and boundary[-1] == '"':
- boundary = boundary[1:-1]
- self.boundary = boundary
-
- def store_content_transfer_encoding(self, match):
- self.content_transfer_encoding = match.group(0).lower()
- yield match.start(0), Name.Constant, match.group(0)
-
- attention_headers = {"content-type", "content-transfer-encoding"}
-
- tokens = {
- "root": [
- (r"^([\w-]+):( *)([\s\S]*?\n)(?![ \t])", get_header_tokens),
- (r"^$[\s\S]+", get_body_tokens),
- ],
- "header": [
- # folding
- (r"\n[ \t]", Text.Whitespace),
- (r"\n(?![ \t])", Text.Whitespace, "#pop"),
- ],
- "content-type": [
- include("header"),
- (
- r"^\s*((multipart|application|audio|font|image|model|text|video"
- r"|message)/([\w-]+))",
- store_content_type,
- ),
- (r'(;)((?:[ \t]|\n[ \t])*)([\w:-]+)(=)([\s\S]*?)(?=;|\n(?![ \t]))',
- get_content_type_subtokens),
- (r';[ \t]*\n(?![ \t])', Text, '#pop'),
- ],
- "content-transfer-encoding": [
- include("header"),
- (r"([\w-]+)", store_content_transfer_encoding),
- ],
- }
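End-to-end, the deleted lexer could be driven like any other Pygments lexer; a sketch with a small hand-written multipart message (sample content assumed, not from the diff):

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers.mime import MIMELexer

message = """\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="frontier"

--frontier
Content-Type: text/plain

Hello, world.
--frontier
Content-Type: text/x-python

print("hello")
--frontier--
"""

# The header is split from the body at the blank line; the boundary is
# picked up from the Content-Type parameter, and each part is dispatched
# to the sub-lexer for its own Content-Type.
print(highlight(message, MIMELexer(), TerminalFormatter()))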