Diffstat (limited to 'venv/lib/python3.11/site-packages/pygments/lexers/mime.py')
-rw-r--r--  venv/lib/python3.11/site-packages/pygments/lexers/mime.py  210
1 file changed, 210 insertions(+), 0 deletions(-)
diff --git a/venv/lib/python3.11/site-packages/pygments/lexers/mime.py b/venv/lib/python3.11/site-packages/pygments/lexers/mime.py
new file mode 100644
index 0000000..8bf16f7
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/pygments/lexers/mime.py
@@ -0,0 +1,210 @@
+"""
+ pygments.lexers.mime
+ ~~~~~~~~~~~~~~~~~~~~
+
+ Lexer for Multipurpose Internet Mail Extensions (MIME) data.
+
+ :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, include
+from pygments.lexers import get_lexer_for_mimetype
+from pygments.token import Text, Name, String, Operator, Comment, Other
+from pygments.util import get_int_opt, ClassNotFound
+
+__all__ = ["MIMELexer"]
+
+
+class MIMELexer(RegexLexer):
+ """
+ Lexer for Multipurpose Internet Mail Extensions (MIME) data. This lexer is
+ designed to process nested multipart data.
+
+ It assumes that the given data contains both a header and a body, split
+ at an empty line. If no valid header is found, the entire input is
+ treated as the body.
+
+ Additional options accepted:
+
+ `MIME-max-level`
+ Max recursion level for nested MIME structure. Any negative number
+ is treated as unlimited. (default: -1)
+
+ `Content-Type`
+ Treat the data as a specific content type. Useful when the header is
+ missing; otherwise the lexer tries to parse the content type from the
+ header. (default: `text/plain`)
+
+ `Multipart-Boundary`
+ Set the default multipart boundary delimiter. This option is only used
+ when `Content-Type` is `multipart` and the header is missing; otherwise
+ the boundary is parsed from the header. (default: None)
+
+ `Content-Transfer-Encoding`
+ Treat the data as having a specific transfer encoding; otherwise the
+ encoding is parsed from the header. (default: None)
+
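+ A minimal usage sketch, assuming the standard ``highlight`` helper and
+ ``TerminalFormatter`` from Pygments::
+
+     from pygments import highlight
+     from pygments.formatters import TerminalFormatter
+     from pygments.lexers.mime import MIMELexer
+
+     data = "Content-Type: text/plain\\n\\nHello, MIME!\\n"
+     print(highlight(data, MIMELexer(), TerminalFormatter()))
+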
+ .. versionadded:: 2.5
+ """
+
+ name = "MIME"
+ aliases = ["mime"]
+ mimetypes = ["multipart/mixed",
+ "multipart/related",
+ "multipart/alternative"]
+
+ def __init__(self, **options):
+ super().__init__(**options)
+ self.boundary = options.get("Multipart-Boundary")
+ self.content_transfer_encoding = options.get("Content-Transfer-Encoding")
+ self.content_type = options.get("Content-Type", "text/plain")
+ self.max_nested_level = get_int_opt(options, "MIME-max-level", -1)
+
+ def get_header_tokens(self, match):
+ field = match.group(1)
+
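+ # headers named in `attention_headers` (Content-Type and
+ # Content-Transfer-Encoding) are parsed further in a dedicated
+ # sub-state; every other header is emitted as a single Comment token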
+ if field.lower() in self.attention_headers:
+ yield match.start(1), Name.Tag, field + ":"
+ yield match.start(2), Text.Whitespace, match.group(2)
+
+ pos = match.end(2)
+ body = match.group(3)
+ for i, t, v in self.get_tokens_unprocessed(body, ("root", field.lower())):
+ yield pos + i, t, v
+
+ else:
+ yield match.start(), Comment, match.group()
+
+ def get_body_tokens(self, match):
+ pos_body_start = match.start()
+ entire_body = match.group()
+
+ # skip first newline
+ if entire_body[0] == '\n':
+ yield pos_body_start, Text.Whitespace, '\n'
+ pos_body_start = pos_body_start + 1
+ entire_body = entire_body[1:]
+
+ # if it is not a multipart
+ if not self.content_type.startswith("multipart") or not self.boundary:
+ for i, t, v in self.get_bodypart_tokens(entire_body):
+ yield pos_body_start + i, t, v
+ return
+
+ # find boundary
+ bdry_pattern = r"^--%s(--)?\n" % re.escape(self.boundary)
+ bdry_matcher = re.compile(bdry_pattern, re.MULTILINE)
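+ # per RFC 2046, a boundary "xyz" delimits parts with "--xyz" lines,
+ # and a final "--xyz--" line closes the multipart body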
+
+ # some messages have preamble text before the first boundary
+ m = bdry_matcher.search(entire_body)
+ if m:
+ pos_part_start = pos_body_start + m.end()
+ pos_iter_start = lpos_end = m.end()
+ yield pos_body_start, Text, entire_body[:m.start()]
+ yield pos_body_start + lpos_end, String.Delimiter, m.group()
+ else:
+ pos_part_start = pos_body_start
+ pos_iter_start = 0
+
+ # process tokens of each body part
+ for m in bdry_matcher.finditer(entire_body, pos_iter_start):
+ # bodypart
+ lpos_start = pos_part_start - pos_body_start
+ lpos_end = m.start()
+ part = entire_body[lpos_start:lpos_end]
+ for i, t, v in self.get_bodypart_tokens(part):
+ yield pos_part_start + i, t, v
+
+ # boundary
+ yield pos_body_start + lpos_end, String.Delimiter, m.group()
+ pos_part_start = pos_body_start + m.end()
+
+ # some messages have epilogue text after the last boundary
+ lpos_start = pos_part_start - pos_body_start
+ if lpos_start != len(entire_body):
+ yield pos_part_start, Text, entire_body[lpos_start:]
+
+ def get_bodypart_tokens(self, text):
+ # return the text as-is if:
+ #  * there is no content
+ #  * no specific content type is given
+ #  * the content transfer encoding is not readable
+ #  * the max recursion depth is exceeded
+ if not text.strip() or not self.content_type:
+ return [(0, Other, text)]
+
+ cte = self.content_transfer_encoding
+ if cte and cte not in {"8bit", "7bit", "quoted-printable"}:
+ return [(0, Other, text)]
+
+ if self.max_nested_level == 0:
+ return [(0, Other, text)]
+
+ # get lexer
+ try:
+ lexer = get_lexer_for_mimetype(self.content_type)
+ except ClassNotFound:
+ return [(0, Other, text)]
+
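+ # a nested MIME part gets a decremented recursion budget, so
+ # `MIME-max-level` bounds the multipart nesting depth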
+ if isinstance(lexer, type(self)):
+ lexer.max_nested_level = self.max_nested_level - 1
+
+ return lexer.get_tokens_unprocessed(text)
+
+ def store_content_type(self, match):
+ self.content_type = match.group(1)
+
+ prefix_len = match.start(1) - match.start(0)
+ yield match.start(0), Text.Whitespace, match.group(0)[:prefix_len]
+ yield match.start(1), Name.Label, match.group(2)
+ yield match.end(2), String.Delimiter, '/'
+ yield match.start(3), Name.Label, match.group(3)
+
+ def get_content_type_subtokens(self, match):
+ yield match.start(1), Text, match.group(1)
+ yield match.start(2), Text.Whitespace, match.group(2)
+ yield match.start(3), Name.Attribute, match.group(3)
+ yield match.start(4), Operator, match.group(4)
+ yield match.start(5), String, match.group(5)
+
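+ # remember the boundary parameter (stripping surrounding quotes) so
+ # get_body_tokens can split the multipart body on it later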
+ if match.group(3).lower() == "boundary":
+ boundary = match.group(5).strip()
+ if boundary[0] == '"' and boundary[-1] == '"':
+ boundary = boundary[1:-1]
+ self.boundary = boundary
+
+ def store_content_transfer_encoding(self, match):
+ self.content_transfer_encoding = match.group(0).lower()
+ yield match.start(0), Name.Constant, match.group(0)
+
+ attention_headers = {"content-type", "content-transfer-encoding"}
+
+ tokens = {
+ "root": [
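+ # a header line: field name, optional spaces, and the value,
+ # including any folded continuation lines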
+ (r"^([\w-]+):( *)([\s\S]*?\n)(?![ \t])", get_header_tokens),
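+ # everything from the first empty line onward is the body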
+ (r"^$[\s\S]+", get_body_tokens),
+ ],
+ "header": [
+ # folding
+ (r"\n[ \t]", Text.Whitespace),
+ (r"\n(?![ \t])", Text.Whitespace, "#pop"),
+ ],
+ "content-type": [
+ include("header"),
+ (
+ r"^\s*((multipart|application|audio|font|image|model|text|video"
+ r"|message)/([\w-]+))",
+ store_content_type,
+ ),
+ (r'(;)((?:[ \t]|\n[ \t])*)([\w:-]+)(=)([\s\S]*?)(?=;|\n(?![ \t]))',
+ get_content_type_subtokens),
+ (r';[ \t]*\n(?![ \t])', Text, '#pop'),
+ ],
+ "content-transfer-encoding": [
+ include("header"),
+ (r"([\w-]+)", store_content_transfer_encoding),
+ ],
+ }