Diffstat (limited to 'venv/lib/python3.11/site-packages/pygments/lexers/textfmts.py')
-rw-r--r--  venv/lib/python3.11/site-packages/pygments/lexers/textfmts.py  436
1 file changed, 436 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/pygments/lexers/textfmts.py b/venv/lib/python3.11/site-packages/pygments/lexers/textfmts.py
new file mode 100644
index 0000000..c7cfb6d
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/pygments/lexers/textfmts.py
@@ -0,0 +1,436 @@
+"""
+ pygments.lexers.textfmts
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Lexers for various text formats.
+
+ :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexers import guess_lexer, get_lexer_by_name
+from pygments.lexer import RegexLexer, bygroups, default, include
+from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
+ Number, Generic, Literal, Punctuation
+from pygments.util import ClassNotFound
+
+__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer',
+ 'NotmuchLexer', 'KernelLogLexer']
+
+
+class IrcLogsLexer(RegexLexer):
+ """
+ Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
+ """
+
+ name = 'IRC logs'
+ aliases = ['irc']
+ filenames = ['*.weechatlog']
+ mimetypes = ['text/x-irclog']
+
+ flags = re.VERBOSE | re.MULTILINE
+ timestamp = r"""
+ (
+ # irssi / xchat and others
+ (?: \[|\()? # Opening bracket or paren for the timestamp
+ (?: # Timestamp
+ (?: (?:\d{1,4} [-/])* # Date as - or /-separated groups of digits
+ (?:\d{1,4})
+ [T ])? # Date/time separator: T or space
+ (?: \d?\d [:.])* # Time as :/.-separated groups of 1 or 2 digits
+ (?: \d?\d)
+ )
+ (?: \]|\))?\s+ # Closing bracket or paren for the timestamp
+ |
+ # weechat
+ \d{4}\s\w{3}\s\d{2}\s # Date
+ \d{2}:\d{2}:\d{2}\s+ # Time + Whitespace
+ |
+ # xchat
+ \w{3}\s\d{2}\s # Date
+ \d{2}:\d{2}:\d{2}\s+ # Time + Whitespace
+ )?
+ """
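+    # Illustrative timestamps accepted by the three alternatives above
+    # (invented samples, not drawn from real logs):
+    #   irssi/xchat style:  [12:34]   (12:34:56)   2023-01-02 12:34
+    #   weechat style:      2023 Jan 02 12:34:56
+    #   xchat style:        Jan 02 12:34:56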
+ tokens = {
+ 'root': [
+ # log start/end
+ (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
+            # hack: lines consisting only of a timestamp and a nick
+ ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
+ # normal msgs
+ ("^" + timestamp + r"""
+ (\s*<.*?>\s*) # Nick """,
+ bygroups(Comment.Preproc, Name.Tag), 'msg'),
+ # /me msgs
+ ("^" + timestamp + r"""
+ (\s*[*]\s+) # Star
+ (\S+\s+.*?\n) # Nick + rest of message """,
+ bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
+ # join/part msgs
+ ("^" + timestamp + r"""
+ (\s*(?:\*{3}|<?-[!@=P]?->?)\s*) # Star(s) or symbols
+ (\S+\s+) # Nick + Space
+ (.*?\n) # Rest of message """,
+ bygroups(Comment.Preproc, Keyword, String, Comment)),
+ (r"^.*?\n", Text),
+ ],
+ 'msg': [
+ (r"\S+:(?!//)", Name.Attribute), # Prefix
+ (r".*\n", Text, '#pop'),
+ ],
+ }
+
+
+class GettextLexer(RegexLexer):
+ """
+ Lexer for Gettext catalog files.
+
+ .. versionadded:: 0.9
+ """
+ name = 'Gettext Catalog'
+ aliases = ['pot', 'po']
+ filenames = ['*.pot', '*.po']
+ mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']
+
+ tokens = {
+ 'root': [
+ (r'^#,\s.*?$', Keyword.Type),
+ (r'^#:\s.*?$', Keyword.Declaration),
+ # (r'^#$', Comment),
+ (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
+ (r'^(")([A-Za-z-]+:)(.*")$',
+ bygroups(String, Name.Property, String)),
+ (r'^".*"$', String),
+ (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
+ bygroups(Name.Variable, Text, String)),
+ (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
+ bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
+ ]
+ }
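+    # A sketch of the catalog lines the rules above target (invented
+    # sample entries):
+    #   #, fuzzy              -> flag lines      (Keyword.Type)
+    #   #: src/main.c:42      -> reference lines (Keyword.Declaration)
+    #   # translator note     -> plain comments  (Comment.Single)
+    #   msgid "Hello"         -> Name.Variable + Text + String
+    #   msgstr[0] "Bonjour"   -> Name.Variable + Number.Integer + ...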
+
+
+class HttpLexer(RegexLexer):
+ """
+ Lexer for HTTP sessions.
+
+ .. versionadded:: 1.5
+ """
+
+ name = 'HTTP'
+ aliases = ['http']
+
+ flags = re.DOTALL
+
+ def get_tokens_unprocessed(self, text, stack=('root',)):
+ """Reset the content-type state."""
+ self.content_type = None
+ return RegexLexer.get_tokens_unprocessed(self, text, stack)
+
+ def header_callback(self, match):
+ if match.group(1).lower() == 'content-type':
+ content_type = match.group(5).strip()
+ if ';' in content_type:
+ content_type = content_type[:content_type.find(';')].strip()
+ self.content_type = content_type
+ yield match.start(1), Name.Attribute, match.group(1)
+ yield match.start(2), Text, match.group(2)
+ yield match.start(3), Operator, match.group(3)
+ yield match.start(4), Text, match.group(4)
+ yield match.start(5), Literal, match.group(5)
+ yield match.start(6), Text, match.group(6)
+
+ def continuous_header_callback(self, match):
+ yield match.start(1), Text, match.group(1)
+ yield match.start(2), Literal, match.group(2)
+ yield match.start(3), Text, match.group(3)
+
+ def content_callback(self, match):
+ content_type = getattr(self, 'content_type', None)
+ content = match.group()
+ offset = match.start()
+ if content_type:
+ from pygments.lexers import get_lexer_for_mimetype
+ possible_lexer_mimetypes = [content_type]
+ if '+' in content_type:
+ # application/calendar+xml can be treated as application/xml
+ # if there's not a better match.
+ general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
+ content_type)
+ possible_lexer_mimetypes.append(general_type)
+
+ for i in possible_lexer_mimetypes:
+ try:
+ lexer = get_lexer_for_mimetype(i)
+ except ClassNotFound:
+ pass
+ else:
+ for idx, token, value in lexer.get_tokens_unprocessed(content):
+ yield offset + idx, token, value
+ return
+ yield offset, Text, content
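+    # Sniffing example (a sketch, not from the source): given a header
+    # "Content-Type: application/json; charset=utf-8", header_callback
+    # stores "application/json", and content_callback then re-lexes the
+    # body with the JSON lexer, shifting token offsets to the body start.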
+
+ tokens = {
+ 'root': [
+ (r'([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)'
+ r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
+ bygroups(Name.Function, Text, Name.Namespace, Text,
+ Keyword.Reserved, Operator, Number, Text),
+ 'headers'),
+ (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
+ bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
+ Name.Exception, Text),
+ 'headers'),
+ ],
+ 'headers': [
+ (r'([^\s:]+)( *)(:)( *)([^\r\n]*)(\r?\n|\Z)', header_callback),
+ (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
+ (r'\r?\n', Text, 'content')
+ ],
+ 'content': [
+ (r'.+', content_callback)
+ ]
+ }
+
+ def analyse_text(text):
+        return any(
+ re.search(pattern, text) is not None
+ for pattern in (
+ r'^([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
+ r'^(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
+ )
+ )
+
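+# Usage sketch for HttpLexer via the standard Pygments API (the sample
+# request below is invented for illustration):
+#
+#   from pygments import highlight
+#   from pygments.formatters import TerminalFormatter
+#   session = "GET /index.html HTTP/1.1\r\nHost: example.org\r\n\r\n"
+#   print(highlight(session, HttpLexer(), TerminalFormatter()))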
+
+class TodotxtLexer(RegexLexer):
+ """
+ Lexer for Todo.txt todo list format.
+
+ .. versionadded:: 2.0
+ """
+
+ name = 'Todotxt'
+ url = 'http://todotxt.com/'
+ aliases = ['todotxt']
+ # *.todotxt is not a standard extension for Todo.txt files; including it
+ # makes testing easier, and also makes autodetecting file type easier.
+ filenames = ['todo.txt', '*.todotxt']
+ mimetypes = ['text/x-todo']
+
+ # Aliases mapping standard token types of Todo.txt format concepts
+ CompleteTaskText = Operator # Chosen to de-emphasize complete tasks
+ IncompleteTaskText = Text # Incomplete tasks should look like plain text
+
+ # Priority should have most emphasis to indicate importance of tasks
+ Priority = Generic.Heading
+ # Dates should have next most emphasis because time is important
+ Date = Generic.Subheading
+
+ # Project and context should have equal weight, and be in different colors
+ Project = Generic.Error
+ Context = String
+
+ # If tag functionality is added, it should have the same weight as Project
+ # and Context, and a different color. Generic.Traceback would work well.
+
+ # Regex patterns for building up rules; dates, priorities, projects, and
+ # contexts are all atomic
+ # TODO: Make date regex more ISO 8601 compliant
+ date_regex = r'\d{4,}-\d{2}-\d{2}'
+ priority_regex = r'\([A-Z]\)'
+ project_regex = r'\+\S+'
+ context_regex = r'@\S+'
+
+ # Compound regex expressions
+ complete_one_date_regex = r'(x )(' + date_regex + r')'
+ complete_two_date_regex = (complete_one_date_regex + r'( )(' +
+ date_regex + r')')
+ priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'
+
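+    # Invented sample tasks hitting the entry points below:
+    #   x 2011-03-02 2011-03-01 Review pull request +project @github
+    #   (A) 2011-03-02 Call Mom @phone
+    #   Buy milk @store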
+ tokens = {
+ # Should parse starting at beginning of line; each line is a task
+ 'root': [
+            # Complete task entry points (two in total):
+ # 1. Complete task with two dates
+ (complete_two_date_regex, bygroups(CompleteTaskText, Date,
+ CompleteTaskText, Date),
+ 'complete'),
+ # 2. Complete task with one date
+ (complete_one_date_regex, bygroups(CompleteTaskText, Date),
+ 'complete'),
+
+            # Incomplete task entry points (six in total):
+ # 1. Priority plus date
+ (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
+ 'incomplete'),
+ # 2. Priority only
+ (priority_regex, Priority, 'incomplete'),
+ # 3. Leading date
+ (date_regex, Date, 'incomplete'),
+ # 4. Leading context
+ (context_regex, Context, 'incomplete'),
+ # 5. Leading project
+ (project_regex, Project, 'incomplete'),
+ # 6. Non-whitespace catch-all
+ (r'\S+', IncompleteTaskText, 'incomplete'),
+ ],
+
+ # Parse a complete task
+ 'complete': [
+ # Newline indicates end of task, should return to root
+ (r'\s*\n', CompleteTaskText, '#pop'),
+ # Tokenize contexts and projects
+ (context_regex, Context),
+ (project_regex, Project),
+ # Tokenize non-whitespace text
+ (r'\S+', CompleteTaskText),
+ # Tokenize whitespace not containing a newline
+ (r'\s+', CompleteTaskText),
+ ],
+
+ # Parse an incomplete task
+ 'incomplete': [
+ # Newline indicates end of task, should return to root
+ (r'\s*\n', IncompleteTaskText, '#pop'),
+ # Tokenize contexts and projects
+ (context_regex, Context),
+ (project_regex, Project),
+ # Tokenize non-whitespace text
+ (r'\S+', IncompleteTaskText),
+ # Tokenize whitespace not containing a newline
+ (r'\s+', IncompleteTaskText),
+ ],
+ }
+
+
+class NotmuchLexer(RegexLexer):
+ """
+ For Notmuch email text format.
+
+ .. versionadded:: 2.5
+
+ Additional options accepted:
+
+ `body_lexer`
+ If given, highlight the contents of the message body with the specified
+ lexer, else guess it according to the body content (default: ``None``).
+ """
+
+ name = 'Notmuch'
+ url = 'https://notmuchmail.org/'
+ aliases = ['notmuch']
+
+ def _highlight_code(self, match):
+ code = match.group(1)
+
+ try:
+ if self.body_lexer:
+ lexer = get_lexer_by_name(self.body_lexer)
+ else:
+ lexer = guess_lexer(code.strip())
+ except ClassNotFound:
+ lexer = get_lexer_by_name('text')
+
+ yield from lexer.get_tokens_unprocessed(code)
+
+ tokens = {
+ 'root': [
+ (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
+ ],
+ 'message-attr': [
+ (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
+ (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
+ bygroups(Name.Attribute, Number.Integer)),
+ (r'(\s*filename:\s*)(.+\n)',
+ bygroups(Name.Attribute, String)),
+ default('#pop'),
+ ],
+ 'message': [
+ (r'\fmessage\}\n', Keyword, '#pop'),
+ (r'\fheader\{\n', Keyword, 'header'),
+ (r'\fbody\{\n', Keyword, 'body'),
+ ],
+ 'header': [
+ (r'\fheader\}\n', Keyword, '#pop'),
+ (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
+ bygroups(Name.Attribute, String)),
+ (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
+ bygroups(Generic.Strong, Literal, Name.Tag)),
+ ],
+ 'body': [
+ (r'\fpart\{\n', Keyword, 'part'),
+ (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
+ (r'\fbody\}\n', Keyword, '#pop'),
+ ],
+ 'part-attr': [
+ (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
+ (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
+ bygroups(Punctuation, Name.Attribute, String)),
+ (r'(,\s*)(Content-type:\s*)(.+\n)',
+ bygroups(Punctuation, Name.Attribute, String)),
+ default('#pop'),
+ ],
+ 'part': [
+ (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
+ (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
+ (r'^Non-text part: .*\n', Comment),
+ (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
+ ],
+ }
+
+ def analyse_text(text):
+ return 1.0 if text.startswith('\fmessage{') else 0.0
+
+ def __init__(self, **options):
+ self.body_lexer = options.get('body_lexer', None)
+ RegexLexer.__init__(self, **options)
+
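+# Usage sketch (hypothetical invocation): NotmuchLexer(body_lexer='diff')
+# pins message-body highlighting to the diff lexer; without the option,
+# guess_lexer() picks one from the body text (see _highlight_code above).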
+
+class KernelLogLexer(RegexLexer):
+ """
+ For Linux Kernel log ("dmesg") output.
+
+ .. versionadded:: 2.6
+ """
+ name = 'Kernel log'
+ aliases = ['kmsg', 'dmesg']
+ filenames = ['*.kmsg', '*.dmesg']
+
+ tokens = {
+ 'root': [
+ (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
+            (r'^[^:]+:info  : (?=\[)', Text, 'info'),
+            (r'^[^:]+:warn  : (?=\[)', Text, 'warn'),
+            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
+            (r'^[^:]+:err   : (?=\[)', Text, 'error'),
+            (r'^[^:]+:crit  : (?=\[)', Text, 'error'),
+ (r'^(?=\[)', Text, 'unknown'),
+ ],
+ 'unknown': [
+ (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
+ (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
+ default('info'),
+ ],
+ 'base': [
+ (r'\[[0-9. ]+\] ', Number),
+ (r'(?<=\] ).+?:', Keyword),
+ (r'\n', Text, '#pop'),
+ ],
+ 'debug': [
+ include('base'),
+ (r'.+\n', Comment, '#pop')
+ ],
+ 'info': [
+ include('base'),
+ (r'.+\n', Text, '#pop')
+ ],
+ 'warn': [
+ include('base'),
+ (r'.+\n', Generic.Strong, '#pop')
+ ],
+ 'error': [
+ include('base'),
+ (r'.+\n', Generic.Error, '#pop')
+ ]
+ }
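+
+# Invented example lines for the root rules above:
+#   kern  :err   : [   12.345678] usb 1-1: device descriptor read/64, error -71
+#       -> syslog-prefixed form, routed to the 'error' state
+#   [    0.000000] Linux version 6.1.0 ...
+#       -> bare form, routed to 'unknown' and classified heuristically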