diff options
Diffstat (limited to 'venv/lib/python3.11/site-packages/pygments/lexers/markup.py')
-rw-r--r-- | venv/lib/python3.11/site-packages/pygments/lexers/markup.py | 1550 |
1 files changed, 0 insertions, 1550 deletions
diff --git a/venv/lib/python3.11/site-packages/pygments/lexers/markup.py b/venv/lib/python3.11/site-packages/pygments/lexers/markup.py deleted file mode 100644 index bb4c7ce..0000000 --- a/venv/lib/python3.11/site-packages/pygments/lexers/markup.py +++ /dev/null @@ -1,1550 +0,0 @@ -""" - pygments.lexers.markup - ~~~~~~~~~~~~~~~~~~~~~~ - - Lexers for non-HTML markup languages. - - :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. - :license: BSD, see LICENSE for details. -""" - -import re - -from pygments.lexers.html import XmlLexer -from pygments.lexers.javascript import JavascriptLexer -from pygments.lexers.css import CssLexer -from pygments.lexers.lilypond import LilyPondLexer -from pygments.lexers.data import JsonLexer - -from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \ - using, this, do_insertions, default, words -from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ - Number, Punctuation, Generic, Other, Whitespace -from pygments.util import get_bool_opt, ClassNotFound - -__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer', - 'MozPreprocHashLexer', 'MozPreprocPercentLexer', - 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer', - 'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer', 'WikitextLexer'] - - -class BBCodeLexer(RegexLexer): - """ - A lexer that highlights BBCode(-like) syntax. - - .. 
versionadded:: 0.6 - """ - - name = 'BBCode' - aliases = ['bbcode'] - mimetypes = ['text/x-bbcode'] - - tokens = { - 'root': [ - (r'[^[]+', Text), - # tag/end tag begin - (r'\[/?\w+', Keyword, 'tag'), - # stray bracket - (r'\[', Text), - ], - 'tag': [ - (r'\s+', Text), - # attribute with value - (r'(\w+)(=)("?[^\s"\]]+"?)', - bygroups(Name.Attribute, Operator, String)), - # tag argument (a la [color=green]) - (r'(=)("?[^\s"\]]+"?)', - bygroups(Operator, String)), - # tag end - (r'\]', Keyword, '#pop'), - ], - } - - -class MoinWikiLexer(RegexLexer): - """ - For MoinMoin (and Trac) Wiki markup. - - .. versionadded:: 0.7 - """ - - name = 'MoinMoin/Trac Wiki markup' - aliases = ['trac-wiki', 'moin'] - filenames = [] - mimetypes = ['text/x-trac-wiki'] - flags = re.MULTILINE | re.IGNORECASE - - tokens = { - 'root': [ - (r'^#.*$', Comment), - (r'(!)(\S+)', bygroups(Keyword, Text)), # Ignore-next - # Titles - (r'^(=+)([^=]+)(=+)(\s*#.+)?$', - bygroups(Generic.Heading, using(this), Generic.Heading, String)), - # Literal code blocks, with optional shebang - (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'), - (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting - # Lists - (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)), - (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)), - # Other Formatting - (r'\[\[\w+.*?\]\]', Keyword), # Macro - (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])', - bygroups(Keyword, String, Keyword)), # Link - (r'^----+$', Keyword), # Horizontal rules - (r'[^\n\'\[{!_~^,|]+', Text), - (r'\n', Text), - (r'.', Text), - ], - 'codeblock': [ - (r'\}\}\}', Name.Builtin, '#pop'), - # these blocks are allowed to be nested in Trac, but not MoinMoin - (r'\{\{\{', Text, '#push'), - (r'[^{}]+', Comment.Preproc), # slurp boring text - (r'.', Comment.Preproc), # allow loose { or } - ], - } - - -class RstLexer(RegexLexer): - """ - For reStructuredText markup. - - .. 
versionadded:: 0.7 - - Additional options accepted: - - `handlecodeblocks` - Highlight the contents of ``.. sourcecode:: language``, - ``.. code:: language`` and ``.. code-block:: language`` - directives with a lexer for the given language (default: - ``True``). - - .. versionadded:: 0.8 - """ - name = 'reStructuredText' - url = 'https://docutils.sourceforge.io/rst.html' - aliases = ['restructuredtext', 'rst', 'rest'] - filenames = ['*.rst', '*.rest'] - mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"] - flags = re.MULTILINE - - def _handle_sourcecode(self, match): - from pygments.lexers import get_lexer_by_name - - # section header - yield match.start(1), Punctuation, match.group(1) - yield match.start(2), Text, match.group(2) - yield match.start(3), Operator.Word, match.group(3) - yield match.start(4), Punctuation, match.group(4) - yield match.start(5), Text, match.group(5) - yield match.start(6), Keyword, match.group(6) - yield match.start(7), Text, match.group(7) - - # lookup lexer if wanted and existing - lexer = None - if self.handlecodeblocks: - try: - lexer = get_lexer_by_name(match.group(6).strip()) - except ClassNotFound: - pass - indention = match.group(8) - indention_size = len(indention) - code = (indention + match.group(9) + match.group(10) + match.group(11)) - - # no lexer for this language. handle it like it was a code block - if lexer is None: - yield match.start(8), String, code - return - - # highlight the lines with the lexer. - ins = [] - codelines = code.splitlines(True) - code = '' - for line in codelines: - if len(line) > indention_size: - ins.append((len(code), [(0, Text, line[:indention_size])])) - code += line[indention_size:] - else: - code += line - yield from do_insertions(ins, lexer.get_tokens_unprocessed(code)) - - # from docutils.parsers.rst.states - closers = '\'")]}>\u2019\u201d\xbb!?' 
- unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0' - end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' - % (re.escape(unicode_delimiters), - re.escape(closers))) - - tokens = { - 'root': [ - # Heading with overline - (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)' - r'(.+)(\n)(\1)(\n)', - bygroups(Generic.Heading, Text, Generic.Heading, - Text, Generic.Heading, Text)), - # Plain heading - (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|' - r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)', - bygroups(Generic.Heading, Text, Generic.Heading, Text)), - # Bulleted lists - (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)', - bygroups(Text, Number, using(this, state='inline'))), - # Numbered lists - (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)', - bygroups(Text, Number, using(this, state='inline'))), - (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)', - bygroups(Text, Number, using(this, state='inline'))), - # Numbered, but keep words at BOL from becoming lists - (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)', - bygroups(Text, Number, using(this, state='inline'))), - (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)', - bygroups(Text, Number, using(this, state='inline'))), - # Line blocks - (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)', - bygroups(Text, Operator, using(this, state='inline'))), - # Sourcecode directives - (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)' - r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)', - _handle_sourcecode), - # A directive - (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', - bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, - using(this, state='inline'))), - # A reference target - (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$', - bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), - # A footnote/citation target - (r'^( *\.\.)(\s*)(\[.+\])(.*?)$', - bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), - # A substitution def - (r'^( 
*\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', - bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word, - Punctuation, Text, using(this, state='inline'))), - # Comments - (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc), - # Field list marker - (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)', - bygroups(Text, Name.Class, Text)), - # Definition list - (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)', - bygroups(using(this, state='inline'), using(this, state='inline'))), - # Code blocks - (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)', - bygroups(String.Escape, Text, String, String, Text, String)), - include('inline'), - ], - 'inline': [ - (r'\\.', Text), # escape - (r'``', String, 'literal'), # code - (r'(`.+?)(<.+?>)(`__?)', # reference with inline target - bygroups(String, String.Interpol, String)), - (r'`.+?`__?', String), # reference - (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?', - bygroups(Name.Variable, Name.Attribute)), # role - (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)', - bygroups(Name.Attribute, Name.Variable)), # role (content first) - (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis - (r'\*.+?\*', Generic.Emph), # Emphasis - (r'\[.*?\]_', String), # Footnote or citation - (r'<.+?>', Name.Tag), # Hyperlink - (r'[^\\\n\[*`:]+', Text), - (r'.', Text), - ], - 'literal': [ - (r'[^`]+', String), - (r'``' + end_string_suffix, String, '#pop'), - (r'`', String), - ] - } - - def __init__(self, **options): - self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) - RegexLexer.__init__(self, **options) - - def analyse_text(text): - if text[:2] == '..' and text[2:3] != '.': - return 0.3 - p1 = text.find("\n") - p2 = text.find("\n", p1 + 1) - if (p2 > -1 and # has two lines - p1 * 2 + 1 == p2 and # they are the same length - text[p1+1] in '-=' and # the next line both starts and ends with - text[p1+1] == text[p2-1]): # ...a sufficiently high header - return 0.5 - - -class TexLexer(RegexLexer): - """ - Lexer for the TeX and LaTeX typesetting languages. 
- """ - - name = 'TeX' - aliases = ['tex', 'latex'] - filenames = ['*.tex', '*.aux', '*.toc'] - mimetypes = ['text/x-tex', 'text/x-latex'] - - tokens = { - 'general': [ - (r'%.*?\n', Comment), - (r'[{}]', Name.Builtin), - (r'[&_^]', Name.Builtin), - ], - 'root': [ - (r'\\\[', String.Backtick, 'displaymath'), - (r'\\\(', String, 'inlinemath'), - (r'\$\$', String.Backtick, 'displaymath'), - (r'\$', String, 'inlinemath'), - (r'\\([a-zA-Z]+|.)', Keyword, 'command'), - (r'\\$', Keyword), - include('general'), - (r'[^\\$%&_^{}]+', Text), - ], - 'math': [ - (r'\\([a-zA-Z]+|.)', Name.Variable), - include('general'), - (r'[0-9]+', Number), - (r'[-=!+*/()\[\]]', Operator), - (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin), - ], - 'inlinemath': [ - (r'\\\)', String, '#pop'), - (r'\$', String, '#pop'), - include('math'), - ], - 'displaymath': [ - (r'\\\]', String, '#pop'), - (r'\$\$', String, '#pop'), - (r'\$', Name.Builtin), - include('math'), - ], - 'command': [ - (r'\[.*?\]', Name.Attribute), - (r'\*', Keyword), - default('#pop'), - ], - } - - def analyse_text(text): - for start in ("\\documentclass", "\\input", "\\documentstyle", - "\\relax"): - if text[:len(start)] == start: - return True - - -class GroffLexer(RegexLexer): - """ - Lexer for the (g)roff typesetting language, supporting groff - extensions. Mainly useful for highlighting manpage sources. - - .. versionadded:: 0.6 - """ - - name = 'Groff' - aliases = ['groff', 'nroff', 'man'] - filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm'] - mimetypes = ['application/x-troff', 'text/troff'] - - tokens = { - 'root': [ - (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'), - (r'\.', Punctuation, 'request'), - # Regular characters, slurp till we find a backslash or newline - (r'[^\\\n]+', Text, 'textline'), - default('textline'), - ], - 'textline': [ - include('escapes'), - (r'[^\\\n]+', Text), - (r'\n', Text, '#pop'), - ], - 'escapes': [ - # groff has many ways to write escapes. 
- (r'\\"[^\n]*', Comment), - (r'\\[fn]\w', String.Escape), - (r'\\\(.{2}', String.Escape), - (r'\\.\[.*\]', String.Escape), - (r'\\.', String.Escape), - (r'\\\n', Text, 'request'), - ], - 'request': [ - (r'\n', Text, '#pop'), - include('escapes'), - (r'"[^\n"]+"', String.Double), - (r'\d+', Number), - (r'\S+', String), - (r'\s+', Text), - ], - } - - def analyse_text(text): - if text[:1] != '.': - return False - if text[:3] == '.\\"': - return True - if text[:4] == '.TH ': - return True - if text[1:3].isalnum() and text[3].isspace(): - return 0.9 - - -class MozPreprocHashLexer(RegexLexer): - """ - Lexer for Mozilla Preprocessor files (with '#' as the marker). - - Other data is left untouched. - - .. versionadded:: 2.0 - """ - name = 'mozhashpreproc' - aliases = [name] - filenames = [] - mimetypes = [] - - tokens = { - 'root': [ - (r'^#', Comment.Preproc, ('expr', 'exprstart')), - (r'.+', Other), - ], - 'exprstart': [ - (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'), - (words(( - 'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif', - 'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter', - 'include', 'includesubst', 'error')), - Comment.Preproc, '#pop'), - ], - 'expr': [ - (words(('!', '!=', '==', '&&', '||')), Operator), - (r'(defined)(\()', bygroups(Keyword, Punctuation)), - (r'\)', Punctuation), - (r'[0-9]+', Number.Decimal), - (r'__\w+?__', Name.Variable), - (r'@\w+?@', Name.Class), - (r'\w+', Name), - (r'\n', Text, '#pop'), - (r'\s+', Text), - (r'\S', Punctuation), - ], - } - - -class MozPreprocPercentLexer(MozPreprocHashLexer): - """ - Lexer for Mozilla Preprocessor files (with '%' as the marker). - - Other data is left untouched. - - .. 
versionadded:: 2.0 - """ - name = 'mozpercentpreproc' - aliases = [name] - filenames = [] - mimetypes = [] - - tokens = { - 'root': [ - (r'^%', Comment.Preproc, ('expr', 'exprstart')), - (r'.+', Other), - ], - } - - -class MozPreprocXulLexer(DelegatingLexer): - """ - Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the - `XmlLexer`. - - .. versionadded:: 2.0 - """ - name = "XUL+mozpreproc" - aliases = ['xul+mozpreproc'] - filenames = ['*.xul.in'] - mimetypes = [] - - def __init__(self, **options): - super().__init__(XmlLexer, MozPreprocHashLexer, **options) - - -class MozPreprocJavascriptLexer(DelegatingLexer): - """ - Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the - `JavascriptLexer`. - - .. versionadded:: 2.0 - """ - name = "Javascript+mozpreproc" - aliases = ['javascript+mozpreproc'] - filenames = ['*.js.in'] - mimetypes = [] - - def __init__(self, **options): - super().__init__(JavascriptLexer, MozPreprocHashLexer, **options) - - -class MozPreprocCssLexer(DelegatingLexer): - """ - Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the - `CssLexer`. - - .. versionadded:: 2.0 - """ - name = "CSS+mozpreproc" - aliases = ['css+mozpreproc'] - filenames = ['*.css.in'] - mimetypes = [] - - def __init__(self, **options): - super().__init__(CssLexer, MozPreprocPercentLexer, **options) - - -class MarkdownLexer(RegexLexer): - """ - For Markdown markup. - - .. 
versionadded:: 2.2 - """ - name = 'Markdown' - url = 'https://daringfireball.net/projects/markdown/' - aliases = ['markdown', 'md'] - filenames = ['*.md', '*.markdown'] - mimetypes = ["text/x-markdown"] - flags = re.MULTILINE - - def _handle_codeblock(self, match): - from pygments.lexers import get_lexer_by_name - - yield match.start('initial'), String.Backtick, match.group('initial') - yield match.start('lang'), String.Backtick, match.group('lang') - if match.group('afterlang') is not None: - yield match.start('whitespace'), Whitespace, match.group('whitespace') - yield match.start('extra'), Text, match.group('extra') - yield match.start('newline'), Whitespace, match.group('newline') - - # lookup lexer if wanted and existing - lexer = None - if self.handlecodeblocks: - try: - lexer = get_lexer_by_name(match.group('lang').strip()) - except ClassNotFound: - pass - code = match.group('code') - # no lexer for this language. handle it like it was a code block - if lexer is None: - yield match.start('code'), String, code - else: - # FIXME: aren't the offsets wrong? 
- yield from do_insertions([], lexer.get_tokens_unprocessed(code)) - - yield match.start('terminator'), String.Backtick, match.group('terminator') - - tokens = { - 'root': [ - # heading with '#' prefix (atx-style) - (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)), - # subheading with '#' prefix (atx-style) - (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)), - # heading with '=' underlines (Setext-style) - (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)), - # subheading with '-' underlines (Setext-style) - (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)), - # task list - (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)', - bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))), - # bulleted list - (r'^(\s*)([*-])(\s)(.+\n)', - bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))), - # numbered list - (r'^(\s*)([0-9]+\.)( .+\n)', - bygroups(Whitespace, Keyword, using(this, state='inline'))), - # quote - (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)), - # code block fenced by 3 backticks - (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick), - # code block with language - # Some tools include extra stuff after the language name, just - # highlight that as text. For example: https://docs.enola.dev/use/execmd - (r'''(?x) - ^(?P<initial>\s*```) - (?P<lang>[\w\-]+) - (?P<afterlang> - (?P<whitespace>[^\S\n]+) - (?P<extra>.*))? - (?P<newline>\n) - (?P<code>(.|\n)*?) - (?P<terminator>^\s*```$\n) - ''', - _handle_codeblock), - - include('inline'), - ], - 'inline': [ - # escape - (r'\\.', Text), - # inline code - (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)), - # warning: the following rules eat outer tags. - # eg. 
**foo _bar_ baz** => foo and baz are not recognized as bold - # bold fenced by '**' - (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)), - # bold fenced by '__' - (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)), - # italics fenced by '*' - (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)), - # italics fenced by '_' - (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)), - # strikethrough - (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)), - # mentions and topics (twitter and github stuff) - (r'[@#][\w/:]+', Name.Entity), - # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png) - (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))', - bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)), - # reference-style links, e.g.: - # [an example][id] - # [id]: http://example.com/ - (r'(\[)([^]]+)(\])(\[)([^]]*)(\])', - bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)), - (r'^(\s*\[)([^]]*)(\]:\s*)(.+)', - bygroups(Text, Name.Label, Text, Name.Attribute)), - - # general text, must come last! - (r'[^\\\s]+', Text), - (r'.', Text), - ], - } - - def __init__(self, **options): - self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) - RegexLexer.__init__(self, **options) - - -class TiddlyWiki5Lexer(RegexLexer): - """ - For TiddlyWiki5 markup. - - .. 
versionadded:: 2.7 - """ - name = 'tiddler' - url = 'https://tiddlywiki.com/#TiddlerFiles' - aliases = ['tid'] - filenames = ['*.tid'] - mimetypes = ["text/vnd.tiddlywiki"] - flags = re.MULTILINE - - def _handle_codeblock(self, match): - """ - match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks - """ - from pygments.lexers import get_lexer_by_name - - # section header - yield match.start(1), String, match.group(1) - yield match.start(2), String, match.group(2) - yield match.start(3), Text, match.group(3) - - # lookup lexer if wanted and existing - lexer = None - if self.handlecodeblocks: - try: - lexer = get_lexer_by_name(match.group(2).strip()) - except ClassNotFound: - pass - code = match.group(4) - - # no lexer for this language. handle it like it was a code block - if lexer is None: - yield match.start(4), String, code - return - - yield from do_insertions([], lexer.get_tokens_unprocessed(code)) - - yield match.start(5), String, match.group(5) - - def _handle_cssblock(self, match): - """ - match args: 1:style tag 2:newline, 3:code, 4:closing style tag - """ - from pygments.lexers import get_lexer_by_name - - # section header - yield match.start(1), String, match.group(1) - yield match.start(2), String, match.group(2) - - lexer = None - if self.handlecodeblocks: - try: - lexer = get_lexer_by_name('css') - except ClassNotFound: - pass - code = match.group(3) - - # no lexer for this language. 
handle it like it was a code block - if lexer is None: - yield match.start(3), String, code - return - - yield from do_insertions([], lexer.get_tokens_unprocessed(code)) - - yield match.start(4), String, match.group(4) - - tokens = { - 'root': [ - # title in metadata section - (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)), - # headings - (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)), - (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)), - # bulleted or numbered lists or single-line block quotes - # (can be mixed) - (r'^(\s*)([*#>]+)(\s*)(.+\n)', - bygroups(Text, Keyword, Text, using(this, state='inline'))), - # multi-line block quotes - (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)), - # table header - (r'^(\|.*?\|h)$', bygroups(Generic.Strong)), - # table footer or caption - (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)), - # table class - (r'^(\|.*?\|k)$', bygroups(Name.Tag)), - # definitions - (r'^(;.*)$', bygroups(Generic.Strong)), - # text block - (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)), - # code block with language - (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock), - # CSS style block - (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock), - - include('keywords'), - include('inline'), - ], - 'keywords': [ - (words(( - '\\define', '\\end', 'caption', 'created', 'modified', 'tags', - 'title', 'type'), prefix=r'^', suffix=r'\b'), - Keyword), - ], - 'inline': [ - # escape - (r'\\.', Text), - # created or modified date - (r'\d{17}', Number.Integer), - # italics - (r'(\s)(//[^/]+//)((?=\W|\n))', - bygroups(Text, Generic.Emph, Text)), - # superscript - (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)), - # subscript - (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)), - # underscore - (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)), - # bold - (r"(\s)(''[^']+'')((?=\W|\n))", - bygroups(Text, Generic.Strong, Text)), - # strikethrough - 
(r'(\s)(~~[^~]+~~)((?=\W|\n))', - bygroups(Text, Generic.Deleted, Text)), - # TiddlyWiki variables - (r'<<[^>]+>>', Name.Tag), - (r'\$\$[^$]+\$\$', Name.Tag), - (r'\$\([^)]+\)\$', Name.Tag), - # TiddlyWiki style or class - (r'^@@.*$', Name.Tag), - # HTML tags - (r'</?[^>]+>', Name.Tag), - # inline code - (r'`[^`]+`', String.Backtick), - # HTML escaped symbols - (r'&\S*?;', String.Regex), - # Wiki links - (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)), - # External links - (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})', - bygroups(Text, Name.Tag, Text, Name.Attribute, Text)), - # Transclusion - (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)), - # URLs - (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)), - - # general text, must come last! - (r'[\w]+', Text), - (r'.', Text) - ], - } - - def __init__(self, **options): - self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) - RegexLexer.__init__(self, **options) - - -class WikitextLexer(RegexLexer): - """ - For MediaWiki Wikitext. - - Parsing Wikitext is tricky, and results vary between different MediaWiki - installations, so we only highlight common syntaxes (built-in or from - popular extensions), and also assume templates produce no unbalanced - syntaxes. - - .. 
versionadded:: 2.15 - """ - name = 'Wikitext' - url = 'https://www.mediawiki.org/wiki/Wikitext' - aliases = ['wikitext', 'mediawiki'] - filenames = [] - mimetypes = ['text/x-wiki'] - flags = re.MULTILINE - - def nowiki_tag_rules(tag_name): - return [ - (r'(?i)(</)({})(\s*)(>)'.format(tag_name), bygroups(Punctuation, - Name.Tag, Whitespace, Punctuation), '#pop'), - include('entity'), - include('text'), - ] - - def plaintext_tag_rules(tag_name): - return [ - (r'(?si)(.*?)(</)({})(\s*)(>)'.format(tag_name), bygroups(Text, - Punctuation, Name.Tag, Whitespace, Punctuation), '#pop'), - ] - - def delegate_tag_rules(tag_name, lexer): - return [ - (r'(?i)(</)({})(\s*)(>)'.format(tag_name), bygroups(Punctuation, - Name.Tag, Whitespace, Punctuation), '#pop'), - (r'(?si).+?(?=</{}\s*>)'.format(tag_name), using(lexer)), - ] - - def text_rules(token): - return [ - (r'\w+', token), - (r'[^\S\n]+', token), - (r'(?s).', token), - ] - - def handle_syntaxhighlight(self, match, ctx): - from pygments.lexers import get_lexer_by_name - - attr_content = match.group() - start = 0 - index = 0 - while True: - index = attr_content.find('>', start) - # Exclude comment end (-->) - if attr_content[index-2:index] != '--': - break - start = index + 1 - - if index == -1: - # No tag end - yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr']) - return - attr = attr_content[:index] - yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr']) - yield match.start(3) + index, Punctuation, '>' - - lexer = None - content = attr_content[index+1:] - lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr) - - if len(lang_match) >= 1: - # Pick the last match in case of multiple matches - lang = lang_match[-1][1] - try: - lexer = get_lexer_by_name(lang) - except ClassNotFound: - pass - - if lexer is None: - yield match.start() + index + 1, Text, content - else: - yield from lexer.get_tokens_unprocessed(content) - - def handle_score(self, match, ctx): - attr_content = 
match.group() - start = 0 - index = 0 - while True: - index = attr_content.find('>', start) - # Exclude comment end (-->) - if attr_content[index-2:index] != '--': - break - start = index + 1 - - if index == -1: - # No tag end - yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr']) - return - attr = attr_content[:index] - content = attr_content[index+1:] - yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr']) - yield match.start(3) + index, Punctuation, '>' - - lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr) - # Pick the last match in case of multiple matches - lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond' - - if lang == 'lilypond': # Case sensitive - yield from LilyPondLexer().get_tokens_unprocessed(content) - else: # ABC - # FIXME: Use ABC lexer in the future - yield match.start() + index + 1, Text, content - - # a-z removed to prevent linter from complaining, REMEMBER to use (?i) - title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF' - nbsp_char = r'(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])' - link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])' - link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]' - double_slashes_i = { - '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__', - '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__', - } - double_slashes = { - '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__', - '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__', - '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__', - } - protocols = { - 'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://', - 'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://', - 'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 
'svn://', 'tel:', 'telnet://', 'urn:', - 'worldwind://', 'xmpp:', '//', - } - non_relative_protocols = protocols - {'//'} - html_tags = { - 'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code', - 'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', - 'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp', - 'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', - 'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr', - } - parser_tags = { - 'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math', - 'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages', - 'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar', - 'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery', - 'maplink', 'ce', 'references', - } - variant_langs = { - # ZhConverter.php - 'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw', - # WuuConverter.php - 'wuu', 'wuu-hans', 'wuu-hant', - # UzConverter.php - 'uz', 'uz-latn', 'uz-cyrl', - # TlyConverter.php - 'tly', 'tly-cyrl', - # TgConverter.php - 'tg', 'tg-latn', - # SrConverter.php - 'sr', 'sr-ec', 'sr-el', - # ShiConverter.php - 'shi', 'shi-tfng', 'shi-latn', - # ShConverter.php - 'sh-latn', 'sh-cyrl', - # KuConverter.php - 'ku', 'ku-arab', 'ku-latn', - # KkConverter.php - 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn', - # IuConverter.php - 'iu', 'ike-cans', 'ike-latn', - # GanConverter.php - 'gan', 'gan-hans', 'gan-hant', - # EnConverter.php - 'en', 'en-x-piglatin', - # CrhConverter.php - 'crh', 'crh-cyrl', 'crh-latn', - # BanConverter.php - 'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku', - } - magic_vars_i = { - 'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH', - } - magic_vars = { - 
'!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE', - 'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR', - 'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME', - 'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK', - 'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY', - 'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2', - 'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME', - 'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER', - 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS', - 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE', - 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1', - 'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME', - 'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE', - 'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE', - 'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE', - } - parser_functions_i = { - 'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM', - 'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL', - 'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST', - 'URLENCODE', - } - parser_functions = { - 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY', - 'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE', - 'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', - 'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 
'NUMBEROFPAGES', 'NUMBEROFUSERS', - 'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY', - 'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', - 'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME', - 'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE', - 'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE', - 'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE', - 'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS', - } - - tokens = { - 'root': [ - # Redirects - (r"""(?xi) - (\A\s*?)(\#REDIRECT:?) # may contain a colon - (\s+)(\[\[) (?=[^\]\n]* \]\]$) - """, - bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'), - # Subheadings - (r'^(={2,6})(.+?)(\1)(\s*$\n)', - bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)), - # Headings - (r'^(=.+?=)(\s*$\n)', - bygroups(Generic.Heading, Whitespace)), - # Double-slashed magic words - (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic), - (words(double_slashes), Name.Function.Magic), - # Raw URLs - (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols), - link_address, link_char_class), Name.Label), - # Magic links - (r'\b(?:RFC|PMID){}+[0-9]+\b'.format(nbsp_char), - Name.Function.Magic), - (r"""(?x) - \bISBN {nbsp_char} - (?: 97[89] {nbsp_dash}? )? - (?: [0-9] {nbsp_dash}? 
# State table for the lexer: maps each state name to an ordered list of rules.
# A rule is a (regex, token-or-callback[, state transition]) tuple, an
# include() of another state's rules, or a default() transition.
# Patterns written with (?x) are verbose regexes: whitespace is ignored and
# '#' starts a comment that runs to the end of the line, so the line breaks
# inside those triple-quoted strings are significant.
tokens = {
    'root': [
        # Redirects
        (r"""(?xi)
            (\A\s*?)(\#REDIRECT:?) # may contain a colon
            (\s+)(\[\[) (?=[^\]\n]* \]\]$)
        """,
         bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'),
        # Subheadings
        (r'^(={2,6})(.+?)(\1)(\s*$\n)',
         bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)),
        # Headings
        (r'^(=.+?=)(\s*$\n)',
         bygroups(Generic.Heading, Whitespace)),
        # Double-slashed magic words
        (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic),
        (words(double_slashes), Name.Function.Magic),
        # Raw URLs
        (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols),
                                     link_address, link_char_class), Name.Label),
        # Magic links
        (r'\b(?:RFC|PMID){}+[0-9]+\b'.format(nbsp_char),
         Name.Function.Magic),
        (r"""(?x)
            \bISBN {nbsp_char}
            (?: 97[89] {nbsp_dash}? )?
            (?: [0-9] {nbsp_dash}? ){{9}} # escape format()
            [0-9Xx]\b
        """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic),
        include('list'),
        include('inline'),
        include('text'),
    ],
    'redirect-inner': [
        (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'),
        (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)),
        (r'(?i)[{}]+'.format(title_char), Name.Tag),
    ],
    'list': [
        # Description lists
        (r'^;', Keyword, 'dt'),
        # Ordered lists, unordered lists and indents
        (r'^[#:*]+', Keyword),
        # Horizontal rules
        (r'^-{4,}', Keyword),
    ],
    'inline': [
        # Signatures
        (r'~{3,5}', Keyword),
        # Entities
        include('entity'),
        # Bold & italic
        (r"('')(''')(?!')", bygroups(Generic.Emph,
                                     Generic.EmphStrong), 'inline-italic-bold'),
        (r"'''(?!')", Generic.Strong, 'inline-bold'),
        (r"''(?!')", Generic.Emph, 'inline-italic'),
        # Comments & parameters & templates
        include('replaceable'),
        # Media links
        (
            r"""(?xi)
            (\[\[)
                (File|Image) (:)
                ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*)
                (?: (\#) ([%s]*?) )?
            """ % (title_char, f'{title_char}#'),
            bygroups(Punctuation, Name.Namespace, Punctuation,
                     using(this, state=['wikilink-name']), Punctuation, Name.Label),
            'medialink-inner'
        ),
        # Wikilinks
        (
            r"""(?xi)
            (\[\[)(?!%s) # Should not contain URLs
                (?: ([%s]*) (:))?
                ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*?)
                (?: (\#) ([%s]*?) )?
            (\]\])
            """ % ('|'.join(protocols), title_char.replace('/', ''),
                   title_char, f'{title_char}#'),
            bygroups(Punctuation, Name.Namespace, Punctuation,
                     using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation)
        ),
        (
            r"""(?xi)
            (\[\[)(?!%s)
                (?: ([%s]*) (:))?
                ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*?)
                (?: (\#) ([%s]*?) )?
                (\|)
            """ % ('|'.join(protocols), title_char.replace('/', ''),
                   title_char, f'{title_char}#'),
            bygroups(Punctuation, Name.Namespace, Punctuation,
                     using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation),
            'wikilink-inner'
        ),
        # External links
        (
            r"""(?xi)
            (\[)
                ((?:{}) {} {}*)
                (\s*)
            """.format('|'.join(protocols), link_address, link_char_class),
            bygroups(Punctuation, Name.Label, Whitespace),
            'extlink-inner'
        ),
        # Tables
        (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword,
         Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'),
        # HTML tags
        (r'(?i)(<)({})\b'.format('|'.join(html_tags)),
         bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
        (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)),
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        # Extension tags with dedicated content states. Each rule pushes two
        # states: 'tag-inner' (on top) lexes the opening tag's attributes, and
        # the 'tag-<name>' state underneath lexes the element content.
        # NOTE(review): a 'tag-timeline' state is defined at the bottom of this
        # table, but no rule here enters it -- confirm whether a <timeline>
        # rule was intended.
        # <nowiki>
        (r'(?i)(<)(nowiki)\b', bygroups(Punctuation,
         Name.Tag), ('tag-nowiki', 'tag-inner')),
        # <pre>
        (r'(?i)(<)(pre)\b', bygroups(Punctuation,
         Name.Tag), ('tag-pre', 'tag-inner')),
        # <categorytree>
        (r'(?i)(<)(categorytree)\b', bygroups(
            Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')),
        # <hiero>
        (r'(?i)(<)(hiero)\b', bygroups(Punctuation,
         Name.Tag), ('tag-hiero', 'tag-inner')),
        # <math>
        (r'(?i)(<)(math)\b', bygroups(Punctuation,
         Name.Tag), ('tag-math', 'tag-inner')),
        # <chem>
        (r'(?i)(<)(chem)\b', bygroups(Punctuation,
         Name.Tag), ('tag-chem', 'tag-inner')),
        # <ce>
        (r'(?i)(<)(ce)\b', bygroups(Punctuation,
         Name.Tag), ('tag-ce', 'tag-inner')),
        # <charinsert>
        (r'(?i)(<)(charinsert)\b', bygroups(
            Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')),
        # <templatedata>
        (r'(?i)(<)(templatedata)\b', bygroups(
            Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')),
        # <gallery>
        (r'(?i)(<)(gallery)\b', bygroups(
            Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')),
        # <graph>
        # This pattern previously matched "gallery"; the <gallery> rule above
        # always won, so the 'tag-graph' state could never be entered.
        (r'(?i)(<)(graph)\b', bygroups(
            Punctuation, Name.Tag), ('tag-graph', 'tag-inner')),
        # <dynamicpagelist>
        (r'(?i)(<)(dynamicpagelist)\b', bygroups(
            Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')),
        # <inputbox>
        (r'(?i)(<)(inputbox)\b', bygroups(
            Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')),
        # <rss>
        (r'(?i)(<)(rss)\b', bygroups(
            Punctuation, Name.Tag), ('tag-rss', 'tag-inner')),
        # <imagemap>
        (r'(?i)(<)(imagemap)\b', bygroups(
            Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')),
        # <syntaxhighlight>
        (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)',
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
         bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
        # <syntaxhighlight>: Fallback case for self-closing tags
        (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
            Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
        # <source>
        (r'(?i)(</)(source)\b(\s*)(>)',
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
         bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
        # <source>: Fallback case for self-closing tags
        (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
            Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
        # <score>
        (r'(?i)(</)(score)\b(\s*)(>)',
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
         bygroups(Punctuation, Name.Tag, handle_score)),
        # <score>: Fallback case for self-closing tags
        (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
            Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
        # Other parser tags
        (r'(?i)(<)({})\b'.format('|'.join(parser_tags)),
         bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
        (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)),
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        # LanguageConverter markups
        (
            r"""(?xi)
            (-\{{) # Use {{ to escape format()
                ([^|]) (\|)
                (?:
                    (?: ([^;]*?) (=>))?
                    (\s* (?:{variants}) \s*) (:)
                )?
            """.format(variants='|'.join(variant_langs)),
            bygroups(Punctuation, Keyword, Punctuation,
                     using(this, state=['root', 'lc-raw']),
                     Operator, Name.Label, Punctuation),
            'lc-inner'
        ),
        # LanguageConverter markups: composite conversion grammar
        (
            r"""(?xi)
            (-\{)
                ([a-z\s;-]*?) (\|)
            """,
            bygroups(Punctuation,
                     using(this, state=['root', 'lc-flag']),
                     Punctuation),
            'lc-raw'
        ),
        # LanguageConverter markups: fallbacks
        (
            r"""(?xi)
            (-\{{) (?!\{{) # Use {{ to escape format()
                (?: (\s* (?:{variants}) \s*) (:))?
            """.format(variants='|'.join(variant_langs)),
            bygroups(Punctuation, Name.Label, Punctuation),
            'lc-inner'
        ),
    ],
    'wikilink-name': [
        include('replaceable'),
        (r'[^{<]+', Name.Tag),
        (r'(?s).', Name.Tag),
    ],
    'wikilink-inner': [
        # Quit in case of another wikilink
        (r'(?=\[\[)', Punctuation, '#pop'),
        (r'\]\]', Punctuation, '#pop'),
        include('inline'),
        include('text'),
    ],
    'medialink-inner': [
        (r'\]\]', Punctuation, '#pop'),
        (r'(\|)([^\n=|]*)(=)',
         bygroups(Punctuation, Name.Attribute, Operator)),
        (r'\|', Punctuation),
        include('inline'),
        include('text'),
    ],
    'quote-common': [
        # Quit in case of link/template endings
        (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'),
        (r'\n', Text, '#pop'),
    ],
    'inline-italic': [
        include('quote-common'),
        (r"('')(''')(?!')", bygroups(Generic.Emph,
                                     Generic.Strong), ('#pop', 'inline-bold')),
        (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic-bold')),
        (r"''(?!')", Generic.Emph, '#pop'),
        include('inline'),
        include('text-italic'),
    ],
    'inline-bold': [
        include('quote-common'),
        (r"(''')('')(?!')", bygroups(
            Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')),
        (r"'''(?!')", Generic.Strong, '#pop'),
        (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold-italic')),
        include('inline'),
        include('text-bold'),
    ],
    'inline-bold-italic': [
        include('quote-common'),
        (r"('')(''')(?!')", bygroups(Generic.EmphStrong,
                                     Generic.Strong), '#pop'),
        (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
        (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
        include('inline'),
        include('text-bold-italic'),
    ],
    'inline-italic-bold': [
        include('quote-common'),
        (r"(''')('')(?!')", bygroups(
            Generic.EmphStrong, Generic.Emph), '#pop'),
        (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
        (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
        include('inline'),
        include('text-bold-italic'),
    ],
    'lc-flag': [
        (r'\s+', Whitespace),
        (r';', Punctuation),
        *text_rules(Keyword),
    ],
    'lc-inner': [
        (
            r"""(?xi)
            (;)
            (?: ([^;]*?) (=>))?
            (\s* (?:{variants}) \s*) (:)
            """.format(variants='|'.join(variant_langs)),
            bygroups(Punctuation, using(this, state=['root', 'lc-raw']),
                     Operator, Name.Label, Punctuation)
        ),
        (r';?\s*?\}-', Punctuation, '#pop'),
        include('inline'),
        include('text'),
    ],
    'lc-raw': [
        (r'\}-', Punctuation, '#pop'),
        include('inline'),
        include('text'),
    ],
    'replaceable': [
        # Comments
        (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
        # Parameters
        (
            r"""(?x)
            (\{{3})
                ([^|]*?)
                (?=\}{3}|\|)
            """,
            bygroups(Punctuation, Name.Variable),
            'parameter-inner',
        ),
        # Magic variables
        (r'(?i)(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars_i),
         bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
        (r'(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars),
         bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
        # Parser functions & templates
        (r'\{\{', Punctuation, 'template-begin-space'),
        # <tvar> legacy syntax
        (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation,
         Name.Tag, Punctuation, String, Punctuation)),
        (r'</>', Punctuation, '#pop'),
        # <tvar>
        (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
        (r'(?i)(</)(tvar)\b(\s*)(>)',
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
    ],
    'parameter-inner': [
        (r'\}{3}', Punctuation, '#pop'),
        (r'\|', Punctuation),
        include('inline'),
        include('text'),
    ],
    'template-begin-space': [
        # Templates allow line breaks at the beginning, and due to how MediaWiki handles
        # comments, an extra state is required to handle things like {{\n<!---->\n name}}
        (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
        (r'\s+', Whitespace),
        # Parser functions
        (
            r'(?i)(\#[%s]*?|%s)(:)' % (title_char,
                                       '|'.join(parser_functions_i)),
            bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
        ),
        (
            r'(%s)(:)' % ('|'.join(parser_functions)),
            bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
        ),
        # Templates
        (
            r'(?i)([%s]*?)(:)' % title_char,
            bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name')
        ),
        default(('#pop', 'template-name'),),
    ],
    'template-name': [
        (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')),
        (r'\}\}', Punctuation, '#pop'),
        (r'\n', Text, '#pop'),
        include('replaceable'),
        *text_rules(Name.Tag),
    ],
    'template-inner': [
        (r'\}\}', Punctuation, '#pop'),
        (r'\|', Punctuation),
        (
            r"""(?x)
                (?<=\|)
                ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags
                (=)
            """,
            bygroups(Name.Label, Operator)
        ),
        include('inline'),
        include('text'),
    ],
    'table': [
        # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior
        # Endings
        (r'^([ \t\n\r\0\x0B]*?)(\|\})',
         bygroups(Whitespace, Punctuation), '#pop'),
        # Table rows
        (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation,
         using(this, state=['root', 'attr']))),
        # Captions
        (
            r"""(?x)
            ^([ \t\n\r\0\x0B]*?)(\|\+)
            # Exclude links, template and tags
            (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )?
            (.*?)$
            """,
            bygroups(Whitespace, Punctuation, using(this, state=[
                'root', 'attr']), Punctuation, Generic.Heading),
        ),
        # Table data
        (
            r"""(?x)
            ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| )
            (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
            """,
            bygroups(Punctuation, using(this, state=[
                'root', 'attr']), Punctuation),
        ),
        # Table headers
        (
            r"""(?x)
            ( ^(?:[ \t\n\r\0\x0B]*?)! )
            (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
            """,
            bygroups(Punctuation, using(this, state=[
                'root', 'attr']), Punctuation),
            'table-header',
        ),
        include('list'),
        include('inline'),
        include('text'),
    ],
    'table-header': [
        # Requires another state for || handling inside headers
        (r'\n', Text, '#pop'),
        (
            r"""(?x)
            (!!|\|\|)
            (?:
                ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )
                (\|)(?!\|)
            )?
            """,
            bygroups(Punctuation, using(this, state=[
                'root', 'attr']), Punctuation)
        ),
        *text_rules(Generic.Subheading),
    ],
    'entity': [
        (r'&\S*?;', Name.Entity),
    ],
    'dt': [
        (r'\n', Text, '#pop'),
        include('inline'),
        (r':', Keyword, '#pop'),
        include('text'),
    ],
    'extlink-inner': [
        (r'\]', Punctuation, '#pop'),
        include('inline'),
        include('text'),
    ],
    'nowiki-ish': [
        include('entity'),
        include('text'),
    ],
    'attr': [
        include('replaceable'),
        (r'\s+', Whitespace),
        (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'),
        (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'),
        (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'),
        (r'[\w:-]+', Name.Attribute),

    ],
    'attr-val-0': [
        (r'\s', Whitespace, '#pop'),
        include('replaceable'),
        *text_rules(String),
    ],
    'attr-val-1': [
        (r"'", String.Single, '#pop'),
        include('replaceable'),
        *text_rules(String.Single),
    ],
    'attr-val-2': [
        (r'"', String.Double, '#pop'),
        include('replaceable'),
        *text_rules(String.Double),
    ],
    'tag-inner-ordinary': [
        (r'/?\s*>', Punctuation, '#pop'),
        include('tag-attr'),
    ],
    'tag-inner': [
        # Return to root state for self-closing tags
        (r'/\s*>', Punctuation, '#pop:2'),
        (r'\s*>', Punctuation, '#pop'),
        include('tag-attr'),
    ],
    # These states below are just like their non-tag variants, the key difference is
    # they forcibly quit when encountering tag closing markup
    'tag-attr': [
        include('replaceable'),
        (r'\s+', Whitespace),
        (r'(=)(\s*)(")', bygroups(Operator,
         Whitespace, String.Double), 'tag-attr-val-2'),
        (r"(=)(\s*)(')", bygroups(Operator,
         Whitespace, String.Single), 'tag-attr-val-1'),
        (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'),
        (r'[\w:-]+', Name.Attribute),

    ],
    'tag-attr-val-0': [
        (r'\s', Whitespace, '#pop'),
        (r'/?>', Punctuation, '#pop:2'),
        include('replaceable'),
        *text_rules(String),
    ],
    'tag-attr-val-1': [
        (r"'", String.Single, '#pop'),
        (r'/?>', Punctuation, '#pop:2'),
        include('replaceable'),
        *text_rules(String.Single),
    ],
    'tag-attr-val-2': [
        (r'"', String.Double, '#pop'),
        (r'/?>', Punctuation, '#pop:2'),
        include('replaceable'),
        *text_rules(String.Double),
    ],
    # Content states for extension tags, built by helper factories defined
    # elsewhere in this class.
    'tag-nowiki': nowiki_tag_rules('nowiki'),
    'tag-pre': nowiki_tag_rules('pre'),
    'tag-categorytree': plaintext_tag_rules('categorytree'),
    'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'),
    'tag-hiero': plaintext_tag_rules('hiero'),
    'tag-inputbox': plaintext_tag_rules('inputbox'),
    'tag-imagemap': plaintext_tag_rules('imagemap'),
    'tag-charinsert': plaintext_tag_rules('charinsert'),
    'tag-timeline': plaintext_tag_rules('timeline'),
    'tag-gallery': plaintext_tag_rules('gallery'),
    'tag-graph': plaintext_tag_rules('graph'),
    'tag-rss': plaintext_tag_rules('rss'),
    'tag-math': delegate_tag_rules('math', TexLexer),
    'tag-chem': delegate_tag_rules('chem', TexLexer),
    'tag-ce': delegate_tag_rules('ce', TexLexer),
    'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer),
    # Plain-text fallbacks, one per surrounding emphasis context.
    'text-italic': text_rules(Generic.Emph),
    'text-bold': text_rules(Generic.Strong),
    'text-bold-italic': text_rules(Generic.EmphStrong),
    'text': text_rules(Text),
}