"""Tokenizes paragraph content.
"""
from __future__ import annotations

from typing import TYPE_CHECKING, Callable

from . import rules_inline
from .ruler import Ruler
from .rules_inline.state_inline import StateInline
from .token import Token
from .utils import EnvType

if TYPE_CHECKING:
    from markdown_it import MarkdownIt


# Parser rules
RuleFuncInlineType = Callable[[StateInline, bool], bool]
"""(state: StateInline, silent: bool) -> matched: bool)

`silent` disables token generation, useful for lookahead.
"""
_rules: list[tuple[str, RuleFuncInlineType]] = [
    ("text", rules_inline.text),
    ("linkify", rules_inline.linkify),
    ("newline", rules_inline.newline),
    ("escape", rules_inline.escape),
    ("backticks", rules_inline.backtick),
    ("strikethrough", rules_inline.strikethrough.tokenize),
    ("emphasis", rules_inline.emphasis.tokenize),
    ("link", rules_inline.link),
    ("image", rules_inline.image),
    ("autolink", rules_inline.autolink),
    ("html_inline", rules_inline.html_inline),
    ("entity", rules_inline.entity),
]

# Note: the `rule2` ruleset was created specifically for emphasis/strikethrough
# post-processing, and it may change in the future.
#
# Don't use it for anything except pairs (plugins working with `balance_pairs`).
#
RuleFuncInline2Type = Callable[[StateInline], None]
_rules2: list[tuple[str, RuleFuncInline2Type]] = [
    ("balance_pairs", rules_inline.link_pairs),
    ("strikethrough", rules_inline.strikethrough.postProcess),
    ("emphasis", rules_inline.emphasis.postProcess),
    # The pair rules split '**' into separate text tokens, which may end up unused;
    # the rule below merges unused segments back into the rest of the text.
    ("fragments_join", rules_inline.fragments_join),
]
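
# A hypothetical post-processing rule conforming to RuleFuncInline2Type,
# sketched for illustration (not part of this module): it runs once after
# tokenization and mutates `state.tokens` in place:
#
#     def strip_trailing_ws(state: StateInline) -> None:
#         for token in state.tokens:
#             if token.type == "text":
#                 token.content = token.content.rstrip()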


class ParserInline:
    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncInlineType]()
        for name, rule in _rules:
            self.ruler.push(name, rule)
        # Second ruler used for post-processing (e.g. in emphasis-like rules)
        self.ruler2 = Ruler[RuleFuncInline2Type]()
        for name, rule2 in _rules2:
            self.ruler2.push(name, rule2)
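
        # Plugins typically extend these chains from outside this class, e.g.
        # (a sketch, assuming a configured MarkdownIt instance `md` and the
        # hypothetical rules sketched above):
        #
        #     md.inline.ruler.before("emphasis", "smiley", smiley)
        #     md.inline.ruler2.push("strip_trailing_ws", strip_trailing_ws)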

    def skipToken(self, state: StateInline) -> None:
        """Skip a single token by running all rules in validation (silent) mode.

        The resulting position is written to `state.pos` and memoized in
        `state.cache`, keyed by the starting position.
        """
        ok = False
        pos = state.pos
        rules = self.ruler.getRules("")
        maxNesting = state.md.options["maxNesting"]
        cache = state.cache
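        # `state.cache` maps a start position to the position just after the
        # token skipped from there, so repeated lookahead from the same offset
        # is a single dict lookup.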

        if pos in cache:
            state.pos = cache[pos]
            return

        if state.level < maxNesting:
            for rule in rules:
                # Increment state.level and decrement it later to limit recursion.
                # It's harmless to do here, because no tokens are created;
                # ideally, though, this would use a separate private state variable.
                state.level += 1
                ok = rule(state, True)
                state.level -= 1
                if ok:
                    break
        else:
            # Too much nesting; just skip until the end of the paragraph.
            #
            # NOTE: this causes links to behave incorrectly when the number of
            #       `[` characters is exactly `maxNesting + 1`:
            #
            #       [[[[[[[[[[[[[[[[[[[[[foo]()
            #
            # TODO: remove this workaround once the CommonMark standard allows
            #       nested links (it could then be replaced by preventing links
            #       from being parsed in validation mode)
            #
            state.pos = state.posMax

        if not ok:
            state.pos += 1
        cache[pos] = state.pos

    def tokenize(self, state: StateInline) -> None:
        """Generate tokens for input range."""
        rules = self.ruler.getRules("")
        end = state.posMax
        maxNesting = state.md.options["maxNesting"]

        while state.pos < end:
            # Reset `ok` on every pass, so a stale success from a previous
            # iteration can't short-circuit the loop once nesting is exceeded.
            ok = False

            # Try all possible rules.
            # On success, a rule should:
            #
            # - update `state.pos`
            # - update `state.tokens`
            # - return True

            if state.level < maxNesting:
                for rule in rules:
                    ok = rule(state, False)
                    if ok:
                        break

            if ok:
                if state.pos >= end:
                    break
                continue

            state.pending += state.src[state.pos]
            state.pos += 1

        if state.pending:
            state.pushPending()

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> list[Token]:
        """Process input string and push inline tokens into `tokens`"""
        state = StateInline(src, md, env, tokens)
        self.tokenize(state)
        rules2 = self.ruler2.getRules("")
        for rule in rules2:
            rule(state)
        return state.tokens
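

# Typical usage, for reference (a sketch; this parser is normally driven via
# `MarkdownIt.parseInline` or `MarkdownIt.render` rather than called directly):
#
#     from markdown_it import MarkdownIt
#
#     md = MarkdownIt()
#     tokens = md.inline.parse("some *emphasised* text", md, {}, [])
#     print([t.type for t in tokens])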