Diffstat (limited to 'venv/lib/python3.11/site-packages/markdown_it/rules_inline')
32 files changed, 1399 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__init__.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__init__.py
new file mode 100644
index 0000000..3a8026e
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__init__.py
@@ -0,0 +1,31 @@
+__all__ = (
+    "StateInline",
+    "text",
+    "fragments_join",
+    "link_pairs",
+    "linkify",
+    "escape",
+    "newline",
+    "backtick",
+    "emphasis",
+    "image",
+    "link",
+    "autolink",
+    "entity",
+    "html_inline",
+    "strikethrough",
+)
+from . import emphasis, strikethrough
+from .autolink import autolink
+from .backticks import backtick
+from .balance_pairs import link_pairs
+from .entity import entity
+from .escape import escape
+from .fragments_join import fragments_join
+from .html_inline import html_inline
+from .image import image
+from .link import link
+from .linkify import linkify
+from .newline import newline
+from .state_inline import StateInline
+from .text import text
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/__init__.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/__init__.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..ac048d7
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/__init__.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/autolink.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/autolink.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..15c5b40
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/autolink.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/backticks.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/backticks.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..6d0114b
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/backticks.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/balance_pairs.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/balance_pairs.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..9297508
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/balance_pairs.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/emphasis.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/emphasis.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..d1923ec
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/emphasis.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/entity.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/entity.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..c302f88
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/entity.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/escape.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/escape.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..7389e27
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/escape.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/fragments_join.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/fragments_join.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..1c1e20d
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/fragments_join.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/html_inline.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/html_inline.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..a77d98a
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/html_inline.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/image.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/image.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..485976b
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/image.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/link.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/link.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..ba75953
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/link.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/linkify.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/linkify.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..7cb2ef9
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/linkify.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/newline.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/newline.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..2134530
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/newline.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/state_inline.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/state_inline.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..9095837
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/state_inline.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/strikethrough.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/strikethrough.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..006536a
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/strikethrough.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/text.cpython-311.pyc b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/text.cpython-311.pyc
Binary files differ
new file mode 100644
index 0000000..4f56489
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/__pycache__/text.cpython-311.pyc
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/autolink.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/autolink.py
new file mode 100644
index 0000000..295d963
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/autolink.py
@@ -0,0 +1,77 @@
+# Process autolinks '<protocol:...>'
+import re
+
+from .state_inline import StateInline
+
+EMAIL_RE = re.compile(
+    r"^([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$"  # noqa: E501
+)
+AUTOLINK_RE = re.compile(r"^([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)$")
+
+
+def autolink(state: StateInline, silent: bool) -> bool:
+    pos = state.pos
+
+    if state.src[pos] != "<":
+        return False
+
+    start = state.pos
+    maximum = state.posMax
+
+    while True:
+        pos += 1
+        if pos >= maximum:
+            return False
+
+        ch = state.src[pos]
+
+        if ch == "<":
+            return False
+        if ch == ">":
+            break
+
+    url = state.src[start + 1 : pos]
+
+    if AUTOLINK_RE.search(url) is not None:
+        fullUrl = state.md.normalizeLink(url)
+        if not state.md.validateLink(fullUrl):
+            return False
+
+        if not silent:
+            token = state.push("link_open", "a", 1)
+            token.attrs = {"href": fullUrl}
+            token.markup = "autolink"
+            token.info = "auto"
+
+            token = state.push("text", "", 0)
+            token.content = state.md.normalizeLinkText(url)
+
+            token = state.push("link_close", "a", -1)
+            token.markup = "autolink"
+            token.info = "auto"
+
+        state.pos += len(url) + 2
+        return True
+
+    if EMAIL_RE.search(url) is not None:
+        fullUrl = state.md.normalizeLink("mailto:" + url)
+        if not state.md.validateLink(fullUrl):
+            return False
+
+        if not silent:
+            token = state.push("link_open", "a", 1)
+            token.attrs = {"href": fullUrl}
+            token.markup = "autolink"
+            token.info = "auto"
+
+            token = state.push("text", "", 0)
+            token.content = state.md.normalizeLinkText(url)
+
+            token = state.push("link_close", "a", -1)
+            token.markup = "autolink"
+            token.info = "auto"
+
+        state.pos += len(url) + 2
+        return True
+
+    return False
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/backticks.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/backticks.py
new file mode 100644
index 0000000..fc60d6b
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/backticks.py
@@ -0,0 +1,72 @@
+# Parse backticks
+import re
+
+from .state_inline import StateInline
+
+regex = re.compile("^ (.+) $")
+
+
+def backtick(state: StateInline, silent: bool) -> bool:
+    pos = state.pos
+
+    if state.src[pos] != "`":
+        return False
+
+    start = pos
+    pos += 1
+    maximum = state.posMax
+
+    # scan marker length
+    while pos < maximum and (state.src[pos] == "`"):
+        pos += 1
+
+    marker = state.src[start:pos]
+    openerLength = len(marker)
+
+    if state.backticksScanned and state.backticks.get(openerLength, 0) <= start:
+        if not silent:
+            state.pending += marker
+        state.pos += openerLength
+        return True
+
+    matchStart = matchEnd = pos
+
+    # Nothing found in the cache, scan until the end of the line (or until marker is found)
+    while True:
+        try:
+            matchStart = state.src.index("`", matchEnd)
+        except ValueError:
+            break
+        matchEnd = matchStart + 1
+
+        # scan marker length
+        while matchEnd < maximum and (state.src[matchEnd] == "`"):
+            matchEnd += 1
+
+        closerLength = matchEnd - matchStart
+
+        if closerLength == openerLength:
+            # Found matching closer length.
+            if not silent:
+                token = state.push("code_inline", "code", 0)
+                token.markup = marker
+                token.content = state.src[pos:matchStart].replace("\n", " ")
+                if (
+                    token.content.startswith(" ")
+                    and token.content.endswith(" ")
+                    and len(token.content.strip()) > 0
+                ):
+                    token.content = token.content[1:-1]
+            state.pos = matchEnd
+            return True
+
+        # Some different length found, put it in cache as upper limit of where closer can be found
+        state.backticks[closerLength] = matchStart
+
+    # Scanned through the end, didn't find anything
+    state.backticksScanned = True
+
+    if not silent:
+        state.pending += marker
+    state.pos += openerLength
+    return True
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/balance_pairs.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/balance_pairs.py
new file mode 100644
index 0000000..bbb2101
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/balance_pairs.py
@@ -0,0 +1,137 @@
+"""Balance paired characters (*, _, etc) in inline tokens."""
+from __future__ import annotations
+
+from .state_inline import Delimiter, StateInline
+
+
+def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
+    """For each opening emphasis-like marker find a matching closing one."""
+    if not delimiters:
+        return
+
+    openersBottom = {}
+    maximum = len(delimiters)
+
+    # headerIdx is the first delimiter of the current (where closer is) delimiter run
+    headerIdx = 0
+    lastTokenIdx = -2  # needs any value lower than -1
+    jumps: list[int] = []
+    closerIdx = 0
+    while closerIdx < maximum:
+        closer = delimiters[closerIdx]
+
+        jumps.append(0)
+
+        # markers belong to same delimiter run if:
+        #  - they have adjacent tokens
+        #  - AND markers are the same
+        #
+        if (
+            delimiters[headerIdx].marker != closer.marker
+            or lastTokenIdx != closer.token - 1
+        ):
+            headerIdx = closerIdx
+        lastTokenIdx = closer.token
+
+        # Length is only used for emphasis-specific "rule of 3",
+        # if it's not defined (in strikethrough or 3rd party plugins),
+        # we can default it to 0 to disable those checks.
+        #
+        closer.length = closer.length or 0
+
+        if not closer.close:
+            closerIdx += 1
+            continue
+
+        # Previously calculated lower bounds (previous fails)
+        # for each marker, each delimiter length modulo 3,
+        # and for whether this closer can be an opener;
+        # https://github.com/commonmark/cmark/commit/34250e12ccebdc6372b8b49c44fab57c72443460
+        if closer.marker not in openersBottom:
+            openersBottom[closer.marker] = [-1, -1, -1, -1, -1, -1]
+
+        minOpenerIdx = openersBottom[closer.marker][
+            (3 if closer.open else 0) + (closer.length % 3)
+        ]
+
+        openerIdx = headerIdx - jumps[headerIdx] - 1
+
+        newMinOpenerIdx = openerIdx
+
+        while openerIdx > minOpenerIdx:
+            opener = delimiters[openerIdx]
+
+            if opener.marker != closer.marker:
+                openerIdx -= jumps[openerIdx] + 1
+                continue
+
+            if opener.open and opener.end < 0:
+                isOddMatch = False
+
+                # from spec:
+                #
+                # If one of the delimiters can both open and close emphasis, then the
+                # sum of the lengths of the delimiter runs containing the opening and
+                # closing delimiters must not be a multiple of 3 unless both lengths
+                # are multiples of 3.
+                #
+                if (
+                    (opener.close or closer.open)
+                    and ((opener.length + closer.length) % 3 == 0)
+                    and (opener.length % 3 != 0 or closer.length % 3 != 0)
+                ):
+                    isOddMatch = True
+
+                if not isOddMatch:
+                    # If previous delimiter cannot be an opener, we can safely skip
+                    # the entire sequence in future checks. This is required to make
+                    # sure algorithm has linear complexity (see *_*_*_*_*_... case).
+                    #
+                    if openerIdx > 0 and not delimiters[openerIdx - 1].open:
+                        lastJump = jumps[openerIdx - 1] + 1
+                    else:
+                        lastJump = 0
+
+                    jumps[closerIdx] = closerIdx - openerIdx + lastJump
+                    jumps[openerIdx] = lastJump
+
+                    closer.open = False
+                    opener.end = closerIdx
+                    opener.close = False
+                    newMinOpenerIdx = -1
+
+                    # treat next token as start of run,
+                    # it optimizes skips in **<...>**a**<...>** pathological case
+                    lastTokenIdx = -2
+
+                    break
+
+            openerIdx -= jumps[openerIdx] + 1
+
+        if newMinOpenerIdx != -1:
+            # If match for this delimiter run failed, we want to set lower bound for
+            # future lookups. This is required to make sure algorithm has linear
+            # complexity.
+            #
+            # See details here:
+            # https:#github.com/commonmark/cmark/issues/178#issuecomment-270417442
+            #
+            openersBottom[closer.marker][
+                (3 if closer.open else 0) + ((closer.length or 0) % 3)
+            ] = newMinOpenerIdx
+
+        closerIdx += 1
+
+
+def link_pairs(state: StateInline) -> None:
+    tokens_meta = state.tokens_meta
+    maximum = len(state.tokens_meta)
+
+    processDelimiters(state, state.delimiters)
+
+    curr = 0
+    while curr < maximum:
+        curr_meta = tokens_meta[curr]
+        if curr_meta and "delimiters" in curr_meta:
+            processDelimiters(state, curr_meta["delimiters"])
+        curr += 1
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/emphasis.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/emphasis.py
new file mode 100644
index 0000000..9a98f9e
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/emphasis.py
@@ -0,0 +1,102 @@
+# Process *this* and _that_
+#
+from __future__ import annotations
+
+from .state_inline import Delimiter, StateInline
+
+
+def tokenize(state: StateInline, silent: bool) -> bool:
+    """Insert each marker as a separate text token, and add it to delimiter list"""
+    start = state.pos
+    marker = state.src[start]
+
+    if silent:
+        return False
+
+    if marker not in ("_", "*"):
+        return False
+
+    scanned = state.scanDelims(state.pos, marker == "*")
+
+    for _ in range(scanned.length):
+        token = state.push("text", "", 0)
+        token.content = marker
+        state.delimiters.append(
+            Delimiter(
+                marker=ord(marker),
+                length=scanned.length,
+                token=len(state.tokens) - 1,
+                end=-1,
+                open=scanned.can_open,
+                close=scanned.can_close,
+            )
+        )
+
+    state.pos += scanned.length
+
+    return True
+
+
+def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None:
+    i = len(delimiters) - 1
+    while i >= 0:
+        startDelim = delimiters[i]
+
+        # /* _ */  /* * */
+        if startDelim.marker != 0x5F and startDelim.marker != 0x2A:
+            i -= 1
+            continue
+
+        # Process only opening markers
+        if startDelim.end == -1:
+            i -= 1
+            continue
+
+        endDelim = delimiters[startDelim.end]
+
+        # If the previous delimiter has the same marker and is adjacent to this one,
+        # merge those into one strong delimiter.
+ # + # `<em><em>whatever</em></em>` -> `<strong>whatever</strong>` + # + isStrong = ( + i > 0 + and delimiters[i - 1].end == startDelim.end + 1 + # check that first two markers match and adjacent + and delimiters[i - 1].marker == startDelim.marker + and delimiters[i - 1].token == startDelim.token - 1 + # check that last two markers are adjacent (we can safely assume they match) + and delimiters[startDelim.end + 1].token == endDelim.token + 1 + ) + + ch = chr(startDelim.marker) + + token = state.tokens[startDelim.token] + token.type = "strong_open" if isStrong else "em_open" + token.tag = "strong" if isStrong else "em" + token.nesting = 1 + token.markup = ch + ch if isStrong else ch + token.content = "" + + token = state.tokens[endDelim.token] + token.type = "strong_close" if isStrong else "em_close" + token.tag = "strong" if isStrong else "em" + token.nesting = -1 + token.markup = ch + ch if isStrong else ch + token.content = "" + + if isStrong: + state.tokens[delimiters[i - 1].token].content = "" + state.tokens[delimiters[startDelim.end + 1].token].content = "" + i -= 1 + + i -= 1 + + +def postProcess(state: StateInline) -> None: + """Walk through delimiter list and replace text tokens with tags.""" + _postProcess(state, state.delimiters) + + for token in state.tokens_meta: + if token and "delimiters" in token: + _postProcess(state, token["delimiters"]) diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/entity.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/entity.py new file mode 100644 index 0000000..ec9d396 --- /dev/null +++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/entity.py @@ -0,0 +1,53 @@ +# Process html entity - {, ¯, ", ... +import re + +from ..common.entities import entities +from ..common.utils import fromCodePoint, isValidEntityCode +from .state_inline import StateInline + +DIGITAL_RE = re.compile(r"^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));", re.IGNORECASE) +NAMED_RE = re.compile(r"^&([a-z][a-z0-9]{1,31});", re.IGNORECASE) + + +def entity(state: StateInline, silent: bool) -> bool: + pos = state.pos + maximum = state.posMax + + if state.src[pos] != "&": + return False + + if pos + 1 >= maximum: + return False + + if state.src[pos + 1] == "#": + if match := DIGITAL_RE.search(state.src[pos:]): + if not silent: + match1 = match.group(1) + code = ( + int(match1[1:], 16) if match1[0].lower() == "x" else int(match1, 10) + ) + + token = state.push("text_special", "", 0) + token.content = ( + fromCodePoint(code) + if isValidEntityCode(code) + else fromCodePoint(0xFFFD) + ) + token.markup = match.group(0) + token.info = "entity" + + state.pos += len(match.group(0)) + return True + + else: + if (match := NAMED_RE.search(state.src[pos:])) and match.group(1) in entities: + if not silent: + token = state.push("text_special", "", 0) + token.content = entities[match.group(1)] + token.markup = match.group(0) + token.info = "entity" + + state.pos += len(match.group(0)) + return True + + return False diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/escape.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/escape.py new file mode 100644 index 0000000..9f68b5d --- /dev/null +++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/escape.py @@ -0,0 +1,92 @@ +""" +Process escaped chars and hardbreaks +""" +from ..common.utils import isStrSpace +from .state_inline import StateInline + + +def escape(state: StateInline, silent: bool) -> bool: + """Process escaped chars and hardbreaks.""" + pos = state.pos + 
maximum = state.posMax + + if state.src[pos] != "\\": + return False + + pos += 1 + + # '\' at the end of the inline block + if pos >= maximum: + return False + + ch1 = state.src[pos] + ch1_ord = ord(ch1) + if ch1 == "\n": + if not silent: + state.push("hardbreak", "br", 0) + pos += 1 + # skip leading whitespaces from next line + while pos < maximum: + ch = state.src[pos] + if not isStrSpace(ch): + break + pos += 1 + + state.pos = pos + return True + + escapedStr = state.src[pos] + + if ch1_ord >= 0xD800 and ch1_ord <= 0xDBFF and pos + 1 < maximum: + ch2 = state.src[pos + 1] + ch2_ord = ord(ch2) + if ch2_ord >= 0xDC00 and ch2_ord <= 0xDFFF: + escapedStr += ch2 + pos += 1 + + origStr = "\\" + escapedStr + + if not silent: + token = state.push("text_special", "", 0) + token.content = escapedStr if ch1 in _ESCAPED else origStr + token.markup = origStr + token.info = "escape" + + state.pos = pos + 1 + return True + + +_ESCAPED = { + "!", + '"', + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "*", + "+", + ",", + "-", + ".", + "/", + ":", + ";", + "<", + "=", + ">", + "?", + "@", + "[", + "\\", + "]", + "^", + "_", + "`", + "{", + "|", + "}", + "~", +} diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/fragments_join.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/fragments_join.py new file mode 100644 index 0000000..f795c13 --- /dev/null +++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/fragments_join.py @@ -0,0 +1,43 @@ +from .state_inline import StateInline + + +def fragments_join(state: StateInline) -> None: + """ + Clean up tokens after emphasis and strikethrough postprocessing: + merge adjacent text nodes into one and re-calculate all token levels + + This is necessary because initially emphasis delimiter markers (``*, _, ~``) + are treated as their own separate text tokens. Then emphasis rule either + leaves them as text (needed to merge with adjacent text) or turns them + into opening/closing tags (which messes up levels inside). 
+ """ + level = 0 + maximum = len(state.tokens) + + curr = last = 0 + while curr < maximum: + # re-calculate levels after emphasis/strikethrough turns some text nodes + # into opening/closing tags + if state.tokens[curr].nesting < 0: + level -= 1 # closing tag + state.tokens[curr].level = level + if state.tokens[curr].nesting > 0: + level += 1 # opening tag + + if ( + state.tokens[curr].type == "text" + and curr + 1 < maximum + and state.tokens[curr + 1].type == "text" + ): + # collapse two adjacent text nodes + state.tokens[curr + 1].content = ( + state.tokens[curr].content + state.tokens[curr + 1].content + ) + else: + if curr != last: + state.tokens[last] = state.tokens[curr] + last += 1 + curr += 1 + + if curr != last: + del state.tokens[last:] diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/html_inline.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/html_inline.py new file mode 100644 index 0000000..9065e1d --- /dev/null +++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/html_inline.py @@ -0,0 +1,43 @@ +# Process html tags +from ..common.html_re import HTML_TAG_RE +from ..common.utils import isLinkClose, isLinkOpen +from .state_inline import StateInline + + +def isLetter(ch: int) -> bool: + lc = ch | 0x20 # to lower case + # /* a */ and /* z */ + return (lc >= 0x61) and (lc <= 0x7A) + + +def html_inline(state: StateInline, silent: bool) -> bool: + pos = state.pos + + if not state.md.options.get("html", None): + return False + + # Check start + maximum = state.posMax + if state.src[pos] != "<" or pos + 2 >= maximum: + return False + + # Quick fail on second char + ch = state.src[pos + 1] + if ch not in ("!", "?", "/") and not isLetter(ord(ch)): # /* / */ + return False + + match = HTML_TAG_RE.search(state.src[pos:]) + if not match: + return False + + if not silent: + token = state.push("html_inline", "", 0) + token.content = state.src[pos : pos + len(match.group(0))] + + if isLinkOpen(token.content): + state.linkLevel += 1 + if isLinkClose(token.content): + state.linkLevel -= 1 + + state.pos += len(match.group(0)) + return True diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/image.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/image.py new file mode 100644 index 0000000..b4a32a9 --- /dev/null +++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/image.py @@ -0,0 +1,148 @@ +# Process ![image](<src> "title") +from __future__ import annotations + +from ..common.utils import isStrSpace, normalizeReference +from ..token import Token +from .state_inline import StateInline + + +def image(state: StateInline, silent: bool) -> bool: + label = None + href = "" + oldPos = state.pos + max = state.posMax + + if state.src[state.pos] != "!": + return False + + if state.pos + 1 < state.posMax and state.src[state.pos + 1] != "[": + return False + + labelStart = state.pos + 2 + labelEnd = state.md.helpers.parseLinkLabel(state, state.pos + 1, False) + + # parser failed to find ']', so it's not a valid link + if labelEnd < 0: + return False + + pos = labelEnd + 1 + + if pos < max and state.src[pos] == "(": + # + # Inline link + # + + # [link]( <href> "title" ) + # ^^ skipping these spaces + pos += 1 + while pos < max: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": + break + pos += 1 + + if pos >= max: + return False + + # [link]( <href> "title" ) + # ^^^^^^ parsing link destination + start = pos + res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax) + if res.ok: + 
href = state.md.normalizeLink(res.str) + if state.md.validateLink(href): + pos = res.pos + else: + href = "" + + # [link]( <href> "title" ) + # ^^ skipping these spaces + start = pos + while pos < max: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": + break + pos += 1 + + # [link]( <href> "title" ) + # ^^^^^^^ parsing link title + res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax) + if pos < max and start != pos and res.ok: + title = res.str + pos = res.pos + + # [link]( <href> "title" ) + # ^^ skipping these spaces + while pos < max: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": + break + pos += 1 + else: + title = "" + + if pos >= max or state.src[pos] != ")": + state.pos = oldPos + return False + + pos += 1 + + else: + # + # Link reference + # + if "references" not in state.env: + return False + + # /* [ */ + if pos < max and state.src[pos] == "[": + start = pos + 1 + pos = state.md.helpers.parseLinkLabel(state, pos) + if pos >= 0: + label = state.src[start:pos] + pos += 1 + else: + pos = labelEnd + 1 + else: + pos = labelEnd + 1 + + # covers label == '' and label == undefined + # (collapsed reference link and shortcut reference link respectively) + if not label: + label = state.src[labelStart:labelEnd] + + label = normalizeReference(label) + + ref = state.env["references"].get(label, None) + if not ref: + state.pos = oldPos + return False + + href = ref["href"] + title = ref["title"] + + # + # We found the end of the link, and know for a fact it's a valid link + # so all that's left to do is to call tokenizer. + # + if not silent: + content = state.src[labelStart:labelEnd] + + tokens: list[Token] = [] + state.md.inline.parse(content, state.md, state.env, tokens) + + token = state.push("image", "img", 0) + token.attrs = {"src": href, "alt": ""} + token.children = tokens or None + token.content = content + + if title: + token.attrSet("title", title) + + # note, this is not part of markdown-it JS, but is useful for renderers + if label and state.md.options.get("store_labels", False): + token.meta["label"] = label + + state.pos = pos + state.posMax = max + return True diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/link.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/link.py new file mode 100644 index 0000000..78cf912 --- /dev/null +++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/link.py @@ -0,0 +1,151 @@ +# Process [link](<to> "stuff") + +from ..common.utils import isStrSpace, normalizeReference +from .state_inline import StateInline + + +def link(state: StateInline, silent: bool) -> bool: + href = "" + title = "" + label = None + oldPos = state.pos + maximum = state.posMax + start = state.pos + parseReference = True + + if state.src[state.pos] != "[": + return False + + labelStart = state.pos + 1 + labelEnd = state.md.helpers.parseLinkLabel(state, state.pos, True) + + # parser failed to find ']', so it's not a valid link + if labelEnd < 0: + return False + + pos = labelEnd + 1 + + if pos < maximum and state.src[pos] == "(": + # + # Inline link + # + + # might have found a valid shortcut link, disable reference parsing + parseReference = False + + # [link]( <href> "title" ) + # ^^ skipping these spaces + pos += 1 + while pos < maximum: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": + break + pos += 1 + + if pos >= maximum: + return False + + # [link]( <href> "title" ) + # ^^^^^^ parsing link destination + start = pos + res = state.md.helpers.parseLinkDestination(state.src, 
pos, state.posMax) + if res.ok: + href = state.md.normalizeLink(res.str) + if state.md.validateLink(href): + pos = res.pos + else: + href = "" + + # [link]( <href> "title" ) + # ^^ skipping these spaces + start = pos + while pos < maximum: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": + break + pos += 1 + + # [link]( <href> "title" ) + # ^^^^^^^ parsing link title + res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax) + if pos < maximum and start != pos and res.ok: + title = res.str + pos = res.pos + + # [link]( <href> "title" ) + # ^^ skipping these spaces + while pos < maximum: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": + break + pos += 1 + + if pos >= maximum or state.src[pos] != ")": + # parsing a valid shortcut link failed, fallback to reference + parseReference = True + + pos += 1 + + if parseReference: + # + # Link reference + # + if "references" not in state.env: + return False + + if pos < maximum and state.src[pos] == "[": + start = pos + 1 + pos = state.md.helpers.parseLinkLabel(state, pos) + if pos >= 0: + label = state.src[start:pos] + pos += 1 + else: + pos = labelEnd + 1 + + else: + pos = labelEnd + 1 + + # covers label == '' and label == undefined + # (collapsed reference link and shortcut reference link respectively) + if not label: + label = state.src[labelStart:labelEnd] + + label = normalizeReference(label) + + ref = ( + state.env["references"][label] if label in state.env["references"] else None + ) + if not ref: + state.pos = oldPos + return False + + href = ref["href"] + title = ref["title"] + + # + # We found the end of the link, and know for a fact it's a valid link + # so all that's left to do is to call tokenizer. + # + if not silent: + state.pos = labelStart + state.posMax = labelEnd + + token = state.push("link_open", "a", 1) + token.attrs = {"href": href} + + if title: + token.attrSet("title", title) + + # note, this is not part of markdown-it JS, but is useful for renderers + if label and state.md.options.get("store_labels", False): + token.meta["label"] = label + + state.linkLevel += 1 + state.md.inline.tokenize(state) + state.linkLevel -= 1 + + token = state.push("link_close", "a", -1) + + state.pos = pos + state.posMax = maximum + return True diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/linkify.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/linkify.py new file mode 100644 index 0000000..a8a1815 --- /dev/null +++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/linkify.py @@ -0,0 +1,61 @@ +"""Process links like https://example.org/""" +import re + +from .state_inline import StateInline + +# RFC3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) +SCHEME_RE = re.compile(r"(?:^|[^a-z0-9.+-])([a-z][a-z0-9.+-]*)$", re.IGNORECASE) + + +def linkify(state: StateInline, silent: bool) -> bool: + """Rule for identifying plain-text links.""" + if not state.md.options.linkify: + return False + if state.linkLevel > 0: + return False + if not state.md.linkify: + raise ModuleNotFoundError("Linkify enabled but not installed.") + + pos = state.pos + maximum = state.posMax + + if ( + (pos + 3) > maximum + or state.src[pos] != ":" + or state.src[pos + 1] != "/" + or state.src[pos + 2] != "/" + ): + return False + + if not (match := SCHEME_RE.match(state.pending)): + return False + + proto = match.group(1) + if not (link := state.md.linkify.match_at_start(state.src[pos - len(proto) :])): + return False + url: str = link.url + + # disallow '*' at the end of the link (conflicts with emphasis) + url = url.rstrip("*") + + full_url = state.md.normalizeLink(url) + if not state.md.validateLink(full_url): + return False + + if not silent: + state.pending = state.pending[: -len(proto)] + + token = state.push("link_open", "a", 1) + token.attrs = {"href": full_url} + token.markup = "linkify" + token.info = "auto" + + token = state.push("text", "", 0) + token.content = state.md.normalizeLinkText(url) + + token = state.push("link_close", "a", -1) + token.markup = "linkify" + token.info = "auto" + + state.pos += len(url) - len(proto) + return True diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/newline.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/newline.py new file mode 100644 index 0000000..ca8f1db --- /dev/null +++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/newline.py @@ -0,0 +1,43 @@ +"""Proceess '\n'.""" +from ..common.utils import charStrAt, isStrSpace +from .state_inline import StateInline + + +def newline(state: StateInline, silent: bool) -> bool: + pos = state.pos + + if state.src[pos] != "\n": + return False + + pmax = len(state.pending) - 1 + maximum = state.posMax + + # ' \n' -> hardbreak + # Lookup in pending chars is bad practice! Don't copy to other rules! + # Pending string is stored in concat mode, indexed lookups will cause + # conversion to flat mode. + if not silent: + if pmax >= 0 and charStrAt(state.pending, pmax) == " ": + if pmax >= 1 and charStrAt(state.pending, pmax - 1) == " ": + # Find whitespaces tail of pending chars. 
+ ws = pmax - 1 + while ws >= 1 and charStrAt(state.pending, ws - 1) == " ": + ws -= 1 + state.pending = state.pending[:ws] + + state.push("hardbreak", "br", 0) + else: + state.pending = state.pending[:-1] + state.push("softbreak", "br", 0) + + else: + state.push("softbreak", "br", 0) + + pos += 1 + + # skip heading spaces for next line + while pos < maximum and isStrSpace(state.src[pos]): + pos += 1 + + state.pos = pos + return True diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/state_inline.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/state_inline.py new file mode 100644 index 0000000..c0c491c --- /dev/null +++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/state_inline.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from collections import namedtuple +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Literal + +from .._compat import DATACLASS_KWARGS +from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace +from ..ruler import StateBase +from ..token import Token +from ..utils import EnvType + +if TYPE_CHECKING: + from markdown_it import MarkdownIt + + +@dataclass(**DATACLASS_KWARGS) +class Delimiter: + # Char code of the starting marker (number). + marker: int + + # Total length of these series of delimiters. + length: int + + # A position of the token this delimiter corresponds to. + token: int + + # If this delimiter is matched as a valid opener, `end` will be + # equal to its position, otherwise it's `-1`. + end: int + + # Boolean flags that determine if this delimiter could open or close + # an emphasis. + open: bool + close: bool + + level: bool | None = None + + +Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"]) + + +class StateInline(StateBase): + def __init__( + self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token] + ) -> None: + self.src = src + self.env = env + self.md = md + self.tokens = outTokens + self.tokens_meta: list[dict[str, Any] | None] = [None] * len(outTokens) + + self.pos = 0 + self.posMax = len(self.src) + self.level = 0 + self.pending = "" + self.pendingLevel = 0 + + # Stores { start: end } pairs. Useful for backtrack + # optimization of pairs parse (emphasis, strikes). + self.cache: dict[int, int] = {} + + # List of emphasis-like delimiters for current tag + self.delimiters: list[Delimiter] = [] + + # Stack of delimiter lists for upper level tags + self._prev_delimiters: list[list[Delimiter]] = [] + + # backticklength => last seen position + self.backticks: dict[int, int] = {} + self.backticksScanned = False + + # Counter used to disable inline linkify-it execution + # inside <a> and markdown links + self.linkLevel = 0 + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}" + f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})" + ) + + def pushPending(self) -> Token: + token = Token("text", "", 0) + token.content = self.pending + token.level = self.pendingLevel + self.tokens.append(token) + self.pending = "" + return token + + def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token: + """Push new token to "stream". 
+        If pending text exists - flush it as text token
+        """
+        if self.pending:
+            self.pushPending()
+
+        token = Token(ttype, tag, nesting)
+        token_meta = None
+
+        if nesting < 0:
+            # closing tag
+            self.level -= 1
+            self.delimiters = self._prev_delimiters.pop()
+
+        token.level = self.level
+
+        if nesting > 0:
+            # opening tag
+            self.level += 1
+            self._prev_delimiters.append(self.delimiters)
+            self.delimiters = []
+            token_meta = {"delimiters": self.delimiters}
+
+        self.pendingLevel = self.level
+        self.tokens.append(token)
+        self.tokens_meta.append(token_meta)
+        return token
+
+    def scanDelims(self, start: int, canSplitWord: bool) -> Scanned:
+        """
+        Scan a sequence of emphasis-like markers, and determine whether
+        it can start an emphasis sequence or end an emphasis sequence.
+
+        - start - position to scan from (it should point at a valid marker);
+        - canSplitWord - determine if these markers can be found inside a word
+
+        """
+        pos = start
+        maximum = self.posMax
+        marker = self.src[start]
+
+        # treat beginning of the line as a whitespace
+        lastChar = self.src[start - 1] if start > 0 else " "
+
+        while pos < maximum and self.src[pos] == marker:
+            pos += 1
+
+        count = pos - start
+
+        # treat end of the line as a whitespace
+        nextChar = self.src[pos] if pos < maximum else " "
+
+        isLastPunctChar = isMdAsciiPunct(ord(lastChar)) or isPunctChar(lastChar)
+        isNextPunctChar = isMdAsciiPunct(ord(nextChar)) or isPunctChar(nextChar)
+
+        isLastWhiteSpace = isWhiteSpace(ord(lastChar))
+        isNextWhiteSpace = isWhiteSpace(ord(nextChar))
+
+        left_flanking = not (
+            isNextWhiteSpace
+            or (isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar))
+        )
+        right_flanking = not (
+            isLastWhiteSpace
+            or (isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar))
+        )
+
+        if not canSplitWord:
+            can_open = left_flanking and ((not right_flanking) or isLastPunctChar)
+            can_close = right_flanking and ((not left_flanking) or isNextPunctChar)
+        else:
+            can_open = left_flanking
+            can_close = right_flanking
+
+        return Scanned(can_open, can_close, count)
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/strikethrough.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/strikethrough.py
new file mode 100644
index 0000000..ec81628
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/strikethrough.py
@@ -0,0 +1,127 @@
+# ~~strike through~~
+from __future__ import annotations
+
+from .state_inline import Delimiter, StateInline
+
+
+def tokenize(state: StateInline, silent: bool) -> bool:
+    """Insert each marker as a separate text token, and add it to delimiter list"""
+    start = state.pos
+    ch = state.src[start]
+
+    if silent:
+        return False
+
+    if ch != "~":
+        return False
+
+    scanned = state.scanDelims(state.pos, True)
+    length = scanned.length
+
+    if length < 2:
+        return False
+
+    if length % 2:
+        token = state.push("text", "", 0)
+        token.content = ch
+        length -= 1
+
+    i = 0
+    while i < length:
+        token = state.push("text", "", 0)
+        token.content = ch + ch
+        state.delimiters.append(
+            Delimiter(
+                marker=ord(ch),
+                length=0,  # disable "rule of 3" length checks meant for emphasis
+                token=len(state.tokens) - 1,
+                end=-1,
+                open=scanned.can_open,
+                close=scanned.can_close,
+            )
+        )
+
+        i += 2
+
+    state.pos += scanned.length
+
+    return True
+
+
+def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None:
+    loneMarkers = []
+    maximum = len(delimiters)
+
+    i = 0
+    while i < maximum:
+        startDelim = delimiters[i]
+
+        if startDelim.marker != 0x7E:  # /* ~ */
+            i += 1
+            continue
+
+        if startDelim.end == -1:
+            i += 1
+            continue
+
+        endDelim = delimiters[startDelim.end]
+
+        token = state.tokens[startDelim.token]
+        token.type = "s_open"
+        token.tag = "s"
+        token.nesting = 1
+        token.markup = "~~"
+        token.content = ""
+
+        token = state.tokens[endDelim.token]
+        token.type = "s_close"
+        token.tag = "s"
+        token.nesting = -1
+        token.markup = "~~"
+        token.content = ""
+
+        if (
+            state.tokens[endDelim.token - 1].type == "text"
+            and state.tokens[endDelim.token - 1].content == "~"
+        ):
+            loneMarkers.append(endDelim.token - 1)
+
+        i += 1
+
+    # If a marker sequence has an odd number of characters, it's split
+    # like this: `~~~~~` -> `~` + `~~` + `~~`, leaving one marker at the
+    # start of the sequence.
+    #
+    # So, we have to move all those markers after subsequent s_close tags.
+    #
+    while loneMarkers:
+        i = loneMarkers.pop()
+        j = i + 1
+
+        while (j < len(state.tokens)) and (state.tokens[j].type == "s_close"):
+            j += 1
+
+        j -= 1
+
+        if i != j:
+            token = state.tokens[j]
+            state.tokens[j] = state.tokens[i]
+            state.tokens[i] = token
+
+
+def postProcess(state: StateInline) -> None:
+    """Walk through delimiter list and replace text tokens with tags."""
+    tokens_meta = state.tokens_meta
+    maximum = len(state.tokens_meta)
+    _postProcess(state, state.delimiters)
+
+    curr = 0
+    while curr < maximum:
+        try:
+            curr_meta = tokens_meta[curr]
+        except IndexError:
+            pass
+        else:
+            if curr_meta and "delimiters" in curr_meta:
+                _postProcess(state, curr_meta["delimiters"])
+        curr += 1
diff --git a/venv/lib/python3.11/site-packages/markdown_it/rules_inline/text.py b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/text.py
new file mode 100644
index 0000000..f306b2e
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/markdown_it/rules_inline/text.py
@@ -0,0 +1,53 @@
+# Skip text characters for text token, place those to pending buffer
+# and increment current pos
+from .state_inline import StateInline
+
+# Rule to skip pure text
+# '{}$%@~+=:' reserved for extensions
+
+# !!!! Don't confuse with "Markdown ASCII Punctuation" chars
+# http://spec.commonmark.org/0.15/#ascii-punctuation-character
+
+
+_TerminatorChars = {
+    "\n",
+    "!",
+    "#",
+    "$",
+    "%",
+    "&",
+    "*",
+    "+",
+    "-",
+    ":",
+    "<",
+    "=",
+    ">",
+    "@",
+    "[",
+    "\\",
+    "]",
+    "^",
+    "_",
+    "`",
+    "{",
+    "}",
+    "~",
+}
+
+
+def text(state: StateInline, silent: bool) -> bool:
+    pos = state.pos
+    posMax = state.posMax
+    while (pos < posMax) and state.src[pos] not in _TerminatorChars:
+        pos += 1
+
+    if pos == state.pos:
+        return False
+
+    if not silent:
+        state.pending += state.src[state.pos : pos]
+
+    state.pos = pos
+
+    return True
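The rules added above are easiest to sanity-check through markdown-it-py's public entry points (MarkdownIt, render, renderInline, parseInline). The sketches below assume the vendored package is importable and show expected output in comments. First, autolink.py: AUTOLINK_RE accepts a 2-32 character scheme, EMAIL_RE a bare address, and both branches emit link_open/text/link_close tokens with markup "autolink".

    from markdown_it import MarkdownIt

    md = MarkdownIt("commonmark")
    print(md.renderInline("<https://example.com/a?b=c>"))
    # <a href="https://example.com/a?b=c">https://example.com/a?b=c</a>
    print(md.renderInline("<user@example.com>"))
    # <a href="mailto:user@example.com">user@example.com</a>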
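backticks.py only closes a code span on a backtick run of exactly the opener's length, and state.backticks caches "closer length -> last seen position" so pathological inputs are scanned only once. A minimal check of the matching and the one-space stripping rule:

    from markdown_it import MarkdownIt

    md = MarkdownIt("commonmark")
    print(md.renderInline("``code with ` inside``"))  # <code>code with ` inside</code>
    print(md.renderInline("`` ` ``"))   # one flanking space stripped: <code>`</code>
    print(md.renderInline("`no closer"))  # no equal-length run found: `no closer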
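balance_pairs.py implements the spec's "rule of 3" quoted in its comments (runs whose combined length is a multiple of 3 cannot pair unless both lengths are) plus the openersBottom/jumps bookkeeping that keeps matching linear on inputs like *_*_*_*. The rule is visible directly in the output:

    from markdown_it import MarkdownIt

    md = MarkdownIt("commonmark")
    print(md.renderInline("*foo**bar**baz*"))  # <em>foo<strong>bar</strong>baz</em>
    print(md.renderInline("*foo**bar*"))       # unmatched run stays literal: <em>foo**bar</em>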
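emphasis.py passes canSplitWord=True only for *, and state_inline.scanDelims derives can_open/can_close from the left/right-flanking tests, so the two markers behave differently inside words:

    from markdown_it import MarkdownIt

    md = MarkdownIt("commonmark")
    print(md.renderInline("***both***"))     # <em><strong>both</strong></em>
    print(md.renderInline("intra*word*ok"))  # intra<em>word</em>ok
    print(md.renderInline("intra_word_no"))  # underscores cannot split words: intra_word_no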
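entity.py resolves numeric references through fromCodePoint (substituting U+FFFD for invalid code points) and named references against the bundled entities table; unknown names fall through as plain text:

    from markdown_it import MarkdownIt

    md = MarkdownIt("commonmark")
    print(md.renderInline("&copy; &#x2122; &madeup;"))
    # © ™ &amp;madeup;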
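escape.py turns a backslash before a character in _ESCAPED into a text_special token and a backslash before a newline into a hardbreak:

    from markdown_it import MarkdownIt

    md = MarkdownIt("commonmark")
    print(md.renderInline(r"\*not emphasis\*"))  # *not emphasis*
    print(md.renderInline("one\\\ntwo"))  # backslash before newline: one<br />\ntwo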
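fragments_join.py is why the finished stream contains no leftover one-character delimiter tokens: markers that were not converted to tags are merged back into neighbouring text nodes and token levels are recomputed. The effect is easiest to see on the children of the inline token returned by parseInline:

    from markdown_it import MarkdownIt

    md = MarkdownIt("commonmark")
    inline = md.parseInline("*hi* there")[0]  # a single block-level "inline" token
    print([t.type for t in inline.children])
    # ['em_open', 'text', 'em_close', 'text']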
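html_inline.py is gated on the html option, which the commonmark preset enables and the js-default preset does not, so the same input renders differently per preset:

    from markdown_it import MarkdownIt

    print(MarkdownIt("commonmark").renderInline("a <b>tag</b>"))  # a <b>tag</b>
    print(MarkdownIt("js-default").renderInline("a <b>tag</b>"))  # a &lt;b&gt;tag&lt;/b&gt;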
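link.py and image.py share the reference branch: the label is normalized with normalizeReference and looked up in env["references"], which the block parser fills from definitions, so reference syntax needs a full render rather than renderInline:

    from markdown_it import MarkdownIt

    src = '[docs][ref] and ![logo][ref]\n\n[ref]: https://example.com "Title"'
    print(MarkdownIt("commonmark").render(src))
    # <p><a href="https://example.com" title="Title">docs</a> and <img src="https://example.com" alt="logo" title="Title" /></p>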
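linkify.py runs only when the linkify option is set and the rule is enabled, and per its own code it raises ModuleNotFoundError unless the optional linkify-it-py dependency is installed, so this sketch assumes that extra is present:

    from markdown_it import MarkdownIt

    md_l = MarkdownIt("commonmark", {"linkify": True}).enable("linkify")
    print(md_l.renderInline("see https://example.com today"))
    # see <a href="https://example.com">https://example.com</a> today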
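newline.py distinguishes a bare newline (softbreak) from one preceded by two or more spaces (hardbreak), trimming the trailing spaces out of the pending buffer:

    from markdown_it import MarkdownIt

    md = MarkdownIt("commonmark")
    print(md.renderInline("soft\nwrap"))    # softbreak: soft\nwrap
    print(md.renderInline("hard  \nwrap"))  # two trailing spaces: hard<br />\nwrap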
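strikethrough.py requires runs of at least two tildes (its Delimiter uses length=0 to opt out of the emphasis-only rule of 3) and is not part of the commonmark preset, so it has to be enabled by rule name:

    from markdown_it import MarkdownIt

    md_s = MarkdownIt("commonmark").enable("strikethrough")
    print(md_s.renderInline("~~done~~ and ~not this~"))
    # <s>done</s> and ~not this~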