From 6d7ba58f880be618ade07f8ea080fe8c4bf8a896 Mon Sep 17 00:00:00 2001
From: cyfraeviolae
Date: Wed, 3 Apr 2024 03:10:44 -0400
Subject: venv

---
 .../jsbeautifier/javascript/tokenizer.py | 634 +++++++++++++++++++++
 1 file changed, 634 insertions(+)
 create mode 100644 venv/lib/python3.11/site-packages/jsbeautifier/javascript/tokenizer.py

diff --git a/venv/lib/python3.11/site-packages/jsbeautifier/javascript/tokenizer.py b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/tokenizer.py
new file mode 100644
index 0000000..3beba9f
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/tokenizer.py
@@ -0,0 +1,634 @@
+# The MIT License (MIT)
+#
+# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import re
+from ..core.inputscanner import InputScanner
+from ..core.tokenizer import TokenTypes as BaseTokenTypes
+from ..core.tokenizer import Tokenizer as BaseTokenizer
+from ..core.tokenizer import TokenizerPatterns as BaseTokenizerPatterns
+from ..core.directives import Directives
+
+from ..core.pattern import Pattern
+from ..core.templatablepattern import TemplatablePattern
+
+
+__all__ = ["TOKEN", "Tokenizer", "TokenTypes"]
+
+
+class TokenTypes(BaseTokenTypes):
+    START_EXPR = "TK_START_EXPR"
+    END_EXPR = "TK_END_EXPR"
+    START_BLOCK = "TK_START_BLOCK"
+    END_BLOCK = "TK_END_BLOCK"
+    WORD = "TK_WORD"
+    RESERVED = "TK_RESERVED"
+    SEMICOLON = "TK_SEMICOLON"
+    STRING = "TK_STRING"
+    EQUALS = "TK_EQUALS"
+    OPERATOR = "TK_OPERATOR"
+    COMMA = "TK_COMMA"
+    BLOCK_COMMENT = "TK_BLOCK_COMMENT"
+    COMMENT = "TK_COMMENT"
+    DOT = "TK_DOT"
+    UNKNOWN = "TK_UNKNOWN"
+
+    def __init__(self):
+        pass
+
+
+TOKEN = TokenTypes()
+
+dot_pattern = re.compile(r"[^\d\.]")
+
+number_pattern = re.compile(
+    r"0[xX][0123456789abcdefABCDEF_]*n?|0[oO][01234567_]*n?|0[bB][01_]*n?|\d[\d_]*n|(?:\.\d[\d_]*|\d[\d_]*\.?[\d_]*)(?:[eE][+-]?[\d_]+)?"
+)
+digit = re.compile(r"[0-9]")
+
+
+positionable_operators = frozenset(
+    (
+        ">>> === !== &&= ??= ||= "
+        + "<< && >= ** != == <= >> || ?? |> "
+        + "< / - + > : & % ? ^ | *"
+    ).split(" ")
+)
+
+punct = (
+    ">>>= "
+    + "... >>= <<= === >>> !== **= &&= ??= ||= "
+    + "=> ^= :: /= << <= == && -= >= >> != -- += ** || ?? ++ %= &= *= |= |> "
+    + "= ! ? > < : / ^ - + * & % ~ |"
+)
+
+punct = re.compile(r"([-[\]{}()*+?.,\\^$|#])").sub(r"\\\1", punct)
+# ?. but not if followed by a number
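+# For example, "a?.b" tokenizes "?." as a single operator, while in
+# "a?.5:b" the (?!\d) lookahead rejects "?.", so "?" matches alone and
+# ".5" is left for number_pattern (a conditional around the number .5).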
+punct = "\\?\\.(?!\\d) " + punct
+punct = punct.replace(" ", "|")
+
+punct_pattern = re.compile(punct)
+
+# Words which always should start on a new line
+line_starters = frozenset(
+    (
+        "continue,try,throw,return,var,let,const,if,switch,case,default,for,"
+        + "while,break,function,import,export"
+    ).split(",")
+)
+reserved_words = line_starters | frozenset(
+    [
+        "do",
+        "in",
+        "of",
+        "else",
+        "get",
+        "set",
+        "new",
+        "catch",
+        "finally",
+        "typeof",
+        "yield",
+        "async",
+        "await",
+        "from",
+        "as",
+        "class",
+        "extends",
+    ]
+)
+
+reserved_word_pattern = re.compile(r"^(?:" + "|".join(reserved_words) + r")$")
+
+directives_core = Directives(r"/\*", r"\*/")
+
+xmlRegExp = re.compile(
+    r'[\s\S]*?<(\/?)([-a-zA-Z:0-9_.]+|{[^}]+?}|!\[CDATA\[[^\]]*?\]\]|)(\s*{[^}]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{([^{}]|{[^}]+?})+?}))*\s*(\/?)\s*>'
+)
+
+
+class TokenizerPatterns(BaseTokenizerPatterns):
+    def __init__(self, input_scanner, acorn, options):
+        BaseTokenizerPatterns.__init__(self, input_scanner)
+
+        # This is not pretty, but given how we did the version import
+        # it is the only way to do this without having setup.py fail on a missing
+        # six dependency.
+        six = __import__("six")
+
+        # IMPORTANT: This string must be run through six to handle \u chars
+        self.whitespace = self.whitespace.matching(
+            six.u(r"\u00A0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff"),
+            six.u(r"\u2028\u2029"),
+        )
+
+        pattern = Pattern(input_scanner)
+        templatable = TemplatablePattern(input_scanner).read_options(options)
+
+        self.identifier = templatable.starting_with(acorn.identifier).matching(
+            acorn.identifierMatch
+        )
+        self.number = pattern.matching(number_pattern)
+        self.punct = pattern.matching(punct_pattern)
+        self.comment = pattern.starting_with(r"//").until(six.u(r"[\n\r\u2028\u2029]"))
+        self.block_comment = pattern.starting_with(r"/\*").until_after(r"\*/")
+        self.html_comment_start = pattern.matching(r"<!--")
+        self.html_comment_end = pattern.matching(r"-->")
+        self.include = pattern.starting_with(r"#include").until_after(acorn.lineBreak)
+        self.shebang = pattern.starting_with(r"#!").until_after(acorn.lineBreak)
+
+        self.xml = pattern.matching(xmlRegExp)
+
+        self.single_quote = templatable.until(six.u(r"['\\\n\r\u2028\u2029]"))
+        self.double_quote = templatable.until(six.u(r'["\\\n\r\u2028\u2029]'))
+        self.template_text = templatable.until(r"[`\\$]")
+        self.template_expression = templatable.until(r"[`}\\]")
+
+
+class Tokenizer(BaseTokenizer):
+    positionable_operators = positionable_operators
+    line_starters = line_starters
+
+    def __init__(self, input_string, opts):
+        BaseTokenizer.__init__(self, input_string, opts)
+
+        import jsbeautifier.javascript.acorn as acorn
+
+        self.acorn = acorn
+
+        self.in_html_comment = False
+        self.has_char_escapes = False
+
+        self._patterns = TokenizerPatterns(self._input, self.acorn, opts)
+
+    def _reset(self):
+        self.in_html_comment = False
+
+    def _is_comment(self, current_token):
+        return (
+            current_token.type == TOKEN.COMMENT
+            or current_token.type == TOKEN.BLOCK_COMMENT
+            or current_token.type == TOKEN.UNKNOWN
+        )
+
+    def _is_opening(self, current_token):
+        return (
+            current_token.type == TOKEN.START_BLOCK
+            or current_token.type == TOKEN.START_EXPR
+        )
+
+    def _is_closing(self, current_token, open_token):
+        return (
+            current_token.type == TOKEN.END_BLOCK
+            or current_token.type == TOKEN.END_EXPR
+        ) and (
+            open_token is not None
+            and (
+                (current_token.text == "]" and open_token.text == "[")
+                or (current_token.text == ")" and open_token.text == "(")
+                or (current_token.text == "}" and open_token.text == "{")
+            )
+        )
+
+    def _get_next_token(self, previous_token, open_token):
+        token = None
+        self._readWhitespace()
+
+        c = self._input.peek()
+        if c is None:
+            token = self._create_token(TOKEN.EOF, "")
+
+        token = token or self._read_non_javascript(c)
+        token = token or self._read_string(c)
+        token = token or self._read_pair(
+            c, self._input.peek(1)
+        )  # Issue #2062 hack for record type '#{'
+        token = token or self._read_word(previous_token)
+        token = token or self._read_singles(c)
+        token = token or self._read_comment(c)
+        token = token or self._read_regexp(c, previous_token)
+        token = token or self._read_xml(c, previous_token)
+        token = token or self._read_punctuation()
+        token = token or self._create_token(TOKEN.UNKNOWN, self._input.next())
+
+        return token
+
+    def _read_singles(self, c):
+        token = None
+
+        if c == "(" or c == "[":
+            token = self._create_token(TOKEN.START_EXPR, c)
+        elif c == ")" or c == "]":
+            token = self._create_token(TOKEN.END_EXPR, c)
+        elif c == "{":
+            token = self._create_token(TOKEN.START_BLOCK, c)
+        elif c == "}":
+            token = self._create_token(TOKEN.END_BLOCK, c)
+        elif c == ";":
+            token = self._create_token(TOKEN.SEMICOLON, c)
+        elif (
+            c == "."
+            and self._input.peek(1) is not None
+            and bool(dot_pattern.match(self._input.peek(1)))
+        ):
+            token = self._create_token(TOKEN.DOT, c)
+        elif c == ",":
+            token = self._create_token(TOKEN.COMMA, c)
+
+        if token is not None:
+            self._input.next()
+
+        return token
+
+    def _read_pair(self, c, d):
+        token = None
+
+        if c == "#" and d == "{":
+            token = self._create_token(TOKEN.START_BLOCK, c + d)
+
+        if token is not None:
+            self._input.next()
+            self._input.next()
+
+        return token
+
+    def _read_word(self, previous_token):
+        resulting_string = self._patterns.identifier.read()
+
+        if bool(resulting_string):
+            resulting_string = re.sub(self.acorn.allLineBreaks, "\n", resulting_string)
+            if not (
+                previous_token.type == TOKEN.DOT
+                or (
+                    previous_token.type == TOKEN.RESERVED
+                    and (previous_token.text == "set" or previous_token.text == "get")
+                )
+            ) and reserved_word_pattern.match(resulting_string):
+                if (resulting_string == "in" or resulting_string == "of") and (
+                    previous_token.type == TOKEN.WORD
+                    or previous_token.type == TOKEN.STRING
+                ):
+                    # in and of are operators, need to hack
+                    return self._create_token(TOKEN.OPERATOR, resulting_string)
+
+                return self._create_token(TOKEN.RESERVED, resulting_string)
+
+            return self._create_token(TOKEN.WORD, resulting_string)
+
+        resulting_string = self._patterns.number.read()
+        if resulting_string != "":
+            return self._create_token(TOKEN.WORD, resulting_string)
+
+    def _read_comment(self, c):
+        token = None
+        if c == "/":
+            comment = ""
+            if self._input.peek(1) == "*":  # peek /* .. */ comment
+                comment = self._patterns.block_comment.read()
+
+                directives = directives_core.get_directives(comment)
+                if directives and directives.get("ignore") == "start":
+                    comment += directives_core.readIgnored(self._input)
+                comment = re.sub(self.acorn.allLineBreaks, "\n", comment)
+                token = self._create_token(TOKEN.BLOCK_COMMENT, comment)
+                token.directives = directives
+
+            elif self._input.peek(1) == "/":  # peek // comment
+                comment = self._patterns.comment.read()
+                token = self._create_token(TOKEN.COMMENT, comment)
+
+        return token
+
+    def _read_string(self, c):
+        if c == "`" or c == "'" or c == '"':
+            resulting_string = self._input.next()
+            self.has_char_escapes = False
+
+            if c == "`":
+                resulting_string += self.parse_string("`", True, "${")
+            else:
+                resulting_string += self.parse_string(c)
+
+            if self.has_char_escapes and self._options.unescape_strings:
+                resulting_string = self.unescape_string(resulting_string)
+
+            if self._input.peek() == c:
+                resulting_string += self._input.next()
+
+            resulting_string = re.sub(self.acorn.allLineBreaks, "\n", resulting_string)
+
+            return self._create_token(TOKEN.STRING, resulting_string)
+
+        return None
+
+    def _read_regexp(self, c, previous_token):
+        if c == "/" and self.allowRegExOrXML(previous_token):
+            # handle regexp
+            resulting_string = self._input.next()
+            esc = False
+
+            in_char_class = False
+            while (
+                self._input.hasNext()
+                and (esc or in_char_class or self._input.peek() != c)
+                and not self._input.testChar(self.acorn.newline)
+            ):
+                resulting_string += self._input.peek()
+                if not esc:
+                    esc = self._input.peek() == "\\"
+                    if self._input.peek() == "[":
+                        in_char_class = True
+                    elif self._input.peek() == "]":
+                        in_char_class = False
+                else:
+                    esc = False
+                self._input.next()
+
+            if self._input.peek() == c:
+                resulting_string += self._input.next()
+
+                if c == "/":
+                    # regexps may have modifiers /regexp/MOD, so fetch those too
+                    # Only [dgimsuvy] are valid, but if the user puts in garbage, do
+                    # what we can to take it.
+                    resulting_string += self._input.read(self.acorn.identifier)
+
+            return self._create_token(TOKEN.STRING, resulting_string)
+
+        return None
+
+    def _read_xml(self, c, previous_token):
+        if self._options.e4x and c == "<" and self.allowRegExOrXML(previous_token):
+            # handle e4x xml literals
+            xmlStr = ""
+            match = self._patterns.xml.read_match()
+            if match and not match.group(1):
+                rootTag = match.group(2)
+                rootTag = re.sub(r"^{\s+", "{", re.sub(r"\s+}$", "}", rootTag))
+                isCurlyRoot = rootTag.startswith("{")
+                depth = 0
+                while bool(match):
+                    isEndTag = match.group(1)
+                    tagName = match.group(2)
+                    isSingletonTag = (match.groups()[-1] != "") or (
+                        match.group(2)[0:8] == "![CDATA["
+                    )
+                    if not isSingletonTag and (
+                        tagName == rootTag
+                        or (
+                            isCurlyRoot
+                            and re.sub(r"^{\s+", "{", re.sub(r"\s+}$", "}", tagName))
+                        )
+                    ):
+                        if isEndTag:
+                            depth -= 1
+                        else:
+                            depth += 1
+
+                    xmlStr += match.group(0)
+                    if depth <= 0:
+                        break
+
+                    match = self._patterns.xml.read_match()
+
+                # if we didn't close correctly, keep unformatted.
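+                # For example, "<a><b></a>" never returns to depth 0, so
+                # the loop above runs out of matches and whatever remains
+                # in the input is appended below without formatting.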
+                if not match:
+                    xmlStr += self._input.match(re.compile(r"[\s\S]*")).group(0)
+
+                xmlStr = re.sub(self.acorn.allLineBreaks, "\n", xmlStr)
+                return self._create_token(TOKEN.STRING, xmlStr)
+
+        return None
+
+    def _read_non_javascript(self, c):
+        resulting_string = ""
+
+        if c == "#":
+            # she-bang
+            if self._is_first_token():
+                resulting_string = self._patterns.shebang.read()
+                if resulting_string:
+                    return self._create_token(
+                        TOKEN.UNKNOWN, resulting_string.strip() + "\n"
+                    )
+
+            # handles extendscript #includes
+            resulting_string = self._patterns.include.read()
+
+            if resulting_string:
+                return self._create_token(
+                    TOKEN.UNKNOWN, resulting_string.strip() + "\n"
+                )
+
+            c = self._input.next()
+
+            # Spidermonkey-specific sharp variables for circular references
+            # https://developer.mozilla.org/En/Sharp_variables_in_JavaScript
+            # http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp
+            # around line 1935
+            sharp = "#"
+            if self._input.hasNext() and self._input.testChar(digit):
+                while True:
+                    c = self._input.next()
+                    sharp += c
+                    if (not self._input.hasNext()) or c == "#" or c == "=":
+                        break
+                if c == "#":
+                    pass
+                elif self._input.peek() == "[" and self._input.peek(1) == "]":
+                    sharp += "[]"
+                    self._input.next()
+                    self._input.next()
+                elif self._input.peek() == "{" and self._input.peek(1) == "}":
+                    sharp += "{}"
+                    self._input.next()
+                    self._input.next()
+
+                return self._create_token(TOKEN.WORD, sharp)
+
+            self._input.back()
+
+        elif c == "<" and self._is_first_token():
+            if self._patterns.html_comment_start.read():
+                c = "<!--"
+                while self._input.hasNext() and not self._input.testChar(
+                    self.acorn.newline
+                ):
+                    c += self._input.next()
+
+                self.in_html_comment = True
+                return self._create_token(TOKEN.COMMENT, c)
+
+        elif (
+            c == "-"
+            and self.in_html_comment
+            and self._patterns.html_comment_end.read()
+        ):
+            self.in_html_comment = False
+            return self._create_token(TOKEN.COMMENT, "-->")
+
+        return None
+
+    def _read_punctuation(self):
+        token = None
+        resulting_string = self._patterns.punct.read()
+        if resulting_string != "":
+            if resulting_string == "=":
+                token = self._create_token(TOKEN.EQUALS, resulting_string)
+            elif resulting_string == "?.":
+                token = self._create_token(TOKEN.DOT, resulting_string)
+            else:
+                token = self._create_token(TOKEN.OPERATOR, resulting_string)
+
+        return token
+
+    __regexTokens = {
+        TOKEN.COMMENT,
+        TOKEN.START_EXPR,
+        TOKEN.START_BLOCK,
+        TOKEN.START,
+        TOKEN.END_BLOCK,
+        TOKEN.OPERATOR,
+        TOKEN.EQUALS,
+        TOKEN.EOF,
+        TOKEN.SEMICOLON,
+        TOKEN.COMMA,
+    }
+
+    def allowRegExOrXML(self, previous_token):
+        return (
+            (
+                previous_token.type == TOKEN.RESERVED
+                and previous_token.text
+                in {"return", "case", "throw", "else", "do", "typeof", "yield"}
+            )
+            or (
+                previous_token.type == TOKEN.END_EXPR
+                and previous_token.text == ")"
+                and previous_token.opened.previous.type == TOKEN.RESERVED
+                and previous_token.opened.previous.text in {"if", "while", "for"}
+            )
+            or (previous_token.type in self.__regexTokens)
+        )
+
+    def parse_string(self, delimiter, allow_unescaped_newlines=False, start_sub=None):
+        if delimiter == "'":
+            pattern = self._patterns.single_quote
+        elif delimiter == '"':
+            pattern = self._patterns.double_quote
+        elif delimiter == "`":
+            pattern = self._patterns.template_text
+        elif delimiter == "}":
+            pattern = self._patterns.template_expression
+
+        resulting_string = pattern.read()
+        next = ""
+        while self._input.hasNext():
+            next = self._input.next()
+            if next == delimiter or (
+                not allow_unescaped_newlines and self.acorn.newline.match(next)
+            ):
+                self._input.back()
+                break
+            elif next == "\\" and self._input.hasNext():
+                current_char = self._input.peek()
+                if current_char == "x" or current_char == "u":
+                    self.has_char_escapes = True
+                elif current_char == "\r" and self._input.peek(1) == "\n":
+                    self._input.next()
+
+                next += self._input.next()
+            elif start_sub is not None:
+                if start_sub == "${" and next == "$" and self._input.peek() == "{":
+                    next += self._input.next()
+
+                if start_sub == next:
+                    if delimiter == "`":
+                        next += self.parse_string("}", allow_unescaped_newlines, "`")
+                    else:
+                        next += self.parse_string("`", allow_unescaped_newlines, "${")
+
+                    if self._input.hasNext():
+                        next += self._input.next()
+
+            next += pattern.read()
+            resulting_string += next
+        return resulting_string
+
+    def unescape_string(self, s):
+        # You think that a regex would work for this
+        # return s.replace(/\\x([0-9a-f]{2})/gi, function(match, val) {
+        #     return String.fromCharCode(parseInt(val, 16));
+        # })
+        # However, dealing with '\xff', '\\xff', '\\\xff' makes this more fun.
+        out = self.acorn.six.u("")
+        escaped = 0
+
+        input_scan = InputScanner(s)
+        matched = None
+
+        while input_scan.hasNext():
+            # Keep any whitespace, non-slash characters
+            # also keep slash pairs.
+            matched = input_scan.match(re.compile(r"([\s]|[^\\]|\\\\)+"))
+
+            if matched:
+                out += matched.group(0)
+
+            if input_scan.peek() != "\\":
+                continue
+
+            input_scan.next()
+            if input_scan.peek() == "x":
+                matched = input_scan.match(re.compile(r"x([0-9A-Fa-f]{2})"))
+            elif input_scan.peek() == "u":
+                matched = input_scan.match(re.compile(r"u([0-9A-Fa-f]{4})"))
+                if not matched:
+                    matched = input_scan.match(re.compile(r"u\{([0-9A-Fa-f]+)\}"))
+            else:
+                out += "\\"
+                if input_scan.hasNext():
+                    out += input_scan.next()
+                continue
+
+            # If there's some error decoding, return the original string
+            if not matched:
+                return s
+
+            escaped = int(matched.group(1), 16)
+
+            if escaped > 0x7E and escaped <= 0xFF and matched.group(0).startswith("x"):
+                # we bail out on \x7f..\xff,
+                # leaving whole string escaped,
+                # as it's probably completely binary
+                return s
+            elif escaped >= 0x00 and escaped < 0x20:
+                # leave 0x00...0x1f escaped
+                out += "\\" + matched.group(0)
+            elif escaped > 0x10FFFF:
+                # If the escape sequence is out of bounds, keep the original
+                # sequence and continue conversion
+                out += "\\" + matched.group(0)
+            elif escaped == 0x22 or escaped == 0x27 or escaped == 0x5C:
+                # double-quote, single-quote, backslash - escape these
+                out += "\\" + chr(escaped)
+            else:
+                out += self.acorn.six.unichr(escaped)
+
+        return out
-- cgit v1.2.3
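A quick way to exercise the code paths above is through jsbeautifier's
public API, which is built on this tokenizer. A minimal sketch, assuming
the vendored jsbeautifier package is importable:

    import jsbeautifier

    opts = jsbeautifier.default_options()
    # unescape_strings routes \x/\u escapes through unescape_string() above
    opts.unescape_strings = True

    # parse_string() flags the \x41/\u0042 escapes via has_char_escapes and
    # unescape_string() rewrites the literal to "AB"; /ab+/g is read by
    # _read_regexp() because "=" (TOKEN.EQUALS) is in __regexTokens.
    print(jsbeautifier.beautify('var s="\\x41\\u0042";var r=/ab+/g;', opts))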