author | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:17:55 -0400
---|---|---
committer | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:17:55 -0400
commit | 12cf076118570eebbff08c6b3090e0d4798447a1 (patch) |
tree | 3ba25e17e3c3a5e82316558ba3864b955919ff72 /venv/lib/python3.11/site-packages/jsbeautifier/javascript/tokenizer.py |
parent | c45662ff3923b34614ddcc8feb9195541166dcc5 (diff) |
no venv
Diffstat (limited to 'venv/lib/python3.11/site-packages/jsbeautifier/javascript/tokenizer.py')
-rw-r--r-- | venv/lib/python3.11/site-packages/jsbeautifier/javascript/tokenizer.py | 634 |
1 file changed, 0 insertions, 634 deletions
diff --git a/venv/lib/python3.11/site-packages/jsbeautifier/javascript/tokenizer.py b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/tokenizer.py
deleted file mode 100644
index 3beba9f..0000000
--- a/venv/lib/python3.11/site-packages/jsbeautifier/javascript/tokenizer.py
+++ /dev/null
@@ -1,634 +0,0 @@
-# The MIT License (MIT)
-#
-# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation files
-# (the "Software"), to deal in the Software without restriction,
-# including without limitation the rights to use, copy, modify, merge,
-# publish, distribute, sublicense, and/or sell copies of the Software,
-# and to permit persons to whom the Software is furnished to do so,
-# subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-from ..core.inputscanner import InputScanner
-from ..core.tokenizer import TokenTypes as BaseTokenTypes
-from ..core.tokenizer import Tokenizer as BaseTokenizer
-from ..core.tokenizer import TokenizerPatterns as BaseTokenizerPatterns
-from ..core.directives import Directives
-
-from ..core.pattern import Pattern
-from ..core.templatablepattern import TemplatablePattern
-
-
-__all__ = ["TOKEN", "Tokenizer", "TokenTypes"]
-
-
-class TokenTypes(BaseTokenTypes):
-    START_EXPR = "TK_START_EXPR"
-    END_EXPR = "TK_END_EXPR"
-    START_BLOCK = "TK_START_BLOCK"
-    END_BLOCK = "TK_END_BLOCK"
-    WORD = "TK_WORD"
-    RESERVED = "TK_RESERVED"
-    SEMICOLON = "TK_SEMICOLON"
-    STRING = "TK_STRING"
-    EQUALS = "TK_EQUALS"
-    OPERATOR = "TK_OPERATOR"
-    COMMA = "TK_COMMA"
-    BLOCK_COMMENT = "TK_BLOCK_COMMENT"
-    COMMENT = "TK_COMMENT"
-    DOT = "TK_DOT"
-    UNKNOWN = "TK_UNKNOWN"
-
-    def __init__(self):
-        pass
-
-
-TOKEN = TokenTypes()
-
-dot_pattern = re.compile(r"[^\d\.]")
-
-number_pattern = re.compile(
-    r"0[xX][0123456789abcdefABCDEF_]*n?|0[oO][01234567_]*n?|0[bB][01_]*n?|\d[\d_]*n|(?:\.\d[\d_]*|\d[\d_]*\.?[\d_]*)(?:[eE][+-]?[\d_]+)?"
-)
-digit = re.compile(r"[0-9]")
-
-
-positionable_operators = frozenset(
-    (
-        ">>> === !== &&= ??= ||= "
-        + "<< && >= ** != == <= >> || ?? |> "
-        + "< / - + > : & % ? ^ | *"
-    ).split(" ")
-)
-
-punct = (
-    ">>>= "
-    + "... >>= <<= === >>> !== **= &&= ??= ||= "
-    + "=> ^= :: /= << <= == && -= >= >> != -- += ** || ?? ++ %= &= *= |= |> "
-    + "= ! ? > < : / ^ - + * & % ~ |"
-)
-
-punct = re.compile(r"([-[\]{}()*+?.,\\^$|#])").sub(r"\\\1", punct)
-# ?. but not if followed by a number
-punct = "\\?\\.(?!\\d) " + punct
-punct = punct.replace(" ", "|")
-
-punct_pattern = re.compile(punct)
-
-# Words which always should start on a new line
-line_starters = frozenset(
-    (
-        "continue,try,throw,return,var,let,const,if,switch,case,default,for,"
-        + "while,break,function,import,export"
-    ).split(",")
-)
-reserved_words = line_starters | frozenset(
-    [
-        "do",
-        "in",
-        "of",
-        "else",
-        "get",
-        "set",
-        "new",
-        "catch",
-        "finally",
-        "typeof",
-        "yield",
-        "async",
-        "await",
-        "from",
-        "as",
-        "class",
-        "extends",
-    ]
-)
-
-reserved_word_pattern = re.compile(r"^(?:" + "|".join(reserved_words) + r")$")
-
-directives_core = Directives(r"/\*", r"\*/")
-
-xmlRegExp = re.compile(
-    r'[\s\S]*?<(\/?)([-a-zA-Z:0-9_.]+|{[^}]+?}|!\[CDATA\[[^\]]*?\]\]|)(\s*{[^}]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{([^{}]|{[^}]+?})+?}))*\s*(\/?)\s*>'
-)
-
-
-class TokenizerPatterns(BaseTokenizerPatterns):
-    def __init__(self, input_scanner, acorn, options):
-        BaseTokenizerPatterns.__init__(self, input_scanner)
-
-        # This is not pretty, but given how we did the version import
-        # it is the only way to do this without having setup.py fail on a missing
-        # six dependency.
-        six = __import__("six")
-
-        # IMPORTANT: This string must be run through six to handle \u chars
-        self.whitespace = self.whitespace.matching(
-            six.u(r"\u00A0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff"),
-            six.u(r"\u2028\u2029"),
-        )
-
-        pattern = Pattern(input_scanner)
-        templatable = TemplatablePattern(input_scanner).read_options(options)
-
-        self.identifier = templatable.starting_with(acorn.identifier).matching(
-            acorn.identifierMatch
-        )
-        self.number = pattern.matching(number_pattern)
-        self.punct = pattern.matching(punct_pattern)
-        self.comment = pattern.starting_with(r"//").until(six.u(r"[\n\r\u2028\u2029]"))
-        self.block_comment = pattern.starting_with(r"/\*").until_after(r"\*/")
-        self.html_comment_start = pattern.matching(r"<!--")
-        self.html_comment_end = pattern.matching(r"-->")
-        self.include = pattern.starting_with(r"#include").until_after(acorn.lineBreak)
-        self.shebang = pattern.starting_with(r"#!").until_after(acorn.lineBreak)
-
-        self.xml = pattern.matching(xmlRegExp)
-
-        self.single_quote = templatable.until(six.u(r"['\\\n\r\u2028\u2029]"))
-        self.double_quote = templatable.until(six.u(r'["\\\n\r\u2028\u2029]'))
-        self.template_text = templatable.until(r"[`\\$]")
-        self.template_expression = templatable.until(r"[`}\\]")
-
-
-class Tokenizer(BaseTokenizer):
-    positionable_operators = positionable_operators
-    line_starters = line_starters
-
-    def __init__(self, input_string, opts):
-        BaseTokenizer.__init__(self, input_string, opts)
-
-        import jsbeautifier.javascript.acorn as acorn
-
-        self.acorn = acorn
-
-        self.in_html_comment = False
-        self.has_char_escapes = False
-
-        self._patterns = TokenizerPatterns(self._input, self.acorn, opts)
-
-    def _reset(self):
-        self.in_html_comment = False
-
-    def _is_comment(self, current_token):
-        return (
-            current_token.type == TOKEN.COMMENT
-            or current_token.type == TOKEN.BLOCK_COMMENT
-            or current_token.type == TOKEN.UNKNOWN
-        )
-
-    def _is_opening(self, current_token):
-        return (
-            current_token.type == TOKEN.START_BLOCK
-            or current_token.type == TOKEN.START_EXPR
-        )
-
-    def _is_closing(self, current_token, open_token):
-        return (
-            current_token.type == TOKEN.END_BLOCK
-            or current_token.type == TOKEN.END_EXPR
-        ) and (
-            open_token is not None
-            and (
-                (current_token.text == "]" and open_token.text == "[")
-                or (current_token.text == ")" and open_token.text == "(")
-                or (current_token.text == "}" and open_token.text == "{")
-            )
-        )
-
-    def _get_next_token(self, previous_token, open_token):
-        token = None
-        self._readWhitespace()
-
-        c = self._input.peek()
-        if c is None:
-            token = self._create_token(TOKEN.EOF, "")
-
-        token = token or self._read_non_javascript(c)
-        token = token or self._read_string(c)
-        token = token or self._read_pair(
-            c, self._input.peek(1)
-        )  # Issue #2062 hack for record type '#{'
-        token = token or self._read_word(previous_token)
-        token = token or self._read_singles(c)
-        token = token or self._read_comment(c)
-        token = token or self._read_regexp(c, previous_token)
-        token = token or self._read_xml(c, previous_token)
-        token = token or self._read_punctuation()
-        token = token or self._create_token(TOKEN.UNKNOWN, self._input.next())
-
-        return token
-
-    def _read_singles(self, c):
-        token = None
-
-        if c == "(" or c == "[":
-            token = self._create_token(TOKEN.START_EXPR, c)
-        elif c == ")" or c == "]":
-            token = self._create_token(TOKEN.END_EXPR, c)
-        elif c == "{":
-            token = self._create_token(TOKEN.START_BLOCK, c)
-        elif c == "}":
-            token = self._create_token(TOKEN.END_BLOCK, c)
-        elif c == ";":
-            token = self._create_token(TOKEN.SEMICOLON, c)
-        elif (
-            c == "."
-            and self._input.peek(1) is not None
-            and bool(dot_pattern.match(self._input.peek(1)))
-        ):
-            token = self._create_token(TOKEN.DOT, c)
-        elif c == ",":
-            token = self._create_token(TOKEN.COMMA, c)
-
-        if token is not None:
-            self._input.next()
-
-        return token
-
-    def _read_pair(self, c, d):
-        token = None
-
-        if c == "#" and d == "{":
-            token = self._create_token(TOKEN.START_BLOCK, c + d)
-
-        if token is not None:
-            self._input.next()
-            self._input.next()
-
-        return token
-
-    def _read_word(self, previous_token):
-        resulting_string = self._patterns.identifier.read()
-
-        if bool(resulting_string):
-            resulting_string = re.sub(self.acorn.allLineBreaks, "\n", resulting_string)
-            if not (
-                previous_token.type == TOKEN.DOT
-                or (
-                    previous_token.type == TOKEN.RESERVED
-                    and (previous_token.text == "set" or previous_token.text == "get")
-                )
-            ) and reserved_word_pattern.match(resulting_string):
-                if (resulting_string == "in" or resulting_string == "of") and (
-                    previous_token.type == TOKEN.WORD
-                    or previous_token.type == TOKEN.STRING
-                ):
-                    # in and of are operators, need to hack
-                    return self._create_token(TOKEN.OPERATOR, resulting_string)
-
-                return self._create_token(TOKEN.RESERVED, resulting_string)
-
-            return self._create_token(TOKEN.WORD, resulting_string)
-
-        resulting_string = self._patterns.number.read()
-        if resulting_string != "":
-            return self._create_token(TOKEN.WORD, resulting_string)
-
-    def _read_comment(self, c):
-        token = None
-        if c == "/":
-            comment = ""
-            if self._input.peek(1) == "*":  # peek /* .. */ comment
-                comment = self._patterns.block_comment.read()
-
-                directives = directives_core.get_directives(comment)
-                if directives and directives.get("ignore") == "start":
-                    comment += directives_core.readIgnored(self._input)
-                comment = re.sub(self.acorn.allLineBreaks, "\n", comment)
-                token = self._create_token(TOKEN.BLOCK_COMMENT, comment)
-                token.directives = directives
-
-            elif self._input.peek(1) == "/":  # peek // comment
-                comment = self._patterns.comment.read()
-                token = self._create_token(TOKEN.COMMENT, comment)
-
-        return token
-
-    def _read_string(self, c):
-        if c == "`" or c == "'" or c == '"':
-            resulting_string = self._input.next()
-            self.has_char_escapes = False
-
-            if c == "`":
-                resulting_string += self.parse_string("`", True, "${")
-            else:
-                resulting_string += self.parse_string(c)
-
-            if self.has_char_escapes and self._options.unescape_strings:
-                resulting_string = self.unescape_string(resulting_string)
-
-            if self._input.peek() == c:
-                resulting_string += self._input.next()
-
-            resulting_string = re.sub(self.acorn.allLineBreaks, "\n", resulting_string)
-
-            return self._create_token(TOKEN.STRING, resulting_string)
-
-        return None
-
-    def _read_regexp(self, c, previous_token):
-        if c == "/" and self.allowRegExOrXML(previous_token):
-            # handle regexp
-            resulting_string = self._input.next()
-            esc = False
-
-            in_char_class = False
-            while (
-                self._input.hasNext()
-                and (esc or in_char_class or self._input.peek() != c)
-                and not self._input.testChar(self.acorn.newline)
-            ):
-                resulting_string += self._input.peek()
-                if not esc:
-                    esc = self._input.peek() == "\\"
-                    if self._input.peek() == "[":
-                        in_char_class = True
-                    elif self._input.peek() == "]":
-                        in_char_class = False
-                else:
-                    esc = False
-                self._input.next()
-
-            if self._input.peek() == c:
-                resulting_string += self._input.next()
-
-                if c == "/":
-                    # regexps may have modifiers /regexp/MOD, so fetch those too
-                    # Only [gim] are valid, but if the user puts in garbage, do
-                    # what we can to take it.
-                    resulting_string += self._input.read(self.acorn.identifier)
-
-            return self._create_token(TOKEN.STRING, resulting_string)
-
-        return None
-
-    def _read_xml(self, c, previous_token):
-        if self._options.e4x and c == "<" and self.allowRegExOrXML(previous_token):
-            # handle e4x xml literals
-            xmlStr = ""
-            match = self._patterns.xml.read_match()
-            if match and not match.group(1):
-                rootTag = match.group(2)
-                rootTag = re.sub(r"^{\s+", "{", re.sub(r"\s+}$", "}", rootTag))
-                isCurlyRoot = rootTag.startswith("{")
-                depth = 0
-                while bool(match):
-                    isEndTag = match.group(1)
-                    tagName = match.group(2)
-                    isSingletonTag = (match.groups()[-1] != "") or (
-                        match.group(2)[0:8] == "![CDATA["
-                    )
-                    if not isSingletonTag and (
-                        tagName == rootTag
-                        or (
-                            isCurlyRoot
-                            and re.sub(r"^{\s+", "{", re.sub(r"\s+}$", "}", tagName))
-                        )
-                    ):
-                        if isEndTag:
-                            depth -= 1
-                        else:
-                            depth += 1
-
-                    xmlStr += match.group(0)
-                    if depth <= 0:
-                        break
-
-                    match = self._patterns.xml.read_match()
-
-                # if we didn't close correctly, keep unformatted.
-                if not match:
-                    xmlStr += self._input.match(re.compile(r"[\s\S]*")).group(0)
-
-                xmlStr = re.sub(self.acorn.allLineBreaks, "\n", xmlStr)
-                return self._create_token(TOKEN.STRING, xmlStr)
-
-        return None
-
-    def _read_non_javascript(self, c):
-        resulting_string = ""
-
-        if c == "#":
-            # she-bang
-            if self._is_first_token():
-                resulting_string = self._patterns.shebang.read()
-                if resulting_string:
-                    return self._create_token(
-                        TOKEN.UNKNOWN, resulting_string.strip() + "\n"
-                    )
-
-            # handles extendscript #includes
-            resulting_string = self._patterns.include.read()
-
-            if resulting_string:
-                return self._create_token(
-                    TOKEN.UNKNOWN, resulting_string.strip() + "\n"
-                )
-
-            c = self._input.next()
-
-            # Spidermonkey-specific sharp variables for circular references
-            # https://developer.mozilla.org/En/Sharp_variables_in_JavaScript
-            # http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp
-            # around line 1935
-            sharp = "#"
-            if self._input.hasNext() and self._input.testChar(digit):
-                while True:
-                    c = self._input.next()
-                    sharp += c
-                    if (not self._input.hasNext()) or c == "#" or c == "=":
-                        break
-                if c == "#":
-                    pass
-                elif self._input.peek() == "[" and self._input.peek(1) == "]":
-                    sharp += "[]"
-                    self._input.next()
-                    self._input.next()
-                elif self._input.peek() == "{" and self._input.peek(1) == "}":
-                    sharp += "{}"
-                    self._input.next()
-                    self._input.next()
-
-                return self._create_token(TOKEN.WORD, sharp)
-
-            self._input.back()
-
-        elif c == "<" and self._is_first_token():
-            if self._patterns.html_comment_start.read():
-                c = "<!--"
-                while self._input.hasNext() and not self._input.testChar(
-                    self.acorn.newline
-                ):
-                    c += self._input.next()
-
-                self.in_html_comment = True
-                return self._create_token(TOKEN.COMMENT, c)
-
-        elif (
-            c == "-" and self.in_html_comment and self._patterns.html_comment_end.read()
-        ):
-            self.in_html_comment = False
-            return self._create_token(TOKEN.COMMENT, "-->")
-
-        return None
-
-    def _read_punctuation(self):
-        token = None
-        resulting_string = self._patterns.punct.read()
-        if resulting_string != "":
-            if resulting_string == "=":
-                token = self._create_token(TOKEN.EQUALS, resulting_string)
-            elif resulting_string == "?.":
-                token = self._create_token(TOKEN.DOT, resulting_string)
-            else:
-                token = self._create_token(TOKEN.OPERATOR, resulting_string)
-
-        return token
-
-    __regexTokens = {
-        TOKEN.COMMENT,
-        TOKEN.START_EXPR,
-        TOKEN.START_BLOCK,
-        TOKEN.START,
-        TOKEN.END_BLOCK,
-        TOKEN.OPERATOR,
-        TOKEN.EQUALS,
-        TOKEN.EOF,
-        TOKEN.SEMICOLON,
-        TOKEN.COMMA,
-    }
-
-    def allowRegExOrXML(self, previous_token):
-        return (
-            (
-                previous_token.type == TOKEN.RESERVED
-                and previous_token.text
-                in {"return", "case", "throw", "else", "do", "typeof", "yield"}
-            )
-            or (
-                previous_token.type == TOKEN.END_EXPR
-                and previous_token.text == ")"
-                and previous_token.opened.previous.type == TOKEN.RESERVED
-                and previous_token.opened.previous.text in {"if", "while", "for"}
-            )
-            or (previous_token.type in self.__regexTokens)
-        )
-
-    def parse_string(self, delimiter, allow_unescaped_newlines=False, start_sub=None):
-        if delimiter == "'":
-            pattern = self._patterns.single_quote
-        elif delimiter == '"':
-            pattern = self._patterns.double_quote
-        elif delimiter == "`":
-            pattern = self._patterns.template_text
-        elif delimiter == "}":
-            pattern = self._patterns.template_expression
-        resulting_string = pattern.read()
-        next = ""
-        while self._input.hasNext():
-            next = self._input.next()
-            if next == delimiter or (
-                not allow_unescaped_newlines and self.acorn.newline.match(next)
-            ):
-                self._input.back()
-                break
-            elif next == "\\" and self._input.hasNext():
-                current_char = self._input.peek()
-                if current_char == "x" or current_char == "u":
-                    self.has_char_escapes = True
-                elif current_char == "\r" and self._input.peek(1) == "\n":
-                    self._input.next()
-
-                next += self._input.next()
-            elif start_sub is not None:
-                if start_sub == "${" and next == "$" and self._input.peek() == "{":
-                    next += self._input.next()
-
-                if start_sub == next:
-                    if delimiter == "`":
-                        next += self.parse_string("}", allow_unescaped_newlines, "`")
-                    else:
-                        next += self.parse_string("`", allow_unescaped_newlines, "${")
-
-                    if self._input.hasNext():
-                        next += self._input.next()
-
-            next += pattern.read()
-            resulting_string += next
-        return resulting_string
-
-    def unescape_string(self, s):
-        # You think that a regex would work for this
-        # return s.replace(/\\x([0-9a-f]{2})/gi, function(match, val) {
-        #     return String.fromCharCode(parseInt(val, 16));
-        # })
-        # However, dealing with '\xff', '\\xff', '\\\xff' makes this more fun.
-        out = self.acorn.six.u("")
-        escaped = 0
-
-        input_scan = InputScanner(s)
-        matched = None
-
-        while input_scan.hasNext():
-            # Keep any whitespace, non-slash characters
-            # also keep slash pairs.
-            matched = input_scan.match(re.compile(r"([\s]|[^\\]|\\\\)+"))
-
-            if matched:
-                out += matched.group(0)
-
-            if input_scan.peek() != "\\":
-                continue
-
-            input_scan.next()
-            if input_scan.peek() == "x":
-                matched = input_scan.match(re.compile(r"x([0-9A-Fa-f]{2})"))
-            elif input_scan.peek() == "u":
-                matched = input_scan.match(re.compile(r"u([0-9A-Fa-f]{4})"))
-                if not matched:
-                    matched = input_scan.match(re.compile(r"u\{([0-9A-Fa-f]+)\}"))
-            else:
-                out += "\\"
-                if input_scan.hasNext():
-                    out += input_scan.next()
-                continue
-
-            # If there's some error decoding, return the original string
-            if not matched:
-                return s
-
-            escaped = int(matched.group(1), 16)
-
-            if escaped > 0x7E and escaped <= 0xFF and matched.group(0).startswith("x"):
-                # we bail out on \x7f..\xff,
-                # leaving whole string escaped,
-                # as it's probably completely binary
-                return s
-            elif escaped >= 0x00 and escaped < 0x20:
-                # leave 0x00...0x1f escaped
-                out += "\\" + matched.group(0)
-            elif escaped > 0x10FFFF:
-                # If the escape sequence is out of bounds, keep the original sequence and continue conversion
-                out += "\\" + matched.group(0)
-            elif escaped == 0x22 or escaped == 0x27 or escaped == 0x5C:
-                # single-quote, apostrophe, backslash - escape these
-                out += "\\" + chr(escaped)
-            else:
-                out += self.acorn.six.unichr(escaped)
-
-        return out