diff options
author | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:10:44 -0400 |
---|---|---|
committer | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:10:44 -0400 |
commit | 6d7ba58f880be618ade07f8ea080fe8c4bf8a896 (patch) | |
tree | b1c931051ffcebd2bd9d61d98d6233ffa289bbce /venv/lib/python3.11/site-packages/jsbeautifier/javascript | |
parent | 4f884c9abc32990b4061a1bb6997b4b37e58ea0b (diff) |
venv
Diffstat (limited to 'venv/lib/python3.11/site-packages/jsbeautifier/javascript')
10 files changed, 2478 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__init__.py b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__init__.py new file mode 100644 index 0000000..0c01055 --- /dev/null +++ b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__init__.py @@ -0,0 +1 @@ +# Empty file :) diff --git a/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/__init__.cpython-311.pyc b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/__init__.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..d4031d3 --- /dev/null +++ b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/__init__.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/acorn.cpython-311.pyc b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/acorn.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..62f71a3 --- /dev/null +++ b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/acorn.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/beautifier.cpython-311.pyc b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/beautifier.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..8d64886 --- /dev/null +++ b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/beautifier.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/options.cpython-311.pyc b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/options.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..69444c4 --- /dev/null +++ b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/options.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/tokenizer.cpython-311.pyc 
b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/tokenizer.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..d4e6105 --- /dev/null +++ b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/__pycache__/tokenizer.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/jsbeautifier/javascript/acorn.py b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/acorn.py new file mode 100644 index 0000000..933376e --- /dev/null +++ b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/acorn.py @@ -0,0 +1,94 @@ +import re + +# This section of code was translated to python from acorn (javascript). +# +# Acorn was written by Marijn Haverbeke and released under an MIT +# license. The Unicode regexps (for identifiers and whitespace) were +# taken from [Esprima](http://esprima.org) by Ariya Hidayat. +# +# Git repositories for Acorn are available at +# +# http://marijnhaverbeke.nl/git/acorn +# https://github.com/marijnh/acorn.git + +# This is not pretty, but given how we did the version import +# it is the only way to do this without having setup.py fail on a missing +# six dependency. +six = __import__("six") + +# ## Character categories + +# acorn used char codes to squeeze the last bit of performance out +# Beautifier is okay without that, so we're using regex +# permit #(23), $ (36), and @ (64). @ is used in ES7 decorators. +# 65 through 91 are uppercase letters. +# permit _ (95). +# 97 through 123 are lowercase letters. +_baseASCIIidentifierStartChars = six.u(r"\x23\x24\x40\x41-\x5a\x5f\x61-\x7a") + +# inside an identifier @ is not allowed but 0-9 are. +_baseASCIIidentifierChars = six.u(r"\x24\x30-\x39\x41-\x5a\x5f\x61-\x7a") + +# Big ugly regular expressions that match characters in the +# whitespace, identifier, and identifier-start categories. These +# are only applied when a character is found to actually have a +# code point above 128. 
+# IMPORTANT: These strings must be run through six to handle \u chars +_nonASCIIidentifierStartChars = six.u( + r"\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ec\u02ee\u0370-\u0374\u0376\u0377\u037a-\u037d\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03f5\u03f7-\u0481\u048a-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea\u05f0-\u05f2\u0620-\u064a\u066e\u066f\u0671-\u06d3\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u07f4\u07f5\u07fa\u0800-\u0815\u081a\u0824\u0828\u0840-\u0858\u08a0\u08a2-\u08ac\u0904-\u0939\u093d\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097f\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc\u09dd\u09df-\u09e1\u09f0\u09f1\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0\u0ae1\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b5c\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c3d\u0c58\u0c59\u0c60\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0\u0ce1\u0cf1\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0edc-\u0edf\u0f00\u0f40-\u0f47\u0f49-\u0f6c\u0f88-\u0f8c\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10c5\u10c7\u10cd\u10d0-\u10fa\u10fc-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-
\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17d7\u17dc\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191c\u1950-\u196d\u1970-\u1974\u1980-\u19ab\u19c1-\u19c7\u1a00-\u1a16\u1a20-\u1a54\u1aa7\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c7d\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5\u1cf6\u1d00-\u1dbf\u1e00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c2e\u2c30-\u2c5e\u2c60-\u2ce4\u2ceb-\u2cee\u2cf2\u2cf3\u2d00-\u2d25\u2d27\u2d2d\u2d30-\u2d67\u2d6f\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u2e2f\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u31a0-\u31ba\u31f0-\u31ff\u3400-\u4db5\u4e00-\u9fcc\ua000-\ua48c\ua4d0-\ua4fd\ua500-\ua60c\ua610-\ua61f\ua62a\ua62b\ua640-\ua66e\ua67f-\ua697\ua6a0-\ua6ef\ua717-\ua71f\ua722-\ua788\ua78b-\ua78e\ua790-\ua793\ua7a0-\ua7aa\ua7f8-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\ua960-\ua97c\ua984-\ua9b2\ua9cf\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaa60-\uaa76\uaa7a\uaa80-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaadd\uaae0-\uaaea\uaaf2-\uaaf4\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7
fb\uf900-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\ufb28\ufb2a-\ufb36\ufb38-\ufb3c\ufb3e\ufb40\ufb41\ufb43\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc" +) +_nonASCIIidentifierChars = six.u( + r"\u0300-\u036f\u0483-\u0487\u0591-\u05bd\u05bf\u05c1\u05c2\u05c4\u05c5\u05c7\u0610-\u061a\u0620-\u0649\u0672-\u06d3\u06e7-\u06e8\u06fb-\u06fc\u0730-\u074a\u0800-\u0814\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0840-\u0857\u08e4-\u08fe\u0900-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962-\u0963\u0966-\u096f\u0981-\u0983\u09bc\u09be-\u09c4\u09c7\u09c8\u09d7\u09df-\u09e0\u0a01-\u0a03\u0a3c\u0a3e-\u0a42\u0a47\u0a48\u0a4b-\u0a4d\u0a51\u0a66-\u0a71\u0a75\u0a81-\u0a83\u0abc\u0abe-\u0ac5\u0ac7-\u0ac9\u0acb-\u0acd\u0ae2-\u0ae3\u0ae6-\u0aef\u0b01-\u0b03\u0b3c\u0b3e-\u0b44\u0b47\u0b48\u0b4b-\u0b4d\u0b56\u0b57\u0b5f-\u0b60\u0b66-\u0b6f\u0b82\u0bbe-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcd\u0bd7\u0be6-\u0bef\u0c01-\u0c03\u0c46-\u0c48\u0c4a-\u0c4d\u0c55\u0c56\u0c62-\u0c63\u0c66-\u0c6f\u0c82\u0c83\u0cbc\u0cbe-\u0cc4\u0cc6-\u0cc8\u0cca-\u0ccd\u0cd5\u0cd6\u0ce2-\u0ce3\u0ce6-\u0cef\u0d02\u0d03\u0d46-\u0d48\u0d57\u0d62-\u0d63\u0d66-\u0d6f\u0d82\u0d83\u0dca\u0dcf-\u0dd4\u0dd6\u0dd8-\u0ddf\u0df2\u0df3\u0e34-\u0e3a\u0e40-\u0e45\u0e50-\u0e59\u0eb4-\u0eb9\u0ec8-\u0ecd\u0ed0-\u0ed9\u0f18\u0f19\u0f20-\u0f29\u0f35\u0f37\u0f39\u0f41-\u0f47\u0f71-\u0f84\u0f86-\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6\u1000-\u1029\u1040-\u1049\u1067-\u106d\u1071-\u1074\u1082-\u108d\u108f-\u109d\u135d-\u135f\u170e-\u1710\u1720-\u1730\u1740-\u1750\u1772\u1773\u1780-\u17b2\u17dd\u17e0-\u17e9\u180b-\u180d\u1810-\u1819\u1920-\u192b\u1930-\u193b\u1951-\u196d\u19b0-\u19c0\u19c8-\u19c9\u19d0-\u19d9\u1a00-\u1a15\u1a20-\u1a53\u1a60-\u1a7c\u1a7f-\u1a89\u1a90-\u1a99\u1b46-\u1b4b\u1b50-\u1b59\u1b6b-\u1b73\u1bb0-\u1bb9\u1be6-\u1bf3\u1c00-\u1c22\u1c40-\u1c49\u1c5b-\u1c7d\u1cd0-\u1cd2\u1d0
0-\u1dbe\u1e01-\u1f15\u200c\u200d\u203f\u2040\u2054\u20d0-\u20dc\u20e1\u20e5-\u20f0\u2d81-\u2d96\u2de0-\u2dff\u3021-\u3028\u3099\u309a\ua640-\ua66d\ua674-\ua67d\ua69f\ua6f0-\ua6f1\ua7f8-\ua800\ua806\ua80b\ua823-\ua827\ua880-\ua881\ua8b4-\ua8c4\ua8d0-\ua8d9\ua8f3-\ua8f7\ua900-\ua909\ua926-\ua92d\ua930-\ua945\ua980-\ua983\ua9b3-\ua9c0\uaa00-\uaa27\uaa40-\uaa41\uaa4c-\uaa4d\uaa50-\uaa59\uaa7b\uaae0-\uaae9\uaaf2-\uaaf3\uabc0-\uabe1\uabec\uabed\uabf0-\uabf9\ufb20-\ufb28\ufe00-\ufe0f\ufe20-\ufe26\ufe33\ufe34\ufe4d-\ufe4f\uff10-\uff19\uff3f" +) +# _nonASCIIidentifierStart = re.compile("[" + _nonASCIIidentifierStartChars + "]") +# _nonASCIIidentifier = re.compile("[" + _nonASCIIidentifierStartChars + _nonASCIIidentifierChars + "]") + +_unicodeEscapeOrCodePoint = six.u(r"\\u[0-9a-fA-F]{4}|\\u\{[0-9a-fA-F]+\}") + +_identifierStart = ( + six.u("(?:") + + _unicodeEscapeOrCodePoint + + six.u("|[") + + _baseASCIIidentifierStartChars + + _nonASCIIidentifierStartChars + + six.u("])") +) +_identifierChars = ( + six.u("(?:") + + _unicodeEscapeOrCodePoint + + six.u("|[") + + _baseASCIIidentifierChars + + _nonASCIIidentifierStartChars + + _nonASCIIidentifierChars + + six.u("])*") +) + +identifier = re.compile(_identifierStart + _identifierChars) + +identifierStart = re.compile(_identifierStart) +identifierMatch = re.compile( + six.u("(?:") + + _unicodeEscapeOrCodePoint + + six.u("|[") + + _baseASCIIidentifierChars + + _nonASCIIidentifierStartChars + + _nonASCIIidentifierChars + + six.u("])+") +) + +_nonASCIIwhitespace = re.compile( + six.u(r"[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]") +) + +# Whether a single character denotes a newline. +# IMPORTANT: This string must be run through six to handle \u chars +newline = re.compile(six.u(r"[\n\r\u2028\u2029]")) + +# Matches a whole line break (where CRLF is considered a single +# line break). Used to count lines. 
+ +# in javascript, these two differ +# in python they are the same, different methods are called on them +# IMPORTANT: This string must be run through six to handle \u chars +lineBreak = re.compile(six.u(r"\r\n|[\n\r\u2028\u2029]")) +allLineBreaks = lineBreak diff --git a/venv/lib/python3.11/site-packages/jsbeautifier/javascript/beautifier.py b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/beautifier.py new file mode 100644 index 0000000..fff41a0 --- /dev/null +++ b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/beautifier.py @@ -0,0 +1,1647 @@ +# The MIT License (MIT) +# +# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation files +# (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, +# publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +import re +import string +import copy +from ..core.token import Token +from .tokenizer import Tokenizer +from .tokenizer import TOKEN +from .options import BeautifierOptions +from ..core.output import Output + + +def default_options(): + return BeautifierOptions() + + +class BeautifierFlags: + def __init__(self, mode): + self.mode = mode + self.parent = None + self.last_token = Token(TOKEN.START_BLOCK, "") + self.last_word = "" + self.declaration_statement = False + self.declaration_assignment = False + self.multiline_frame = False + self.inline_frame = False + self.if_block = False + self.else_block = False + self.class_start_block = False + self.do_block = False + self.do_while = False + self.import_block = False + self.in_case = False + self.in_case_statement = False + self.case_body = False + self.case_block = False + self.indentation_level = 0 + self.alignment = 0 + self.line_indent_level = 0 + self.start_line_index = 0 + self.ternary_depth = 0 + + def apply_base(self, flags_base, added_newline): + next_indent_level = flags_base.indentation_level + if not added_newline and flags_base.line_indent_level > next_indent_level: + next_indent_level = flags_base.line_indent_level + + self.parent = flags_base + self.last_token = flags_base.last_token + self.last_word = flags_base.last_word + self.indentation_level = next_indent_level + + +OPERATOR_POSITION = { + "before_newline": "before-newline", + "after_newline": "after-newline", + "preserve_newline": "preserve-newline", +} +OPERATOR_POSITION_BEFORE_OR_PRESERVE = [ + OPERATOR_POSITION["before_newline"], + OPERATOR_POSITION["preserve_newline"], +] + + +class MODE: + ( + BlockStatement, + Statement, + ObjectLiteral, + ArrayLiteral, + ForInitializer, + Conditional, + Expression, + ) = range(7) + + +def remove_redundant_indentation(output, frame): + # This implementation is effective but has some issues: + # - can cause line wrap to happen too soon due to indent removal + # after wrap points are calculated + # These 
issues are minor compared to ugly indentation. + + if ( + frame.multiline_frame + or frame.mode == MODE.ForInitializer + or frame.mode == MODE.Conditional + ): + return + + # remove one indent from each line inside this section + output.remove_indent(frame.start_line_index) + + +def reserved_word(token, word): + return token and token.type == TOKEN.RESERVED and token.text == word + + +def reserved_array(token, words): + return token and token.type == TOKEN.RESERVED and token.text in words + + +_special_word_set = frozenset( + [ + "case", + "return", + "do", + "if", + "throw", + "else", + "await", + "break", + "continue", + "async", + ] +) + + +class Beautifier: + def __init__(self, opts=None): + import jsbeautifier.javascript.acorn as acorn + + self.acorn = acorn + self._options = BeautifierOptions(opts) + + self._blank_state() + + def _blank_state(self, js_source_text=None): + if js_source_text is None: + js_source_text = "" + + # internal flags + self._flags = None + self._previous_flags = None + self._flag_store = [] + self._tokens = None + + if self._options.eol == "auto": + self._options.eol = "\n" + if self.acorn.lineBreak.search(js_source_text or ""): + self._options.eol = self.acorn.lineBreak.search(js_source_text).group() + + baseIndentString = re.search("^[\t ]*", js_source_text).group(0) + self._last_last_text = "" # pre-last token text + + self._output = Output(self._options, baseIndentString) + # If testing the ignore directive, start with output disable set to + # true + self._output.raw = self._options.test_output_raw + + self.set_mode(MODE.BlockStatement) + return js_source_text + + def beautify(self, source_text="", opts=None): + if opts is not None: + self._options = BeautifierOptions(opts) + + source_text = source_text or "" + if self._options.disabled: + return source_text + + source_text = self._blank_state(source_text) + + source_text = self.unpack(source_text, self._options.eval_code) + + self._tokens = Tokenizer(source_text, 
self._options).tokenize() + + for current_token in self._tokens: + self.handle_token(current_token) + + self._last_last_text = self._flags.last_token.text + self._flags.last_token = current_token + + sweet_code = self._output.get_code(self._options.eol) + + return sweet_code + + def handle_token(self, current_token, preserve_statement_flags=False): + if current_token.type == TOKEN.START_EXPR: + self.handle_start_expr(current_token) + elif current_token.type == TOKEN.END_EXPR: + self.handle_end_expr(current_token) + elif current_token.type == TOKEN.START_BLOCK: + self.handle_start_block(current_token) + elif current_token.type == TOKEN.END_BLOCK: + self.handle_end_block(current_token) + elif current_token.type == TOKEN.WORD: + self.handle_word(current_token) + elif current_token.type == TOKEN.RESERVED: + self.handle_word(current_token) + elif current_token.type == TOKEN.SEMICOLON: + self.handle_semicolon(current_token) + elif current_token.type == TOKEN.STRING: + self.handle_string(current_token) + elif current_token.type == TOKEN.EQUALS: + self.handle_equals(current_token) + elif current_token.type == TOKEN.OPERATOR: + self.handle_operator(current_token) + elif current_token.type == TOKEN.COMMA: + self.handle_comma(current_token) + elif current_token.type == TOKEN.BLOCK_COMMENT: + self.handle_block_comment(current_token, preserve_statement_flags) + elif current_token.type == TOKEN.COMMENT: + self.handle_comment(current_token, preserve_statement_flags) + elif current_token.type == TOKEN.DOT: + self.handle_dot(current_token) + elif current_token.type == TOKEN.EOF: + self.handle_eof(current_token) + elif current_token.type == TOKEN.UNKNOWN: + self.handle_unknown(current_token, preserve_statement_flags) + else: + self.handle_unknown(current_token, preserve_statement_flags) + + def handle_whitespace_and_comments( + self, current_token, preserve_statement_flags=False + ): + newlines = current_token.newlines + keep_whitespace = self._options.keep_array_indentation and 
self.is_array( + self._flags.mode + ) + + if current_token.comments_before is not None: + for comment_token in current_token.comments_before: + # The cleanest handling of inline comments is to treat them + # as though they aren't there. + # Just continue formatting and the behavior should be logical. + # Also ignore unknown tokens. Again, this should result in better + # behavior. + self.handle_whitespace_and_comments( + comment_token, preserve_statement_flags + ) + self.handle_token(comment_token, preserve_statement_flags) + + if keep_whitespace: + for i in range(newlines): + self.print_newline(i > 0, preserve_statement_flags) + else: # not keep_whitespace + if ( + self._options.max_preserve_newlines != 0 + and newlines > self._options.max_preserve_newlines + ): + newlines = self._options.max_preserve_newlines + + if self._options.preserve_newlines and newlines > 1: + self.print_newline(False, preserve_statement_flags) + for i in range(1, newlines): + self.print_newline(True, preserve_statement_flags) + + def unpack(self, source, evalcode=False): + import jsbeautifier.unpackers as unpackers + + try: + return unpackers.run(source, evalcode) + except unpackers.UnpackingError: + return source + + def is_array(self, mode): + return mode == MODE.ArrayLiteral + + def is_expression(self, mode): + return ( + mode == MODE.Expression + or mode == MODE.ForInitializer + or mode == MODE.Conditional + ) + + _newline_restricted_tokens = frozenset( + ["async", "break", "continue", "return", "throw", "yield"] + ) + + def allow_wrap_or_preserved_newline(self, current_token, force_linewrap=False): + # never wrap the first token of a line. 
+ if self._output.just_added_newline(): + return + + shouldPreserveOrForce = ( + self._options.preserve_newlines and current_token.newlines + ) or force_linewrap + operatorLogicApplies = ( + self._flags.last_token.text in Tokenizer.positionable_operators + or current_token.text in Tokenizer.positionable_operators + ) + + if operatorLogicApplies: + shouldPrintOperatorNewline = ( + self._flags.last_token.text in Tokenizer.positionable_operators + and self._options.operator_position + in OPERATOR_POSITION_BEFORE_OR_PRESERVE + ) or current_token.text in Tokenizer.positionable_operators + shouldPreserveOrForce = shouldPreserveOrForce and shouldPrintOperatorNewline + + if shouldPreserveOrForce: + self.print_newline(preserve_statement_flags=True) + elif self._options.wrap_line_length > 0: + if reserved_array(self._flags.last_token, self._newline_restricted_tokens): + # These tokens should never have a newline inserted between + # them and the following expression. + return + self._output.set_wrap_point() + + def print_newline(self, force_newline=False, preserve_statement_flags=False): + if not preserve_statement_flags: + if ( + self._flags.last_token.text != ";" + and self._flags.last_token.text != "," + and self._flags.last_token.text != "=" + and ( + self._flags.last_token.type != TOKEN.OPERATOR + or self._flags.last_token.text == "--" + or self._flags.last_token.text == "++" + ) + ): + next_token = self._tokens.peek() + while ( + self._flags.mode == MODE.Statement + and not (self._flags.if_block and reserved_word(next_token, "else")) + and not self._flags.do_block + ): + self.restore_mode() + + if self._output.add_new_line(force_newline): + self._flags.multiline_frame = True + + def print_token_line_indentation(self, current_token): + if self._output.just_added_newline(): + line = self._output.current_line + if ( + self._options.keep_array_indentation + and current_token.newlines + and (self.is_array(self._flags.mode) or current_token.text == "[") + ): + 
line.set_indent(-1) + line.push(current_token.whitespace_before) + self._output.space_before_token = False + elif self._output.set_indent( + self._flags.indentation_level, self._flags.alignment + ): + self._flags.line_indent_level = self._flags.indentation_level + + def print_token(self, current_token, s=None): + if self._output.raw: + self._output.add_raw_token(current_token) + return + + if ( + self._options.comma_first + and current_token.previous + and current_token.previous.type == TOKEN.COMMA + and self._output.just_added_newline() + ): + if self._output.previous_line.last() == ",": + # if the comma was already at the start of the line, + # pull back onto that line and reprint the indentation + popped = self._output.previous_line.pop() + if self._output.previous_line.is_empty(): + self._output.previous_line.push(popped) + self._output.trim(True) + self._output.current_line.pop() + self._output.trim() + + # add the comma in front of the next token + self.print_token_line_indentation(current_token) + self._output.add_token(",") + self._output.space_before_token = True + + if s is None: + s = current_token.text + + self.print_token_line_indentation(current_token) + self._output.non_breaking_space = True + self._output.add_token(s) + if self._output.previous_token_wrapped: + self._flags.multiline_frame = True + + def indent(self): + self._flags.indentation_level += 1 + self._output.set_indent(self._flags.indentation_level, self._flags.alignment) + + def deindent(self): + allow_deindent = self._flags.indentation_level > 0 and ( + (self._flags.parent is None) + or self._flags.indentation_level > self._flags.parent.indentation_level + ) + + if allow_deindent: + self._flags.indentation_level -= 1 + + self._output.set_indent(self._flags.indentation_level, self._flags.alignment) + + def set_mode(self, mode): + if self._flags: + self._flag_store.append(self._flags) + self._previous_flags = self._flags + else: + self._previous_flags = BeautifierFlags(mode) + + 
self._flags = BeautifierFlags(mode) + self._flags.apply_base(self._previous_flags, self._output.just_added_newline()) + self._flags.start_line_index = self._output.get_line_number() + + self._output.set_indent(self._flags.indentation_level, self._flags.alignment) + + def restore_mode(self): + if len(self._flag_store) > 0: + self._previous_flags = self._flags + self._flags = self._flag_store.pop() + if self._previous_flags.mode == MODE.Statement: + remove_redundant_indentation(self._output, self._previous_flags) + + self._output.set_indent(self._flags.indentation_level, self._flags.alignment) + + def start_of_object_property(self): + return ( + self._flags.parent.mode == MODE.ObjectLiteral + and self._flags.mode == MODE.Statement + and ( + (self._flags.last_token.text == ":" and self._flags.ternary_depth == 0) + or (reserved_array(self._flags.last_token, ["get", "set"])) + ) + ) + + def start_of_statement(self, current_token): + start = False + start = start or ( + reserved_array(self._flags.last_token, ["var", "let", "const"]) + and current_token.type == TOKEN.WORD + ) + start = start or reserved_word(self._flags.last_token, "do") + start = start or ( + not ( + self._flags.parent.mode == MODE.ObjectLiteral + and self._flags.mode == MODE.Statement + ) + and reserved_array(self._flags.last_token, self._newline_restricted_tokens) + and not current_token.newlines + ) + start = start or ( + reserved_word(self._flags.last_token, "else") + and not ( + reserved_word(current_token, "if") + and current_token.comments_before is None + ) + ) + start = start or ( + self._flags.last_token.type == TOKEN.END_EXPR + and ( + self._previous_flags.mode == MODE.ForInitializer + or self._previous_flags.mode == MODE.Conditional + ) + ) + start = start or ( + self._flags.last_token.type == TOKEN.WORD + and self._flags.mode == MODE.BlockStatement + and not self._flags.in_case + and not (current_token.text == "--" or current_token.text == "++") + and self._last_last_text != "function" + and 
current_token.type != TOKEN.WORD + and current_token.type != TOKEN.RESERVED + ) + start = start or ( + self._flags.mode == MODE.ObjectLiteral + and ( + (self._flags.last_token.text == ":" and self._flags.ternary_depth == 0) + or (reserved_array(self._flags.last_token, ["get", "set"])) + ) + ) + + if start: + self.set_mode(MODE.Statement) + self.indent() + + self.handle_whitespace_and_comments(current_token, True) + + # Issue #276: + # If starting a new statement with [if, for, while, do], push to a new line. + # if (a) if (b) if(c) d(); else e(); else f(); + if not self.start_of_object_property(): + self.allow_wrap_or_preserved_newline( + current_token, + reserved_array(current_token, ["do", "for", "if", "while"]), + ) + return True + else: + return False + + def handle_start_expr(self, current_token): + if self.start_of_statement(current_token): + # The conditional starts the statement if appropriate. + pass + else: + self.handle_whitespace_and_comments(current_token) + + next_mode = MODE.Expression + + if current_token.text == "[": + if ( + self._flags.last_token.type == TOKEN.WORD + or self._flags.last_token.text == ")" + ): + if reserved_array(self._flags.last_token, Tokenizer.line_starters): + self._output.space_before_token = True + self.print_token(current_token) + self.set_mode(next_mode) + self.indent() + if self._options.space_in_paren: + self._output.space_before_token = True + return + + next_mode = MODE.ArrayLiteral + + if self.is_array(self._flags.mode): + if self._flags.last_token.text == "[" or ( + self._flags.last_token.text == "," + and (self._last_last_text == "]" or self._last_last_text == "}") + ): + # ], [ goes to a new line + # }, [ goes to a new line + if not self._options.keep_array_indentation: + self.print_newline() + + if self._flags.last_token.type not in [ + TOKEN.START_EXPR, + TOKEN.END_EXPR, + TOKEN.WORD, + TOKEN.OPERATOR, + TOKEN.DOT, + ]: + self._output.space_before_token = True + + else: + if self._flags.last_token.type == 
TOKEN.RESERVED: + if self._flags.last_token.text == "for": + self._output.space_before_token = ( + self._options.space_before_conditional + ) + next_mode = MODE.ForInitializer + elif self._flags.last_token.text in ["if", "while", "switch"]: + self._output.space_before_token = ( + self._options.space_before_conditional + ) + next_mode = MODE.Conditional + elif self._flags.last_word in ["await", "async"]: + # Should be a space between await and an IIFE, or async and + # an arrow function + self._output.space_before_token = True + elif ( + self._flags.last_token.text == "import" + and current_token.whitespace_before == "" + ): + self._output.space_before_token = False + elif ( + self._flags.last_token.text in Tokenizer.line_starters + or self._flags.last_token.text == "catch" + ): + self._output.space_before_token = True + + elif self._flags.last_token.type in [TOKEN.EQUALS, TOKEN.OPERATOR]: + # Support of this kind of newline preservation: + # a = (b && + # (c || d)); + if not self.start_of_object_property(): + self.allow_wrap_or_preserved_newline(current_token) + elif self._flags.last_token.type == TOKEN.WORD: + self._output.space_before_token = False + # function name() vs function name () + # function* name() vs function* name () + # async name() vs async name () + # In ES6, you can also define the method properties of an object + # var obj = {a: function() {}} + # It can be abbreviated + # var obj = {a() {}} + # var obj = { a() {}} vs var obj = { a () {}} + # var obj = { * a() {}} vs var obj = { * a () {}} + peek_back_two = self._tokens.peek(-3) + if self._options.space_after_named_function and peek_back_two: + # peek starts at next character so -1 is current token + peek_back_three = self._tokens.peek(-4) + if reserved_array(peek_back_two, ["async", "function"]) or ( + peek_back_two.text == "*" + and reserved_array(peek_back_three, ["async", "function"]) + ): + self._output.space_before_token = True + elif self._flags.mode == MODE.ObjectLiteral: + if 
(peek_back_two.text == "{" or peek_back_two.text == ",") or ( + peek_back_two.text == "*" + and ( + peek_back_three.text == "{" + or peek_back_three.text == "," + ) + ): + self._output.space_before_token = True + elif self._flags.parent and self._flags.parent.class_start_block: + self._output.space_before_token = True + else: + # Support preserving wrapped arrow function expressions + # a.b('c', + # () => d.e + # ) + self.allow_wrap_or_preserved_newline(current_token) + + # function() vs function (), typeof() vs typeof () + # function*() vs function* (), yield*() vs yield* () + if ( + self._flags.last_token.type == TOKEN.RESERVED + and ( + self._flags.last_word == "function" + or self._flags.last_word == "typeof" + ) + ) or ( + self._flags.last_token.text == "*" + and ( + self._last_last_text in ["function", "yield"] + or ( + self._flags.mode == MODE.ObjectLiteral + and self._last_last_text in ["{", ","] + ) + ) + ): + self._output.space_before_token = ( + self._options.space_after_anon_function + ) + + if ( + self._flags.last_token.text == ";" + or self._flags.last_token.type == TOKEN.START_BLOCK + ): + self.print_newline() + elif ( + self._flags.last_token.type + in [TOKEN.END_EXPR, TOKEN.START_EXPR, TOKEN.END_BLOCK, TOKEN.COMMA] + or self._flags.last_token.text == "." + ): + # do nothing on (( and )( and ][ and ]( and .( + # TODO: Consider whether forcing this is required. Review failing + # tests when removed. + self.allow_wrap_or_preserved_newline(current_token, current_token.newlines) + + self.print_token(current_token) + self.set_mode(next_mode) + + if self._options.space_in_paren: + self._output.space_before_token = True + + # In all cases, if we newline while inside an expression it should be + # indented. + self.indent() + + def handle_end_expr(self, current_token): + # statements inside expressions are not valid syntax, but... 
+ # statements must all be closed when their container closes + while self._flags.mode == MODE.Statement: + self.restore_mode() + + self.handle_whitespace_and_comments(current_token) + + if self._flags.multiline_frame: + self.allow_wrap_or_preserved_newline( + current_token, + current_token.text == "]" + and self.is_array(self._flags.mode) + and not self._options.keep_array_indentation, + ) + + if self._options.space_in_paren: + if ( + self._flags.last_token.type == TOKEN.START_EXPR + and not self._options.space_in_empty_paren + ): + # empty parens are always "()" and "[]", not "( )" or "[ ]" + self._output.space_before_token = False + self._output.trim() + else: + self._output.space_before_token = True + + self.deindent() + self.print_token(current_token) + self.restore_mode() + + remove_redundant_indentation(self._output, self._previous_flags) + + # do {} while () // no statement required after + if self._flags.do_while and self._previous_flags.mode == MODE.Conditional: + self._previous_flags.mode = MODE.Expression + self._flags.do_block = False + self._flags.do_while = False + + def handle_start_block(self, current_token): + self.handle_whitespace_and_comments(current_token) + + # Check if this is a BlockStatement that should be treated as a + # ObjectLiteral + next_token = self._tokens.peek() + second_token = self._tokens.peek(1) + if ( + self._flags.last_word == "switch" + and self._flags.last_token.type == TOKEN.END_EXPR + ): + self.set_mode(MODE.BlockStatement) + self._flags.in_case_statement = True + elif self._flags.case_body: + self.set_mode(MODE.BlockStatement) + elif second_token is not None and ( + ( + second_token.text in [":", ","] + and next_token.type in [TOKEN.STRING, TOKEN.WORD, TOKEN.RESERVED] + ) + or ( + next_token.text in ["get", "set", "..."] + and second_token.type in [TOKEN.WORD, TOKEN.RESERVED] + ) + ): + # We don't support TypeScript,but we didn't break it for a very long time. + # We'll try to keep not breaking it. 
+ if self._last_last_text in [ + "class", + "interface", + ] and second_token.text not in [":", ","]: + self.set_mode(MODE.BlockStatement) + else: + self.set_mode(MODE.ObjectLiteral) + elif ( + self._flags.last_token.type == TOKEN.OPERATOR + and self._flags.last_token.text == "=>" + ): + # arrow function: (param1, paramN) => { statements } + self.set_mode(MODE.BlockStatement) + elif self._flags.last_token.type in [ + TOKEN.EQUALS, + TOKEN.START_EXPR, + TOKEN.COMMA, + TOKEN.OPERATOR, + ] or reserved_array( + self._flags.last_token, ["return", "throw", "import", "default"] + ): + # Detecting shorthand function syntax is difficult by scanning forward, + # so check the surrounding context. + # If the block is being returned, imported, export default, passed as arg, + # assigned with = or assigned in a nested object, treat as an + # ObjectLiteral. + self.set_mode(MODE.ObjectLiteral) + else: + self.set_mode(MODE.BlockStatement) + + if self._flags.last_token: + if reserved_array(self._flags.last_token.previous, ["class", "extends"]): + self._flags.class_start_block = True + + empty_braces = ( + (next_token is not None) + and next_token.comments_before is None + and next_token.text == "}" + ) + empty_anonymous_function = ( + empty_braces + and self._flags.last_word == "function" + and self._flags.last_token.type == TOKEN.END_EXPR + ) + + if ( + self._options.brace_preserve_inline + ): # check for inline, set inline_frame if so + # search forward for newline wanted inside this block + index = 0 + check_token = None + self._flags.inline_frame = True + do_loop = True + while do_loop: + index += 1 + check_token = self._tokens.peek(index - 1) + if check_token.newlines: + self._flags.inline_frame = False + + do_loop = check_token.type != TOKEN.EOF and not ( + check_token.type == TOKEN.END_BLOCK + and check_token.opened == current_token + ) + + if ( + self._options.brace_style == "expand" + or (self._options.brace_style == "none" and current_token.newlines) + ) and not 
self._flags.inline_frame: + if self._flags.last_token.type != TOKEN.OPERATOR and ( + empty_anonymous_function + or self._flags.last_token.type == TOKEN.EQUALS + or ( + reserved_array(self._flags.last_token, _special_word_set) + and self._flags.last_token.text != "else" + ) + ): + self._output.space_before_token = True + else: + self.print_newline(preserve_statement_flags=True) + else: # collapse || inline_frame + if self.is_array(self._previous_flags.mode) and ( + self._flags.last_token.type == TOKEN.START_EXPR + or self._flags.last_token.type == TOKEN.COMMA + ): + # if we're preserving inline, + # allow newline between comma and next brace. + if self._flags.inline_frame: + self.allow_wrap_or_preserved_newline(current_token) + self._flags.inline_frame = True + self._previous_flags.multiline_frame = ( + self._previous_flags.multiline_frame + or self._flags.multiline_frame + ) + self._flags.multiline_frame = False + elif self._flags.last_token.type == TOKEN.COMMA: + self._output.space_before_token = True + + elif self._flags.last_token.type not in [TOKEN.OPERATOR, TOKEN.START_EXPR]: + if ( + self._flags.last_token.type in [TOKEN.START_BLOCK, TOKEN.SEMICOLON] + and not self._flags.inline_frame + ): + self.print_newline() + else: + self._output.space_before_token = True + + self.print_token(current_token) + self.indent() + + # Except for specific cases, open braces are followed by a new line. 
+ if not empty_braces and not ( + self._options.brace_preserve_inline and self._flags.inline_frame + ): + self.print_newline() + + def handle_end_block(self, current_token): + # statements must all be closed when their container closes + self.handle_whitespace_and_comments(current_token) + + while self._flags.mode == MODE.Statement: + self.restore_mode() + + empty_braces = self._flags.last_token.type == TOKEN.START_BLOCK + + # try inline_frame (only set if opt.braces-preserve-inline) first + if self._flags.inline_frame and not empty_braces: + self._output.space_before_token = True + elif self._options.brace_style == "expand": + if not empty_braces: + self.print_newline() + else: + # skip {} + if not empty_braces: + if ( + self.is_array(self._flags.mode) + and self._options.keep_array_indentation + ): + self._options.keep_array_indentation = False + self.print_newline() + self._options.keep_array_indentation = True + else: + self.print_newline() + + self.restore_mode() + self.print_token(current_token) + + def handle_word(self, current_token): + if current_token.type == TOKEN.RESERVED: + if ( + current_token.text in ["set", "get"] + and self._flags.mode != MODE.ObjectLiteral + ): + current_token.type = TOKEN.WORD + elif current_token.text == "import" and self._tokens.peek().text in [ + "(", + ".", + ]: + current_token.type = TOKEN.WORD + elif current_token.text in ["as", "from"] and not self._flags.import_block: + current_token.type = TOKEN.WORD + elif self._flags.mode == MODE.ObjectLiteral: + next_token = self._tokens.peek() + if next_token.text == ":": + current_token.type = TOKEN.WORD + + if self.start_of_statement(current_token): + # The conditional starts the statement if appropriate. 
+ if ( + reserved_array(self._flags.last_token, ["var", "let", "const"]) + and current_token.type == TOKEN.WORD + ): + self._flags.declaration_statement = True + + elif ( + current_token.newlines + and not self.is_expression(self._flags.mode) + and ( + self._flags.last_token.type != TOKEN.OPERATOR + or ( + self._flags.last_token.text == "--" + or self._flags.last_token.text == "++" + ) + ) + and self._flags.last_token.type != TOKEN.EQUALS + and ( + self._options.preserve_newlines + or not reserved_array( + self._flags.last_token, ["var", "let", "const", "set", "get"] + ) + ) + ): + self.handle_whitespace_and_comments(current_token) + self.print_newline() + else: + self.handle_whitespace_and_comments(current_token) + + if self._flags.do_block and not self._flags.do_while: + if reserved_word(current_token, "while"): + # do {} ## while () + self._output.space_before_token = True + self.print_token(current_token) + self._output.space_before_token = True + self._flags.do_while = True + return + else: + # do {} should always have while as the next word. 
+ # if we don't see the expected while, recover + self.print_newline() + self._flags.do_block = False + + # if may be followed by else, or not + # Bare/inline ifs are tricky + # Need to unwind the modes correctly: if (a) if (b) c(); else d(); else + # e(); + if self._flags.if_block: + if (not self._flags.else_block) and reserved_word(current_token, "else"): + self._flags.else_block = True + else: + while self._flags.mode == MODE.Statement: + self.restore_mode() + + self._flags.if_block = False + + if self._flags.in_case_statement and reserved_array( + current_token, ["case", "default"] + ): + self.print_newline() + if (not self._flags.case_block) and ( + self._flags.case_body or self._options.jslint_happy + ): + self.deindent() + self._flags.case_body = False + self.print_token(current_token) + self._flags.in_case = True + return + + if self._flags.last_token.type in [ + TOKEN.COMMA, + TOKEN.START_EXPR, + TOKEN.EQUALS, + TOKEN.OPERATOR, + ]: + if not self.start_of_object_property() and not ( + # start of object property is different for numeric values with +/- prefix operators + self._flags.last_token.text in ["+", "-"] + and self._last_last_text == ":" + and self._flags.parent.mode == MODE.ObjectLiteral + ): + self.allow_wrap_or_preserved_newline(current_token) + + if reserved_word(current_token, "function"): + if self._flags.last_token.text in ["}", ";"] or ( + self._output.just_added_newline() + and not ( + self._flags.last_token.text in ["(", "[", "{", ":", "=", ","] + or self._flags.last_token.type == TOKEN.OPERATOR + ) + ): + # make sure there is a nice clean space of at least one blank line + # before a new function definition, except in arrays + if ( + not self._output.just_added_blankline() + and current_token.comments_before is None + ): + self.print_newline() + self.print_newline(True) + + if ( + self._flags.last_token.type == TOKEN.RESERVED + or self._flags.last_token.type == TOKEN.WORD + ): + if reserved_array( + self._flags.last_token, ["get", "set", 
"new", "export"] + ) or reserved_array( + self._flags.last_token, self._newline_restricted_tokens + ): + self._output.space_before_token = True + elif ( + reserved_word(self._flags.last_token, "default") + and self._last_last_text == "export" + ): + self._output.space_before_token = True + elif self._flags.last_token.text == "declare": + # accomodates Typescript declare function formatting + self._output.space_before_token = True + else: + self.print_newline() + elif ( + self._flags.last_token.type == TOKEN.OPERATOR + or self._flags.last_token.text == "=" + ): + # foo = function + self._output.space_before_token = True + elif not self._flags.multiline_frame and ( + self.is_expression(self._flags.mode) or self.is_array(self._flags.mode) + ): + # (function + pass + else: + self.print_newline() + + self.print_token(current_token) + self._flags.last_word = current_token.text + return + + prefix = "NONE" + + if self._flags.last_token.type == TOKEN.END_BLOCK: + if self._previous_flags.inline_frame: + prefix = "SPACE" + elif not reserved_array( + current_token, ["else", "catch", "finally", "from"] + ): + prefix = "NEWLINE" + else: + if self._options.brace_style in ["expand", "end-expand"] or ( + self._options.brace_style == "none" and current_token.newlines + ): + prefix = "NEWLINE" + else: + prefix = "SPACE" + self._output.space_before_token = True + elif ( + self._flags.last_token.type == TOKEN.SEMICOLON + and self._flags.mode == MODE.BlockStatement + ): + # TODO: Should this be for STATEMENT as well? 
+ prefix = "NEWLINE" + elif self._flags.last_token.type == TOKEN.SEMICOLON and self.is_expression( + self._flags.mode + ): + prefix = "SPACE" + elif self._flags.last_token.type == TOKEN.STRING: + prefix = "NEWLINE" + elif ( + self._flags.last_token.type == TOKEN.RESERVED + or self._flags.last_token.type == TOKEN.WORD + or ( + self._flags.last_token.text == "*" + and ( + self._last_last_text in ["function", "yield"] + or ( + self._flags.mode == MODE.ObjectLiteral + and self._last_last_text in ["{", ","] + ) + ) + ) + ): + prefix = "SPACE" + elif self._flags.last_token.type == TOKEN.START_BLOCK: + if self._flags.inline_frame: + prefix = "SPACE" + else: + prefix = "NEWLINE" + elif self._flags.last_token.type == TOKEN.END_EXPR: + self._output.space_before_token = True + prefix = "NEWLINE" + + if ( + reserved_array(current_token, Tokenizer.line_starters) + and self._flags.last_token.text != ")" + ): + if ( + self._flags.inline_frame + or self._flags.last_token.text == "else " + or self._flags.last_token.text == "export" + ): + prefix = "SPACE" + else: + prefix = "NEWLINE" + + if reserved_array(current_token, ["else", "catch", "finally"]): + if ( + ( + not ( + self._flags.last_token.type == TOKEN.END_BLOCK + and self._previous_flags.mode == MODE.BlockStatement + ) + ) + or self._options.brace_style == "expand" + or self._options.brace_style == "end-expand" + or (self._options.brace_style == "none" and current_token.newlines) + ) and not self._flags.inline_frame: + self.print_newline() + else: + self._output.trim(True) + # If we trimmed and there's something other than a close block before us + # put a newline back in. Handles '} // comment' scenario. 
+ if self._output.current_line.last() != "}": + self.print_newline() + + self._output.space_before_token = True + + elif prefix == "NEWLINE": + if reserved_array(self._flags.last_token, _special_word_set): + # no newline between return nnn + self._output.space_before_token = True + elif self._flags.last_token.text == "declare" and reserved_array( + current_token, ["var", "let", "const"] + ): + # accomodates Typescript declare formatting + self._output.space_before_token = True + elif self._flags.last_token.type != TOKEN.END_EXPR: + if ( + self._flags.last_token.type != TOKEN.START_EXPR + or not (reserved_array(current_token, ["var", "let", "const"])) + ) and self._flags.last_token.text != ":": + # no need to force newline on VAR - + # for (var x = 0... + if ( + reserved_word(current_token, "if") + and self._flags.last_token.text == "else" + ): + self._output.space_before_token = True + else: + self.print_newline() + elif ( + reserved_array(current_token, Tokenizer.line_starters) + and self._flags.last_token.text != ")" + ): + self.print_newline() + elif ( + self._flags.multiline_frame + and self.is_array(self._flags.mode) + and self._flags.last_token.text == "," + and self._last_last_text == "}" + ): + self.print_newline() # }, in lists get a newline + elif prefix == "SPACE": + self._output.space_before_token = True + + if current_token.previous and ( + current_token.previous.type == TOKEN.WORD + or current_token.previous.type == TOKEN.RESERVED + ): + self._output.space_before_token = True + + self.print_token(current_token) + self._flags.last_word = current_token.text + + if current_token.type == TOKEN.RESERVED: + if current_token.text == "do": + self._flags.do_block = True + elif current_token.text == "if": + self._flags.if_block = True + elif current_token.text == "import": + self._flags.import_block = True + elif current_token.text == "from" and self._flags.import_block: + self._flags.import_block = False + + def handle_semicolon(self, current_token): + if 
self.start_of_statement(current_token): + # The conditional starts the statement if appropriate. + # Semicolon can be the start (and end) of a statement + self._output.space_before_token = False + else: + self.handle_whitespace_and_comments(current_token) + + next_token = self._tokens.peek() + while ( + self._flags.mode == MODE.Statement + and not (self._flags.if_block and reserved_word(next_token, "else")) + and not self._flags.do_block + ): + self.restore_mode() + + if self._flags.import_block: + self._flags.import_block = False + + self.print_token(current_token) + + def handle_string(self, current_token): + if ( + current_token.text[0] == "`" + and current_token.newlines == 0 + and current_token.whitespace_before == "" + and ( + self._flags.last_token.type == TOKEN.WORD + or current_token.previous.text == ")" + ) + ): + # This conditional checks backtick strings and makes no changes + pass + elif self.start_of_statement(current_token): + # The conditional starts the statement if appropriate. + # One difference - strings want at least a space before + self._output.space_before_token = True + else: + self.handle_whitespace_and_comments(current_token) + if ( + self._flags.last_token.type in [TOKEN.RESERVED, TOKEN.WORD] + or self._flags.inline_frame + ): + self._output.space_before_token = True + elif self._flags.last_token.type in [ + TOKEN.COMMA, + TOKEN.START_EXPR, + TOKEN.EQUALS, + TOKEN.OPERATOR, + ]: + if not self.start_of_object_property(): + self.allow_wrap_or_preserved_newline(current_token) + elif ( + current_token.text[0] == "`" + and self._flags.last_token.type == TOKEN.END_EXPR + and current_token.previous.text in ["]", ")"] + and current_token.newlines == 0 + ): + self._output.space_before_token = True + else: + self.print_newline() + + self.print_token(current_token) + + def handle_equals(self, current_token): + if self.start_of_statement(current_token): + # The conditional starts the statement if appropriate. 
+ pass + else: + self.handle_whitespace_and_comments(current_token) + + if self._flags.declaration_statement: + # just got an '=' in a var-line, different line breaking rules will + # apply + self._flags.declaration_assignment = True + + self._output.space_before_token = True + self.print_token(current_token) + self._output.space_before_token = True + + def handle_comma(self, current_token): + self.handle_whitespace_and_comments(current_token, True) + + self.print_token(current_token) + self._output.space_before_token = True + + if self._flags.declaration_statement: + if self.is_expression(self._flags.parent.mode): + # do not break on comma, for ( var a = 1, b = 2 + self._flags.declaration_assignment = False + + if self._flags.declaration_assignment: + self._flags.declaration_assignment = False + self.print_newline(preserve_statement_flags=True) + elif self._options.comma_first: + # for comma-first, we want to allow a newline before the comma + # to turn into a newline after the comma, which we will fixup + # later + self.allow_wrap_or_preserved_newline(current_token) + + elif self._flags.mode == MODE.ObjectLiteral or ( + self._flags.mode == MODE.Statement + and self._flags.parent.mode == MODE.ObjectLiteral + ): + if self._flags.mode == MODE.Statement: + self.restore_mode() + + if not self._flags.inline_frame: + self.print_newline() + elif self._options.comma_first: + # EXPR or DO_BLOCK + # for comma-first, we want to allow a newline before the comma + # to turn into a newline after the comma, which we will fixup later + self.allow_wrap_or_preserved_newline(current_token) + + def handle_operator(self, current_token): + isGeneratorAsterisk = current_token.text == "*" and ( + reserved_array(self._flags.last_token, ["function", "yield"]) + or ( + self._flags.last_token.type + in [TOKEN.START_BLOCK, TOKEN.COMMA, TOKEN.END_BLOCK, TOKEN.SEMICOLON] + ) + ) + isUnary = current_token.text in ["+", "-"] and ( + self._flags.last_token.type + in [TOKEN.START_BLOCK, 
TOKEN.START_EXPR, TOKEN.EQUALS, TOKEN.OPERATOR] + or self._flags.last_token.text in Tokenizer.line_starters + or self._flags.last_token.text == "," + ) + + if self.start_of_statement(current_token): + # The conditional starts the statement if appropriate. + pass + else: + preserve_statement_flags = not isGeneratorAsterisk + self.handle_whitespace_and_comments(current_token, preserve_statement_flags) + + # hack for actionscript's import .*; + if current_token.text == "*" and self._flags.last_token.type == TOKEN.DOT: + self.print_token(current_token) + return + + if current_token.text == "::": + # no spaces around the exotic namespacing syntax operator + self.print_token(current_token) + return + + if current_token.text in ["-", "+"] and self.start_of_object_property(): + # numeric value with +/- symbol in front as a property + self.print_token(current_token) + return + + # Allow line wrapping between operators when operator_position is + # set to before or preserve + if ( + self._flags.last_token.type == TOKEN.OPERATOR + and self._options.operator_position in OPERATOR_POSITION_BEFORE_OR_PRESERVE + ): + self.allow_wrap_or_preserved_newline(current_token) + + if current_token.text == ":" and self._flags.in_case: + self.print_token(current_token) + self._flags.in_case = False + self._flags.case_body = True + if self._tokens.peek().type != TOKEN.START_BLOCK: + self.indent() + self.print_newline() + self._flags.case_block = False + else: + self._output.space_before_token = True + self._flags.case_block = True + + return + + space_before = True + space_after = True + in_ternary = False + + if current_token.text == ":": + if self._flags.ternary_depth == 0: + # Colon is invalid javascript outside of ternary and object, + # but do our best to guess what was meant. 
+ space_before = False + else: + self._flags.ternary_depth -= 1 + in_ternary = True + elif current_token.text == "?": + self._flags.ternary_depth += 1 + + # let's handle the operator_position option prior to any conflicting + # logic + if ( + (not isUnary) + and (not isGeneratorAsterisk) + and self._options.preserve_newlines + and current_token.text in Tokenizer.positionable_operators + ): + isColon = current_token.text == ":" + isTernaryColon = isColon and in_ternary + isOtherColon = isColon and not in_ternary + + if self._options.operator_position == OPERATOR_POSITION["before_newline"]: + # if the current token is : and it's not a ternary statement + # then we set space_before to false + self._output.space_before_token = not isOtherColon + + self.print_token(current_token) + + if (not isColon) or isTernaryColon: + self.allow_wrap_or_preserved_newline(current_token) + + self._output.space_before_token = True + + return + + elif self._options.operator_position == OPERATOR_POSITION["after_newline"]: + # if the current token is anything but colon, or (via deduction) it's a colon and in a ternary statement, + # then print a newline. 
+ self._output.space_before_token = True + + if (not isColon) or isTernaryColon: + if self._tokens.peek().newlines: + self.print_newline(preserve_statement_flags=True) + else: + self.allow_wrap_or_preserved_newline(current_token) + else: + self._output.space_before_token = False + + self.print_token(current_token) + + self._output.space_before_token = True + return + + elif ( + self._options.operator_position == OPERATOR_POSITION["preserve_newline"] + ): + if not isOtherColon: + self.allow_wrap_or_preserved_newline(current_token) + + # if we just added a newline, or the current token is : and it's not a ternary statement, + # then we set space_before to false + self._output.space_before_token = not ( + self._output.just_added_newline() or isOtherColon + ) + + self.print_token(current_token) + + self._output.space_before_token = True + return + + if isGeneratorAsterisk: + self.allow_wrap_or_preserved_newline(current_token) + space_before = False + next_token = self._tokens.peek() + space_after = next_token and next_token.type in [TOKEN.WORD, TOKEN.RESERVED] + elif current_token.text == "...": + self.allow_wrap_or_preserved_newline(current_token) + space_before = self._flags.last_token.type == TOKEN.START_BLOCK + space_after = False + elif current_token.text in ["--", "++", "!", "~"] or isUnary: + if ( + self._flags.last_token.type == TOKEN.COMMA + or self._flags.last_token.type == TOKEN.START_EXPR + ): + self.allow_wrap_or_preserved_newline(current_token) + + space_before = False + space_after = False + + # http://www.ecma-international.org/ecma-262/5.1/#sec-7.9.1 + # if there is a newline between -- or ++ and anything else we + # should preserve it. 
+ if current_token.newlines and ( + current_token.text == "--" + or current_token.text == "++" + or current_token.text == "~" + ): + new_line_needed = ( + reserved_array(self._flags.last_token, _special_word_set) + and current_token.newlines + ) + if new_line_needed and ( + self._previous_flags.if_block or self._previous_flags.else_block + ): + self.restore_mode() + self.print_newline(new_line_needed, True) + + if self._flags.last_token.text == ";" and self.is_expression( + self._flags.mode + ): + # for (;; ++i) + # ^^ + space_before = True + + if self._flags.last_token.type == TOKEN.RESERVED: + space_before = True + elif self._flags.last_token.type == TOKEN.END_EXPR: + space_before = not ( + self._flags.last_token.text == "]" + and current_token.text in ["--", "++"] + ) + elif self._flags.last_token.type == TOKEN.OPERATOR: + # a++ + ++b + # a - -b + space_before = current_token.text in [ + "--", + "-", + "++", + "+", + ] and self._flags.last_token.text in ["--", "-", "++", "+"] + # + and - are not unary when preceeded by -- or ++ operator + # a-- + b + # a * +b + # a - -b + if current_token.text in ["-", "+"] and self._flags.last_token.text in [ + "--", + "++", + ]: + space_after = True + + if ( + ( + self._flags.mode == MODE.BlockStatement + and not self._flags.inline_frame + ) + or self._flags.mode == MODE.Statement + ) and self._flags.last_token.text in ["{", ";"]: + # { foo: --i } + # foo(): --bar + self.print_newline() + + if space_before: + self._output.space_before_token = True + + self.print_token(current_token) + + if space_after: + self._output.space_before_token = True + + def handle_block_comment(self, current_token, preserve_statement_flags): + if self._output.raw: + self._output.add_raw_token(current_token) + if ( + current_token.directives + and current_token.directives.get("preserve") == "end" + ): + # If we're testing the raw output behavior, do not allow a + # directive to turn it off. 
+ self._output.raw = self._options.test_output_raw + return + + if current_token.directives: + self.print_newline(preserve_statement_flags=preserve_statement_flags) + self.print_token(current_token) + if current_token.directives.get("preserve") == "start": + self._output.raw = True + + self.print_newline(preserve_statement_flags=True) + return + + # inline block + if ( + not self.acorn.newline.search(current_token.text) + and not current_token.newlines + ): + self._output.space_before_token = True + self.print_token(current_token) + self._output.space_before_token = True + return + + lines = self.acorn.allLineBreaks.split(current_token.text) + javadoc = False + starless = False + last_indent = current_token.whitespace_before + last_indent_length = len(last_indent) + + # block comment starts with a new line + self.print_newline(preserve_statement_flags=preserve_statement_flags) + + # first line always indented + self.print_token(current_token, lines[0]) + self.print_newline(preserve_statement_flags=preserve_statement_flags) + + if len(lines) > 1: + lines = lines[1:] + javadoc = not any( + l for l in lines if (l.strip() == "" or (l.lstrip())[0] != "*") + ) + starless = all(l.startswith(last_indent) or l.strip() == "" for l in lines) + + if javadoc: + self._flags.alignment = 1 + + for line in lines: + if javadoc: + # javadoc: reformat and re-indent + self.print_token(current_token, line.lstrip()) + elif starless and len(line) > last_indent_length: + # starless: re-indent non-empty content, avoiding trim + self.print_token(current_token, line[last_indent_length:]) + else: + # normal comments output raw + self._output.current_line.set_indent(-1) + self._output.add_token(line) + + # for comments on their own line or more than one line, + # make sure there's a new line after + self.print_newline(preserve_statement_flags=preserve_statement_flags) + + self._flags.alignment = 0 + + def handle_comment(self, current_token, preserve_statement_flags): + if 
current_token.newlines: + self.print_newline(preserve_statement_flags=preserve_statement_flags) + + if not current_token.newlines: + self._output.trim(True) + + self._output.space_before_token = True + self.print_token(current_token) + self.print_newline(preserve_statement_flags=preserve_statement_flags) + + def handle_dot(self, current_token): + if self.start_of_statement(current_token): + # The conditional starts the statement if appropriate. + pass + else: + self.handle_whitespace_and_comments(current_token, True) + + if re.search("^([0-9])+$", self._flags.last_token.text): + self._output.space_before_token = True + + if reserved_array(self._flags.last_token, _special_word_set): + self._output.space_before_token = False + else: + # allow preserved newlines before dots in general + # force newlines on dots after close paren when break_chained - for + # bar().baz() + self.allow_wrap_or_preserved_newline( + current_token, + self._flags.last_token.text == ")" + and self._options.break_chained_methods, + ) + + # Only unindent chained method dot if this dot starts a new line. 
+ # Otherwise the automatic extra indentation removal + # will handle any over indent + if self._options.unindent_chained_methods and self._output.just_added_newline(): + self.deindent() + + self.print_token(current_token) + + def handle_unknown(self, current_token, preserve_statement_flags): + self.print_token(current_token) + if current_token.text[-1] == "\n": + self.print_newline(preserve_statement_flags=preserve_statement_flags) + + def handle_eof(self, current_token): + # Unwind any open statements + while self._flags.mode == MODE.Statement: + self.restore_mode() + + self.handle_whitespace_and_comments(current_token) diff --git a/venv/lib/python3.11/site-packages/jsbeautifier/javascript/options.py b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/options.py new file mode 100644 index 0000000..541a6d0 --- /dev/null +++ b/venv/lib/python3.11/site-packages/jsbeautifier/javascript/options.py @@ -0,0 +1,102 @@ +# The MIT License (MIT) +# +# Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation files +# (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, +# publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
class BeautifierOptions(BaseOptions):
    """Resolved option set for the JavaScript beautifier.

    Every setting is read from the raw user options through the
    ``BaseOptions`` getters and exposed as a plain attribute.
    """

    def __init__(self, options=None):
        """Resolve ``options`` into JavaScript beautifier settings.

        Args:
            options: raw options, passed to ``BaseOptions.__init__``
                together with the ``"js"`` language key.
        """
        BaseOptions.__init__(self, options, "js")

        self.css = self.js = self.html = None

        # Compatibility: rewrite deprecated brace_style spellings on the raw
        # options before the value is parsed below.
        requested_style = getattr(self.raw_options, "brace_style", None)
        for deprecated, replacement in (
            ("expand-strict", "expand"),
            ("collapse-preserve-inline", "collapse,preserve-inline"),
        ):
            if requested_style == deprecated:
                self.raw_options.brace_style = replacement
                break

        # "preserve-inline" in the delimited string switches on
        # brace_preserve_inline. Every other entry is a brace style and only
        # the last one has an effect. The initial values are the defaults
        # used when one half of the meta-option is missing.
        preserve_inline = False
        chosen_style = "collapse"
        for entry in self._get_selection_list(
            "brace_style",
            ["collapse", "expand", "end-expand", "none", "preserve-inline"],
        ):
            if entry == "preserve-inline":
                preserve_inline = True
            else:
                chosen_style = entry
        self.brace_preserve_inline = preserve_inline
        self.brace_style = chosen_style

        # Boolean switches read without an explicit default.
        for flag in (
            "unindent_chained_methods",
            "break_chained_methods",
            "space_in_paren",
            "space_in_empty_paren",
            "jslint_happy",
            "space_after_anon_function",
            "space_after_named_function",
            "keep_array_indentation",
        ):
            setattr(self, flag, self._get_boolean(flag))

        # The only switch in this class read with an explicit default of True.
        self.space_before_conditional = self._get_boolean(
            "space_before_conditional", True
        )

        for flag in ("unescape_strings", "e4x", "comma_first"):
            setattr(self, flag, self._get_boolean(flag))

        self.operator_position = self._get_selection(
            "operator_position", OPERATOR_POSITION
        )

        # For testing of beautify preserve:start directive
        self.test_output_raw = False

        # jslint_happy forces space_after_anon_function on.
        if self.jslint_happy:
            self.space_after_anon_function = True

        self.keep_quiet = False
        self.eval_code = False
+# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation files +# (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, +# publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
class TokenTypes(BaseTokenTypes):
    """Type tags for JavaScript tokens, added to the inherited base tags."""

    # Opening / closing delimiters
    START_EXPR = "TK_START_EXPR"
    END_EXPR = "TK_END_EXPR"
    START_BLOCK = "TK_START_BLOCK"
    END_BLOCK = "TK_END_BLOCK"

    # Words and literals
    WORD = "TK_WORD"
    RESERVED = "TK_RESERVED"
    STRING = "TK_STRING"

    # Punctuation
    SEMICOLON = "TK_SEMICOLON"
    EQUALS = "TK_EQUALS"
    OPERATOR = "TK_OPERATOR"
    COMMA = "TK_COMMA"
    DOT = "TK_DOT"

    # Comments and unclassified input
    BLOCK_COMMENT = "TK_BLOCK_COMMENT"
    COMMENT = "TK_COMMENT"
    UNKNOWN = "TK_UNKNOWN"

    def __init__(self):
        """Do nothing; every tag is a class attribute."""
class TokenizerPatterns(BaseTokenizerPatterns):
    """Scanner patterns for JavaScript input, on top of the base patterns."""

    def __init__(self, input_scanner, acorn, options):
        """Build every pattern the JavaScript tokenizer reads with.

        Args:
            input_scanner: scanner given to the base class and to each
                pattern created here.
            acorn: provides the ``identifier``, ``identifierMatch`` and
                ``lineBreak`` expressions used below.
            options: passed to ``TemplatablePattern.read_options``.
        """
        BaseTokenizerPatterns.__init__(self, input_scanner)

        # This is not pretty, but given how the version import was done it
        # is the only way to do this without having setup.py fail on a
        # missing six dependency.
        to_text = __import__("six").u

        # IMPORTANT: these strings must be run through six to handle \u chars
        self.whitespace = self.whitespace.matching(
            to_text(r"\u00A0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff"),
            to_text(r"\u2028\u2029"),
        )

        plain = Pattern(input_scanner)
        templated = TemplatablePattern(input_scanner).read_options(options)

        # Words, numbers and punctuation
        identifier_start = templated.starting_with(acorn.identifier)
        self.identifier = identifier_start.matching(acorn.identifierMatch)
        self.number = plain.matching(number_pattern)
        self.punct = plain.matching(punct_pattern)

        # Comments
        line_comment_start = plain.starting_with(r"//")
        self.comment = line_comment_start.until(to_text(r"[\n\r\u2028\u2029]"))
        self.block_comment = plain.starting_with(r"/\*").until_after(r"\*/")
        self.html_comment_start = plain.matching(r"<!--")
        self.html_comment_end = plain.matching(r"-->")

        # Lines starting with '#'
        self.include = plain.starting_with(r"#include").until_after(acorn.lineBreak)
        self.shebang = plain.starting_with(r"#!").until_after(acorn.lineBreak)

        self.xml = plain.matching(xmlRegExp)

        # String and template literal bodies
        self.single_quote = templated.until(to_text(r"['\\\n\r\u2028\u2029]"))
        self.double_quote = templated.until(to_text(r'["\\\n\r\u2028\u2029]'))
        self.template_text = templated.until(r"[`\\$]")
        self.template_expression = templated.until(r"[`}\\]")
def _is_closing(self, current_token, open_token):
    """Tell whether ``current_token`` closes the bracket in ``open_token``.

    Returns:
        True only when ``current_token`` is an END_BLOCK or END_EXPR token,
        ``open_token`` is not None, and the two texts form one of the
        pairs ``[ ]``, ``( )`` or ``{ }``.
    """
    if current_token.type not in (TOKEN.END_BLOCK, TOKEN.END_EXPR):
        return False
    if open_token is None:
        return False

    bracket_pair = (open_token.text, current_token.text)
    return bracket_pair in (("[", "]"), ("(", ")"), ("{", "}"))


def _get_next_token(self, previous_token, open_token):
    """Read the next token from the input.

    Leading whitespace is consumed first. The readers are then tried in a
    fixed order and the first truthy result is returned; if none produces
    a token, the next character becomes an UNKNOWN token.

    Args:
        previous_token: the token read before this one, passed to the
            readers that depend on context.
        open_token: not used by this method.
    """
    self._readWhitespace()
    c = self._input.peek()

    # Each entry is evaluated only if every earlier one gave a falsy result.
    readers = (
        lambda: self._create_token(TOKEN.EOF, "") if c is None else None,
        lambda: self._read_non_javascript(c),
        lambda: self._read_string(c),
        # Issue #2062 hack for record type '#{'
        lambda: self._read_pair(c, self._input.peek(1)),
        lambda: self._read_word(previous_token),
        lambda: self._read_singles(c),
        lambda: self._read_comment(c),
        lambda: self._read_regexp(c, previous_token),
        lambda: self._read_xml(c, previous_token),
        lambda: self._read_punctuation(),
        lambda: self._create_token(TOKEN.UNKNOWN, self._input.next()),
    )

    token = None
    for read in readers:
        token = read()
        if token:
            break
    return token
def _read_pair(self, c, d):
    """Read the two-character opener ``#{`` as a START_BLOCK token.

    Args:
        c: current input character.
        d: the character after ``c``.

    Returns:
        The START_BLOCK token, with both characters consumed, or None when
        ``c`` and ``d`` are not ``#`` and ``{``.
    """
    if not (c == "#" and d == "{"):
        return None

    token = self._create_token(TOKEN.START_BLOCK, c + d)
    if token is not None:
        # Consume both characters of the pair.
        self._input.next()
        self._input.next()
    return token


def _read_word(self, previous_token):
    """Read an identifier, reserved word or number.

    Args:
        previous_token: the preceding token; it decides whether a reserved
            word is treated as reserved, as an operator, or as a plain word.

    Returns:
        A WORD, RESERVED or OPERATOR token, or None when neither the
        identifier nor the number pattern reads anything.
    """
    word = self._patterns.identifier.read()

    if not bool(word):
        number = self._patterns.number.read()
        if number != "":
            return self._create_token(TOKEN.WORD, number)
        return None

    word = re.sub(self.acorn.allLineBreaks, "\n", word)

    # After a dot, or after a reserved "get"/"set", the word is never treated
    # as reserved.
    if (
        previous_token.type == TOKEN.DOT
        or (
            previous_token.type == TOKEN.RESERVED
            and previous_token.text in ("set", "get")
        )
        or not reserved_word_pattern.match(word)
    ):
        return self._create_token(TOKEN.WORD, word)

    if word in ("in", "of") and previous_token.type in (TOKEN.WORD, TOKEN.STRING):
        # in and of are operators, need to hack
        return self._create_token(TOKEN.OPERATOR, word)

    return self._create_token(TOKEN.RESERVED, word)
def _read_string(self, c):
    """Read a quoted string or template literal that starts with ``c``.

    Resets ``self.has_char_escapes`` before parsing. When the parsed text
    contained character escapes and the ``unescape_strings`` option is on,
    the text is passed through ``unescape_string``.

    Args:
        c: current input character.

    Returns:
        A STRING token, or None when ``c`` is not a backtick, single quote
        or double quote.
    """
    if c not in ("`", "'", '"'):
        return None

    opening_quote = self._input.next()
    self.has_char_escapes = False

    if c == "`":
        # Template literal: unescaped newlines allowed, "${" opens a
        # substitution.
        body = self.parse_string("`", True, "${")
    else:
        body = self.parse_string(c)
    literal = opening_quote + body

    if self.has_char_escapes and self._options.unescape_strings:
        literal = self.unescape_string(literal)

    # Take the closing quote only if the literal is actually terminated.
    if self._input.peek() == c:
        literal += self._input.next()

    literal = re.sub(self.acorn.allLineBreaks, "\n", literal)
    return self._create_token(TOKEN.STRING, literal)
def _read_xml(self, c, previous_token):
    """Read an e4x XML literal as a single STRING token.

    Only attempted when the ``e4x`` option is on, ``c`` is ``<`` and
    ``allowRegExOrXML(previous_token)`` holds. Tags are read one at a time
    with the xml pattern while a nesting depth is tracked; reading stops
    once the depth is back at zero or below.

    Returns:
        A STRING token holding the literal, or None when the conditions
        above are not met or the first match is missing or is an end tag.
    """
    if self._options.e4x and c == "<" and self.allowRegExOrXML(previous_token):
        # handle e4x xml literals
        xmlStr = ""
        match = self._patterns.xml.read_match()
        # group(1) is the leading "/" of an end tag; a literal cannot start
        # with one.
        if match and not match.group(1):
            rootTag = match.group(2)
            # Strip whitespace just inside the braces of a "{ expr }" name.
            rootTag = re.sub(r"^{\s+", "{", re.sub(r"\s+}$", "}", rootTag))
            isCurlyRoot = rootTag.startswith("{")
            depth = 0
            while bool(match):
                isEndTag = match.group(1)
                tagName = match.group(2)
                # The last group is the trailing "/" of a self-closing tag;
                # CDATA sections are treated as singletons too.
                isSingletonTag = (match.groups()[-1] != "") or (
                    match.group(2)[0:8] == "![CDATA["
                )
                # NOTE(review): for a curly root the normalised tag name is
                # only tested for truthiness, not compared with rootTag, so
                # any non-empty non-singleton tag changes the depth.
                if not isSingletonTag and (
                    tagName == rootTag
                    or (
                        isCurlyRoot
                        and re.sub(r"^{\s+", "{", re.sub(r"\s+}$", "}", tagName))
                    )
                ):
                    if isEndTag:
                        depth -= 1
                    else:
                        depth += 1

                xmlStr += match.group(0)
                if depth <= 0:
                    break

                match = self._patterns.xml.read_match()

            # if we didn't close correctly, keep unformatted.
            if not match:
                # Append everything that is left in the input.
                xmlStr += self._input.match(re.compile(r"[\s\S]*")).group(0)

            xmlStr = re.sub(self.acorn.allLineBreaks, "\n", xmlStr)
            return self._create_token(TOKEN.STRING, xmlStr)

    return None


def _read_non_javascript(self, c):
    """Read input that is not JavaScript proper.

    Handles, depending on ``c``:
      * ``#``: a shebang line (first token only), an ``#include`` line, or
        a sharp variable such as ``#1=`` / ``#1#``;
      * ``<``: an HTML comment opener ``<!--`` (first token only), read up
        to the end of the line;
      * ``-``: the HTML comment closer ``-->`` while ``in_html_comment`` is
        set.

    Returns:
        An UNKNOWN, WORD or COMMENT token, or None when nothing matched.
    """
    resulting_string = ""

    if c == "#":
        # she-bang
        if self._is_first_token():
            resulting_string = self._patterns.shebang.read()
            if resulting_string:
                # Normalise the line so that it ends in exactly one "\n".
                return self._create_token(
                    TOKEN.UNKNOWN, resulting_string.strip() + "\n"
                )

        # handles extendscript #includes
        resulting_string = self._patterns.include.read()

        if resulting_string:
            return self._create_token(
                TOKEN.UNKNOWN, resulting_string.strip() + "\n"
            )

        # Consume the "#"; undone by back() below if no sharp variable follows.
        c = self._input.next()

        # Spidermonkey-specific sharp variables for circular references
        # https://developer.mozilla.org/En/Sharp_variables_in_JavaScript
        # http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp
        # around line 1935
        sharp = "#"
        if self._input.hasNext() and self._input.testChar(digit):
            # Collect characters up to and including the terminating "#" or
            # "=", or until the input runs out.
            while True:
                c = self._input.next()
                sharp += c
                if (not self._input.hasNext()) or c == "#" or c == "=":
                    break
            if c == "#":
                pass
            elif self._input.peek() == "[" and self._input.peek(1) == "]":
                sharp += "[]"
                self._input.next()
                self._input.next()
            elif self._input.peek() == "{" and self._input.peek(1) == "}":
                sharp += "{}"
                self._input.next()
                self._input.next()

            return self._create_token(TOKEN.WORD, sharp)

        self._input.back()

    elif c == "<" and self._is_first_token():
        if self._patterns.html_comment_start.read():
            c = "<!--"
            # Take the rest of the line as part of the comment token.
            while self._input.hasNext() and not self._input.testChar(
                self.acorn.newline
            ):
                c += self._input.next()

            self.in_html_comment = True
            return self._create_token(TOKEN.COMMENT, c)

    elif (
        c == "-" and self.in_html_comment and self._patterns.html_comment_end.read()
    ):
        self.in_html_comment = False
        return self._create_token(TOKEN.COMMENT, "-->")

    return None
# Token types after which a "/" or "<" may start a regexp or XML literal.
__regexTokens = {
    TOKEN.COMMENT,
    TOKEN.START_EXPR,
    TOKEN.START_BLOCK,
    TOKEN.START,
    TOKEN.END_BLOCK,
    TOKEN.OPERATOR,
    TOKEN.EQUALS,
    TOKEN.EOF,
    TOKEN.SEMICOLON,
    TOKEN.COMMA,
}


def allowRegExOrXML(self, previous_token):
    """Decide whether a regexp or XML literal may start after ``previous_token``.

    Returns:
        True when ``previous_token`` is one of the reserved words listed
        below, is the ``)`` closing an ``if``/``while``/``for`` header, or
        has a type in ``__regexTokens``; False otherwise.
    """
    kind = previous_token.type

    if kind == TOKEN.RESERVED and previous_token.text in {
        "return",
        "case",
        "throw",
        "else",
        "do",
        "typeof",
        "yield",
    }:
        return True

    if kind == TOKEN.END_EXPR and previous_token.text == ")":
        # Look at the token just before the matching "(".
        before_paren = previous_token.opened.previous
        if before_paren.type == TOKEN.RESERVED and before_paren.text in {
            "if",
            "while",
            "for",
        }:
            return True

    return kind in self.__regexTokens
def unescape_string(self, s):
    r"""Replace ``\xNN``, ``\uNNNN`` and ``\u{N...}`` escapes in ``s``.

    A single regex substitution is not enough, because ``'\xff'``,
    ``'\\xff'`` and ``'\\\xff'`` have to be told apart. The text is
    therefore scanned left to right and backslash pairs are consumed
    together with the ordinary characters.

    Args:
        s: string literal text as read by the tokenizer.

    Returns:
        The text with escapes replaced, or ``s`` itself, unchanged, when
        an ``\x`` / ``\u`` escape cannot be decoded or an ``\x7f``..``\xff``
        escape is present.
    """
    # Compiled once per call rather than on every loop iteration.
    literal_run = re.compile(r"([\s]|[^\\]|\\\\)+")
    hex_escape = re.compile(r"x([0-9A-Fa-f]{2})")
    unicode_escape = re.compile(r"u([0-9A-Fa-f]{4})")
    braced_unicode_escape = re.compile(r"u\{([0-9A-Fa-f]+)\}")

    pieces = []
    input_scan = InputScanner(s)

    while input_scan.hasNext():
        # Keep any whitespace, non-slash characters
        # also keep slash pairs.
        matched = input_scan.match(literal_run)
        if matched:
            pieces.append(matched.group(0))

        if input_scan.peek() != "\\":
            continue

        input_scan.next()
        escape_kind = input_scan.peek()
        if escape_kind == "x":
            matched = input_scan.match(hex_escape)
        elif escape_kind == "u":
            matched = input_scan.match(unicode_escape)
            if not matched:
                matched = input_scan.match(braced_unicode_escape)
        else:
            # Any other escape is copied through untouched, together with
            # the character that follows the backslash.
            pieces.append("\\")
            if input_scan.hasNext():
                pieces.append(input_scan.next())
            continue

        # If there's some error decoding, return the original string
        if not matched:
            return s

        escaped = int(matched.group(1), 16)

        if 0x7E < escaped <= 0xFF and matched.group(0).startswith("x"):
            # we bail out on \x7f..\xff,
            # leaving whole string escaped,
            # as it's probably completely binary
            return s

        if escaped < 0x20 or escaped > 0x10FFFF:
            # Control characters 0x00..0x1f stay escaped, and an escape
            # beyond the Unicode range keeps its original spelling while
            # conversion continues.
            pieces.append("\\" + matched.group(0))
        elif escaped in (0x22, 0x27, 0x5C):
            # double quote, apostrophe, backslash - escape these
            pieces.append("\\" + chr(escaped))
        else:
            # NOTE(review): UTF-16 surrogate escapes (\ud800..\udfff) are
            # converted one code unit at a time and are not combined into a
            # single character.
            pieces.append(self.acorn.six.unichr(escaped))

    # Join on a six.u("") separator, the value the result was originally
    # accumulated from.
    return self.acorn.six.u("").join(pieces)