diff options
author | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:10:44 -0400 |
---|---|---|
committer | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:10:44 -0400 |
commit | 6d7ba58f880be618ade07f8ea080fe8c4bf8a896 (patch) | |
tree | b1c931051ffcebd2bd9d61d98d6233ffa289bbce /venv/lib/python3.11/site-packages/pygments/regexopt.py | |
parent | 4f884c9abc32990b4061a1bb6997b4b37e58ea0b (diff) |
venv
Diffstat (limited to 'venv/lib/python3.11/site-packages/pygments/regexopt.py')
-rw-r--r-- | venv/lib/python3.11/site-packages/pygments/regexopt.py | 91 |
1 files changed, 91 insertions, 0 deletions
# diff --git a/venv/lib/python3.11/site-packages/pygments/regexopt.py
#            b/venv/lib/python3.11/site-packages/pygments/regexopt.py
# new file mode 100644, index 0000000..45223ec, @@ -0,0 +1,91 @@
"""
    pygments.regexopt
    ~~~~~~~~~~~~~~~~~

    An algorithm that generates optimized regexes for matching long lists of
    literal strings.

    :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
from re import escape
from os.path import commonprefix
from itertools import groupby
from operator import itemgetter

CS_ESCAPE = re.compile(r'[\[\^\\\-\]]')
FIRST_ELEMENT = itemgetter(0)


def make_charset(letters):
    """Return a regex character class matching any one of *letters*.

    The characters ``[``, ``^``, ``\\``, ``-`` and ``]`` are
    backslash-escaped so they are taken literally inside the brackets.
    """
    return '[' + CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters)) + ']'


def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    *strings* must already be sorted; the branches below rely on an empty
    string (if any) coming first and on strings that share a first
    character being adjacent.

    *open_paren* is the text that opens the group wrapped around the
    result (``'('`` or ``'(?:'``), or ``''`` to emit no group at all.  A
    matching ``')'`` is appended only when *open_paren* is non-empty.
    """
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # The empty string is one of the alternatives: make the regex for
        # the remaining strings optional.
        return open_paren + regex_opt_inner(strings[1:], '(?:') \
            + '?' + close_paren
    if len(first) == 1:
        # Multiple one-char strings?  Make a charset.
        oneletter = [s for s in strings if len(s) == 1]
        rest = [s for s in strings if len(s) != 1]
        if len(oneletter) > 1:
            if rest:
                # Longer alternatives are emitted before the charset.
                return open_paren + regex_opt_inner(rest, '') + '|' \
                    + make_charset(oneletter) + close_paren
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # Every string starts with the same prefix: factor it out.
        plen = len(prefix)
        return open_paren + escape(prefix) \
            + regex_opt_inner([s[plen:] for s in strings], '(?:') \
            + close_paren
    # Is there a common suffix?  Found as a common prefix of the reversals.
    strings_rev = [s[::-1] for s in strings]
    suffix = commonprefix(strings_rev)
    if suffix:
        slen = len(suffix)
        # Stripping the suffix can change the ordering, so sort again
        # before recursing.
        return open_paren \
            + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:') \
            + escape(suffix[::-1]) + close_paren
    # Last resort: split into the strings sharing the first string's
    # initial character and all the others, and join both as alternatives.
    return open_paren + \
        '|'.join(regex_opt_inner(list(group[1]), '')
                 for group in groupby(strings, lambda s: s[0] == first[0])) \
        + close_paren


def regex_opt(strings, prefix='', suffix=''):
    """Return a regex pattern string that matches any string in the list.

    The strings to match must be literal strings, not regexes.  They will
    be regex-escaped.  Duplicate strings are collapsed first, so they
    cannot produce repeated charset members or redundant nested optional
    groups.

    *prefix* and *suffix* are pre- and appended to the final regex as
    given (they are not escaped).

    The result is a pattern string, not a compiled pattern object; for an
    empty list it is just ``prefix + suffix``.
    """
    strings = sorted(set(strings))
    return prefix + regex_opt_inner(strings, '(') + suffix