diff options
author | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:10:44 -0400 |
---|---|---|
committer | cyfraeviolae <cyfraeviolae> | 2024-04-03 03:10:44 -0400 |
commit | 6d7ba58f880be618ade07f8ea080fe8c4bf8a896 (patch) | |
tree | b1c931051ffcebd2bd9d61d98d6233ffa289bbce /venv/lib/python3.11/site-packages/faker/sphinx | |
parent | 4f884c9abc32990b4061a1bb6997b4b37e58ea0b (diff) |
venv
Diffstat (limited to 'venv/lib/python3.11/site-packages/faker/sphinx')
-rw-r--r-- | venv/lib/python3.11/site-packages/faker/sphinx/__init__.py | 0 | ||||
-rw-r--r-- | venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/__init__.cpython-311.pyc | bin | 0 -> 196 bytes | |||
-rw-r--r-- | venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/autodoc.cpython-311.pyc | bin | 0 -> 1334 bytes | |||
-rw-r--r-- | venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/docstring.cpython-311.pyc | bin | 0 -> 11983 bytes | |||
-rw-r--r-- | venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/documentor.cpython-311.pyc | bin | 0 -> 10310 bytes | |||
-rw-r--r-- | venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/validator.cpython-311.pyc | bin | 0 -> 7769 bytes | |||
-rw-r--r-- | venv/lib/python3.11/site-packages/faker/sphinx/autodoc.py | 19 | ||||
-rw-r--r-- | venv/lib/python3.11/site-packages/faker/sphinx/docstring.py | 224 | ||||
-rw-r--r-- | venv/lib/python3.11/site-packages/faker/sphinx/documentor.py | 170 | ||||
-rw-r--r-- | venv/lib/python3.11/site-packages/faker/sphinx/validator.py | 151 |
10 files changed, 564 insertions, 0 deletions
diff --git a/venv/lib/python3.11/site-packages/faker/sphinx/__init__.py b/venv/lib/python3.11/site-packages/faker/sphinx/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/venv/lib/python3.11/site-packages/faker/sphinx/__init__.py diff --git a/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/__init__.cpython-311.pyc b/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/__init__.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..932cf92 --- /dev/null +++ b/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/__init__.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/autodoc.cpython-311.pyc b/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/autodoc.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..a750b3c --- /dev/null +++ b/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/autodoc.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/docstring.cpython-311.pyc b/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/docstring.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..af17c1f --- /dev/null +++ b/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/docstring.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/documentor.cpython-311.pyc b/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/documentor.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..8e275af --- /dev/null +++ b/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/documentor.cpython-311.pyc diff --git a/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/validator.cpython-311.pyc b/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/validator.cpython-311.pyc Binary files differnew file mode 100644 index 0000000..e36eb63 --- /dev/null +++ 
# (end of binary entry) b/venv/lib/python3.11/site-packages/faker/sphinx/__pycache__/validator.cpython-311.pyc

# ---------------------------------------------------------------------------
# venv/lib/python3.11/site-packages/faker/sphinx/autodoc.py
# (new file, index 0000000..8e7154e)
# ---------------------------------------------------------------------------
# coding=utf-8
from faker.sphinx.docstring import ProviderMethodDocstring
from faker.sphinx.documentor import write_provider_docs


def _create_source_files(app):
    """Handler for ``builder-inited``: write the provider ``.rst`` source files."""
    write_provider_docs()


def _process_docstring(app, what, name, obj, options, lines):
    """Handler for ``autodoc-process-docstring``.

    Runs the docstring through ``ProviderMethodDocstring`` and, unless the
    preprocessing was skipped, replaces the contents of ``lines`` in place
    with the processed lines.
    """
    docstring = ProviderMethodDocstring(app, what, name, obj, options, lines)
    if not docstring.skipped:
        # Slice assignment mutates the caller's list instead of rebinding the local name.
        lines[:] = docstring.lines[:]


def setup(app):
    """Register this extension's handlers on the Sphinx application ``app``."""
    app.setup_extension("sphinx.ext.autodoc")
    app.connect("builder-inited", _create_source_files)
    app.connect("autodoc-process-docstring", _process_docstring)


# ---------------------------------------------------------------------------
# venv/lib/python3.11/site-packages/faker/sphinx/docstring.py
# (new file, index 0000000..813dbf4)
# ---------------------------------------------------------------------------
# coding=utf-8
import inspect
import logging
import re

from collections import namedtuple
from typing import Pattern

from faker import Faker
from faker.config import AVAILABLE_LOCALES, DEFAULT_LOCALE
from faker.sphinx.validator import SampleCodeValidator

logger = logging.getLogger(__name__)
_fake = Faker(AVAILABLE_LOCALES)
_base_provider_method_pattern: Pattern = re.compile(r"^faker\.providers\.BaseProvider\.(?P<method>\w+)$")
_standard_provider_method_pattern: Pattern = re.compile(r"^faker\.providers\.\w+\.Provider\.(?P<method>\w+)$")
_locale_provider_method_pattern: Pattern = re.compile(
    r"^faker\.providers\.\w+" r"\.(?P<locale>[a-z]{2,3}_[A-Z]{2})" r"\.Provider" r"\.(?P<method>\w+)$",
)
_sample_line_pattern: Pattern = re.compile(
    r"^:sample" r"(?: size=(?P<size>[1-9][0-9]*))?" r"(?: seed=(?P<seed>[0-9]+))?" r":" r"(?: ?(?P<kwargs>.*))?$",
)
_command_template = "generator.{method}({kwargs})"
_sample_output_template = (
    ">>> Faker.seed({seed})\n"
    ">>> for _ in range({size}):\n"
    "...     fake.{method}({kwargs})\n"
    "...\n"
    "{results}\n\n"
)

DEFAULT_SAMPLE_SIZE = 5
DEFAULT_SEED = 0
Sample = namedtuple("Sample", ["size", "seed", "kwargs"])


class ProviderMethodDocstring:
    """
    Class that preprocesses provider method docstrings to generate sample usage and output

    Notes on how samples are generated:
    - If the docstring belongs to a standard provider method, sample usage and output will be
      generated using a `Faker` object in the `DEFAULT_LOCALE`.
    - If the docstring belongs to a localized provider method, the correct locale will be used.
    - If the docstring does not belong to any provider method, docstring preprocessing will be skipped.
    - Docstring lines will be parsed for potential sample sections, and the generation details of each
      sample section will internally be represented as a ``Sample`` namedtuple.
    - Each ``Sample`` will have info on the keyword arguments to pass to the provider method, how many
      times the provider method will be called, and the initial seed value to ``Faker.seed()``.
    """

    def __init__(self, app, what, name, obj, options, lines):
        """Parse ``lines`` and generate samples when ``name`` is a provider method.

        ``app`` and ``options`` are accepted to mirror the signature of the
        ``autodoc-process-docstring`` event but are not used here.
        """
        self._line_iter = iter(lines)
        self._parsed_lines = []
        self._samples = []
        self._skipped = True

        # ``inspect.getfile`` raises for objects that have no source file
        # (built-ins, some dynamically created objects). The prefix is only
        # used for warnings, so fall back to a placeholder instead of
        # aborting docstring processing.
        try:
            source_file = inspect.getfile(obj)
        except (TypeError, OSError):
            source_file = "<unknown>"
        self._log_prefix = f"{source_file}:docstring of {name}: WARNING:"

        if what != "method":
            return

        base_provider_method_match = _base_provider_method_pattern.match(name)
        locale_provider_method_match = _locale_provider_method_pattern.match(name)
        standard_provider_method_match = _standard_provider_method_pattern.match(name)
        if base_provider_method_match:
            groupdict = base_provider_method_match.groupdict()
            self._method = groupdict["method"]
            self._locale = DEFAULT_LOCALE
        elif standard_provider_method_match:
            groupdict = standard_provider_method_match.groupdict()
            self._method = groupdict["method"]
            self._locale = DEFAULT_LOCALE
        elif locale_provider_method_match:
            groupdict = locale_provider_method_match.groupdict()
            self._method = groupdict["method"]
            self._locale = groupdict["locale"]
        else:
            return

        self._skipped = False
        self._parse()
        self._generate_samples()

    def _log_warning(self, warning):
        """Emit ``warning`` through the module logger, prefixed with file and object name."""
        logger.warning(f"{self._log_prefix} {warning}")

    def _parse(self):
        """Feed every remaining docstring line to ``_parse_section``.

        ``_parse_section`` may itself pull additional lines from the same
        iterator, so the loop resumes after whatever it consumed.
        """
        for line in self._line_iter:
            self._parse_section(line)

    def _parse_section(self, section):
        """Handle one docstring line, accumulating multi-line ``:sample`` sections."""
        # No-op if section does not look like the start of a sample section
        if not section.startswith(":sample"):
            self._parsed_lines.append(section)
            return

        try:
            next_line = next(self._line_iter)
        except StopIteration:
            # No more lines left to consume, so save current sample section
            self._process_sample_section(section)
            return

        # Next line is the start of a new sample section, so process
        # current sample section, and start parsing the new section
        if next_line.startswith(":sample"):
            self._process_sample_section(section)
            self._parse_section(next_line)

        # Next line is an empty line indicating the end of
        # current sample section, so process current section
        elif next_line == "":
            self._process_sample_section(section)

        # Section is assumed to be multiline, so continue
        # adding lines to current sample section
        else:
            section = section + next_line
            self._parse_section(section)

    def _process_sample_section(self, section):
        """Turn one complete ``:sample`` section into a ``Sample`` and store it.

        Malformed sections are logged and discarded.
        """
        match = _sample_line_pattern.match(section)

        # Discard sample section if malformed
        if not match:
            msg = f"The section `{section}` is malformed and will be discarded."
            self._log_warning(msg)
            return

        # Set sample generation defaults and do some beautification if necessary
        groupdict = match.groupdict()
        size = groupdict.get("size")
        seed = groupdict.get("seed")
        kwargs = groupdict.get("kwargs")
        # A requested size below DEFAULT_SAMPLE_SIZE is raised to DEFAULT_SAMPLE_SIZE.
        size = max(int(size), DEFAULT_SAMPLE_SIZE) if size else DEFAULT_SAMPLE_SIZE
        seed = int(seed) if seed else DEFAULT_SEED
        kwargs = self._beautify_kwargs(kwargs) if kwargs else ""

        # Store sample generation details
        sample = Sample(size, seed, kwargs)
        self._samples.append(sample)

    def _beautify_kwargs(self, kwargs):
        """Normalize whitespace in an argument string without touching quoted text."""

        def _repl_whitespace(match):
            quoted = match.group(1) or match.group(2)
            return quoted if quoted else ""

        def _repl_comma(match):
            quoted = match.group(1) or match.group(2)
            return quoted if quoted else ", "

        # First, remove all whitespaces and tabs not within quotes
        result = re.sub(r'("[^"]*")|(\'[^\']*\')|[ \t]+', _repl_whitespace, kwargs)

        # Next, insert a whitespace after each comma not within quotes
        result = re.sub(r'("[^"]*")|(\'[^\']*\')|,', _repl_comma, result)

        # Then return the result with all leading and trailing whitespaces stripped
        return result.strip()

    def _stringify_result(self, value):
        """Return the text shown for one generated sample value."""
        return repr(value)

    def _generate_eval_scope(self):
        """Build the globals mapping passed to ``eval`` for sample commands."""
        from collections import OrderedDict  # noqa: F401 Do not remove! The eval command needs this reference.

        return {
            "generator": _fake[self._locale],
            "OrderedDict": OrderedDict,
        }

    def _inject_default_sample_section(self):
        """Append a sample with default size and seed and no arguments."""
        default_sample = Sample(DEFAULT_SAMPLE_SIZE, DEFAULT_SEED, "")
        self._samples.append(default_sample)

    def _generate_samples(self):
        """Evaluate every stored sample and append an ``:examples:`` section.

        Samples whose command fails validation or raises during evaluation
        are logged and skipped. Nothing is appended if no sample succeeds.
        """
        if not self._samples:
            self._inject_default_sample_section()

        output = ""
        eval_scope = self._generate_eval_scope()
        for sample in self._samples:
            command = _command_template.format(method=self._method, kwargs=sample.kwargs)
            validator = SampleCodeValidator(command)
            if validator.errors:
                msg = (
                    f"Invalid code elements detected. Sample generation will be "
                    f"skipped for method `{self._method}` with arguments `{sample.kwargs}`."
                )
                self._log_warning(msg)
                continue

            try:
                Faker.seed(sample.seed)
                # NOTE(review): ``eval`` is only reached for commands that passed
                # ``SampleCodeValidator``; keep that check in place.
                results = "\n".join([self._stringify_result(eval(command, eval_scope)) for _ in range(sample.size)])
            except Exception:
                msg = f"Sample generation failed for method `{self._method}` with arguments `{sample.kwargs}`."
                self._log_warning(msg)
                continue
            else:
                output += _sample_output_template.format(
                    seed=sample.seed,
                    method=self._method,
                    kwargs=sample.kwargs,
                    size=sample.size,
                    results=results,
                )

        if output:
            output = ":examples:\n\n" + output
            self._parsed_lines.extend(output.split("\n"))

    @property
    def skipped(self):
        """``True`` when the docstring was not preprocessed."""
        return self._skipped

    @property
    def lines(self):
        """The processed docstring lines."""
        return self._parsed_lines


# ---------------------------------------------------------------------------
# venv/lib/python3.11/site-packages/faker/sphinx/documentor.py
# (new file, index 0000000..ad6d7ab)
# ---------------------------------------------------------------------------
# coding=utf-8
import importlib
import inspect
import os

from pathlib import Path

from faker.config import AVAILABLE_LOCALES
from faker.config import PROVIDERS as STANDARD_PROVIDER_NAMES
from faker.providers import BaseProvider

if os.environ.get("READTHEDOCS", False):
    version = os.environ["READTHEDOCS_VERSION"]
    HOME = Path("/home/docs/checkouts/readthedocs.org/user_builds/faker/checkouts") / version
    DOCS_ROOT = HOME / "docs"
else:
    DOCS_ROOT = Path(__file__).resolve().parents[2] / "docs"

# One adornment character per title level (index 0 is level 1).
SECTION_ADORNMENTS = "#*=-~"

PROVIDER_AUTODOC_TEMPLATE = """
.. autoclass:: {provider_class}
   :members: {provider_methods}
   :undoc-members:
   :show-inheritance:

"""

BASE_PROVIDER_METHOD_NAMES = [
    name for name, method in inspect.getmembers(BaseProvider, inspect.isfunction) if not name.startswith("_")
]


def _get_provider_methods(provider_class):
    """Return a comma-separated list of a provider class's own public methods.

    ``provider_class`` is a dotted path. Names starting with ``_`` and names
    listed in ``BASE_PROVIDER_METHOD_NAMES`` are excluded. An empty string is
    returned when the module cannot be imported.
    """
    try:
        provider_module_name, obj_name = provider_class.rsplit(".", 1)
        provider_module = importlib.import_module(provider_module_name)
        # A missing attribute yields ``None`` here, which has no function
        # members, so the result below is an empty string in that case too.
        provider = getattr(provider_module, obj_name, None)
    except (ModuleNotFoundError, AttributeError):
        return ""
    else:
        return ", ".join(
            [
                name
                for name, method in inspect.getmembers(provider, inspect.isfunction)
                if not name.startswith("_") and name not in BASE_PROVIDER_METHOD_NAMES
            ]
        )


def _get_localized_provider_info(locale):
    """Return ``(provider_class_path, standard_provider_name)`` pairs for ``locale``.

    Standard providers without an importable ``<provider>.<locale>`` module,
    or whose module has no ``Provider`` attribute, are skipped.
    """
    info = []
    for provider_name in STANDARD_PROVIDER_NAMES:
        try:
            locale_module_path = f"{provider_name}.{locale}"
            locale_module = importlib.import_module(locale_module_path)
            provider = getattr(locale_module, "Provider")
        except (ModuleNotFoundError, AttributeError):
            continue
        else:
            provider_class = f"{provider.__module__}.Provider"
            info.append((provider_class, provider_name))
    return info


def _write(fh, s):
    """Write string ``s`` to binary file handle ``fh`` encoded as UTF-8."""
    return fh.write(s.encode("utf-8"))


def _hide_edit_on_github(fh):
    """Write the ``:github_url: hide`` field at the top of a page."""
    _write(fh, ":github_url: hide\n\n")


def _write_title(fh, title, level=1):
    """Write ``title`` as a section heading.

    Levels 1 and 2 get an overline as well as an underline; levels 3 to 5
    get an underline only.

    Raises:
        ValueError: if ``level`` is not an integer from 1 to 5.
    """
    if not isinstance(level, int) or level < 1 or level > 5:
        raise ValueError("`level` must be an integer from 1 to 5")
    if level <= 2:
        _write(fh, SECTION_ADORNMENTS[level - 1] * len(title))
        _write(fh, "\n")
    _write(fh, f"{title}\n")
    _write(fh, SECTION_ADORNMENTS[level - 1] * len(title))
    _write(fh, "\n\n")


def _write_includes(fh):
    """Write the ``include`` directive for the shared substitutions file."""
    _write(fh, ".. include:: ../includes/substitutions.rst")
    _write(fh, "\n\n")


def _write_standard_provider_index():
    """Write ``providers.rst``, the toctree of the base and standard providers."""
    with (DOCS_ROOT / "providers.rst").open("wb") as fh:
        _hide_edit_on_github(fh)
        _write_title(fh, "Standard Providers")
        _write(fh, ".. toctree::\n")
        _write(fh, "   :maxdepth: 2\n\n")
        _write(fh, "   providers/baseprovider\n")
        for provider_name in STANDARD_PROVIDER_NAMES:
            _write(fh, f"   providers/{provider_name}\n")


def _write_base_provider_docs():
    """Write ``providers/baseprovider.rst`` documenting ``BaseProvider``."""
    (DOCS_ROOT / "providers").mkdir(parents=True, exist_ok=True)
    with (DOCS_ROOT / "providers" / "baseprovider.rst").open("wb") as fh:
        _hide_edit_on_github(fh)
        _write_title(fh, "``faker.providers``")
        _write_includes(fh)
        _write(
            fh,
            PROVIDER_AUTODOC_TEMPLATE.format(
                provider_class="faker.providers.BaseProvider",
                provider_methods=",".join(BASE_PROVIDER_METHOD_NAMES),
            ),
        )


def _write_standard_provider_docs():
    """Write one ``providers/<name>.rst`` page per standard provider."""
    (DOCS_ROOT / "providers").mkdir(parents=True, exist_ok=True)
    for provider_name in STANDARD_PROVIDER_NAMES:
        with (DOCS_ROOT / "providers" / f"{provider_name}.rst").open("wb") as fh:
            provider_class = f"{provider_name}.Provider"
            provider_methods = _get_provider_methods(provider_class)
            _hide_edit_on_github(fh)
            _write_title(fh, f"``{provider_name}``")
            _write_includes(fh)
            _write(
                fh,
                PROVIDER_AUTODOC_TEMPLATE.format(
                    provider_class=provider_class,
                    provider_methods=provider_methods,
                ),
            )


def _write_localized_provider_index():
    """Write ``locales.rst``, the toctree of every available locale."""
    with (DOCS_ROOT / "locales.rst").open("wb") as fh:
        _hide_edit_on_github(fh)
        _write_title(fh, "Localized Providers")
        _write(fh, ".. toctree::\n")
        _write(fh, "   :maxdepth: 2\n\n")
        for locale in AVAILABLE_LOCALES:
            _write(fh, f"   locales/{locale}\n")


def _write_localized_provider_docs():
    """Write one ``locales/<locale>.rst`` page per available locale."""
    (DOCS_ROOT / "locales").mkdir(parents=True, exist_ok=True)
    for locale in AVAILABLE_LOCALES:
        info = _get_localized_provider_info(locale)
        with (DOCS_ROOT / "locales" / "{}.rst".format(locale)).open("wb") as fh:
            _hide_edit_on_github(fh)
            _write_title(fh, f"Locale {locale}")
            _write_includes(fh)
            for provider_class, standard_provider_name in info:
                provider_methods = _get_provider_methods(provider_class)
                _write_title(fh, f"``{standard_provider_name}``", level=2)
                _write(
                    fh,
                    PROVIDER_AUTODOC_TEMPLATE.format(
                        provider_class=provider_class,
                        provider_methods=provider_methods,
                    ),
                )


def write_provider_docs():
    """Create ``DOCS_ROOT`` if needed and write every index and provider page."""
    DOCS_ROOT.mkdir(parents=True, exist_ok=True)
    _write_standard_provider_index()
    _write_base_provider_docs()
    _write_standard_provider_docs()
    _write_localized_provider_index()
    _write_localized_provider_docs()


# ---------------------------------------------------------------------------
# venv/lib/python3.11/site-packages/faker/sphinx/validator.py
# (new file, index 0000000..05acf89)
# ---------------------------------------------------------------------------
# coding=utf-8
import ast
import traceback

from collections import OrderedDict


class SampleCodeValidator(ast.NodeVisitor):
    """
    Class that checks if a string is a valid and "safe" Python expression

    What is considered "safe" for this class is limited to the context of generating
    provider method sample code and output for documentation purposes. The end goal
    is to pass a command string to `eval()` should the string pass the validation
    performed by this class.

    The main assumption this class will make is that the command string passed during
    class instantiation will always be in the form "{generator}.{method}({arguments})".
    In said form, {generator} is a `Generator` object variable that already exists
    within the scope where `eval()` will be called, {method} will be the provider
    method name which is also available within the `eval()` scope, and {arguments}
    will be sample arguments parsed from docstrings. This means that {arguments} can
    potentially be used as a vector for code injection.

    In order to neuter the impact of code injection, the following validation steps
    will be applied:

    - The command string is parsed using 'eval' mode, meaning expressions only.
    - The command string can only have whitelisted code elements. See `_whitelisted_nodes`.
    - The command string can only have one instance of variable access.
    - The command string can only have one instance of attribute access.
    - The command string can only have one instance of a function/method call.
    - The argument values in the command string can only be literals.
    - The only literals allowed are numbers (integers, floats, or complex numbers),
      strings (but not f-strings), bytes, lists, tuples, sets, dictionaries, True,
      False, and None.

    There is, however, an exception. In order to accommodate sample code with custom
    probability distribution, variable access to `OrderedDict` will not count against
    the maximum limit of variable access, and invoking `OrderedDict` constructor calls
    will not count against the maximum limit of function/method calls. In order to
    neuter the impact of code injection, please ensure that `OrderedDict` refers to
    the standard library's `collections.OrderedDict` within the `eval()` scope before
    passing the command string to `eval()` for execution. This can be done in code review.
    """

    _whitelisted_nodes = (
        # Code elements related to function calls and variable and attribute access
        ast.Expression,
        ast.Call,
        ast.Attribute,
        ast.Name,
        ast.Load,
        ast.keyword,
        # Code elements representing whitelisted literals. Scalar literals are
        # all parsed as `ast.Constant`; the deprecated `ast.Num`, `ast.Str`,
        # `ast.Bytes` and `ast.NameConstant` aliases are deliberately not used.
        ast.Constant,
        ast.List,
        ast.Tuple,
        ast.Set,
        ast.Dict,
    )

    # Value types an `ast.Constant` may carry. This keeps `Ellipsis` (which is
    # also parsed as `ast.Constant`) rejected, as it was with the old aliases.
    _whitelisted_constant_types = (int, float, complex, str, bytes, bool, type(None))

    _max_function_call_count = 1
    _max_attribute_access_count = 1
    _max_variable_access_count = 1

    def __init__(self, command):
        """Parse ``command`` in ``eval`` mode and validate it immediately.

        A parse failure is recorded in ``errors`` as a formatted traceback.
        """
        self._errors = set()
        self._function_call_count = 0
        self._attribute_access_count = 0
        self._variable_access_count = 0
        self._command = command

        try:
            self._tree = ast.parse(command, mode="eval")
        except (SyntaxError, ValueError):
            self._log_error(traceback.format_exc())
        else:
            self._validate()

    @property
    def errors(self):
        """Set of error messages collected so far; empty if the command is valid."""
        return self._errors

    def _is_whitelisted(self, node):
        """Return whether ``node`` is an allowed code element."""
        if not isinstance(node, self._whitelisted_nodes):
            return False
        if isinstance(node, ast.Constant):
            return isinstance(node.value, self._whitelisted_constant_types)
        return True

    def _log_error(self, msg):
        """Record ``msg`` in the error set."""
        self._errors.add(msg)

    def _validate(self):
        """Walk the parsed tree, collecting errors along the way."""
        self.visit(self._tree)

    def _is_node_using_ordereddict(self, node):
        """Return whether ``node`` is the name ``OrderedDict`` or a call to it."""
        is_valid = False

        # If instance of function call, check if it is a call to the OrderedDict constructor
        if isinstance(node, ast.Call):
            is_valid = self._is_node_using_ordereddict(node.func)

        # If instance of variable access, check if it is a reference to OrderedDict
        elif isinstance(node, ast.Name) and node.id == OrderedDict.__name__:
            is_valid = True

        return is_valid

    def visit(self, node):
        """Record an error for non-whitelisted nodes, then dispatch as usual."""
        # Check if code element type is allowed
        if not self._is_whitelisted(node):
            msg = "Code element `%s` is not allowed." % node.__class__.__name__
            self._log_error(msg)

        return super().visit(node)

    def visit_Call(self, node):
        """Count function/method calls, exempting ``OrderedDict`` constructor calls."""
        if not self._is_node_using_ordereddict(node):
            # There can only be one instance of a function call
            if self._function_call_count < self._max_function_call_count:
                self._function_call_count += 1
            else:
                msg = "There can only be one instance of a function/method call."
                self._log_error(msg)

        # Proceed to child nodes
        self.generic_visit(node)

    def visit_Attribute(self, node):
        """Count attribute accesses against the allowed maximum."""
        # There can only be one instance of attribute access
        if self._attribute_access_count < self._max_attribute_access_count:
            self._attribute_access_count += 1
        else:
            msg = "There can only be one instance of attribute access."
            self._log_error(msg)

        # Proceed to child nodes
        self.generic_visit(node)

    def visit_Name(self, node):
        """Count variable accesses, exempting references to ``OrderedDict``."""
        if not self._is_node_using_ordereddict(node):
            # There can only be one instance of variable access
            if self._variable_access_count < self._max_variable_access_count:
                self._variable_access_count += 1
            else:
                msg = "There can only be one instance of variable access."
                self._log_error(msg)

        # Proceed to child nodes
        self.generic_visit(node)