import re
import string

from collections import OrderedDict
from typing import Any, Collection, List, Optional, Sequence, TypeVar, Union

from ..generator import Generator
from ..typing import OrderedDictType
from ..utils.distribution import choices_distribution, choices_distribution_unique

# Pre-compiled single-character placeholder patterns. Each one is consumed by
# the numerify / lexify / hexify helpers of BaseProvider in this module.
_re_hash = re.compile(r"#")  # '#': replaced with a random digit (numerify)
_re_perc = re.compile(r"%")  # '%': replaced with a random non-zero digit (numerify)
_re_dol = re.compile(r"\$")  # '$': replaced with a random digit from 2 to 9 (numerify)
_re_excl = re.compile(r"!")  # '!': replaced with a random digit or "" (numerify)
_re_at = re.compile(r"@")  # '@': replaced with a random non-zero digit or "" (numerify)
_re_qm = re.compile(r"\?")  # '?': replaced with a random letter (lexify)
_re_cir = re.compile(r"\^")  # '^': replaced with a random hexadecimal character (hexify)

# Generic type of a single sampled element.
T = TypeVar("T")
# Shapes accepted by the ``elements`` argument of the sampling helpers: a plain
# collection, or an ordered mapping of element -> float weight.
ElementsType = Union[Collection[str], Collection[T], OrderedDictType[T, float]]


class BaseProvider:
    """Base class for fake data providers."""

    # Provider identifier.
    # NOTE(review): presumably read by the surrounding provider machinery — confirm.
    __provider__ = "base"
    # Language tag of the provider; ``None`` on the base class.
    # NOTE(review): presumably overridden by localized subclasses — confirm.
    __lang__: Optional[str] = None
    # Class-wide default for ``random_elements(use_weighting=None)``: whether the
    # values of an ``OrderedDict`` of elements are forwarded as sampling weights.
    __use_weighting__ = False

    # Locales supported by Linux Mint from `/usr/share/i18n/SUPPORTED`
    # Maps a language code to the tuple of territory codes that ``locale()``
    # may append to it (e.g. "en" + "_" + "US").
    language_locale_codes = {
        "aa": ("DJ", "ER", "ET"),
        "af": ("ZA",),
        "ak": ("GH",),
        "am": ("ET",),
        "an": ("ES",),
        "apn": ("IN",),
        "ar": (
            "AE",
            "BH",
            "DJ",
            "DZ",
            "EG",
            "EH",
            "ER",
            "IL",
            "IN",
            "IQ",
            "JO",
            "KM",
            "KW",
            "LB",
            "LY",
            "MA",
            "MR",
            "OM",
            "PS",
            "QA",
            "SA",
            "SD",
            "SO",
            "SS",
            "SY",
            "TD",
            "TN",
            "YE",
        ),
        "as": ("IN",),
        "ast": ("ES",),
        "ayc": ("PE",),
        "az": ("AZ", "IN"),
        "be": ("BY",),
        "bem": ("ZM",),
        "ber": ("DZ", "MA"),
        "bg": ("BG",),
        "bhb": ("IN",),
        "bho": ("IN",),
        "bn": ("BD", "IN"),
        "bo": ("CN", "IN"),
        "br": ("FR",),
        "brx": ("IN",),
        "bs": ("BA",),
        "byn": ("ER",),
        "ca": ("AD", "ES", "FR", "IT"),
        "ce": ("RU",),
        "ckb": ("IQ",),
        "cmn": ("TW",),
        "crh": ("UA",),
        "cs": ("CZ",),
        "csb": ("PL",),
        "cv": ("RU",),
        "cy": ("GB",),
        "da": ("DK",),
        "de": ("AT", "BE", "CH", "DE", "LI", "LU"),
        "doi": ("IN",),
        "dv": ("MV",),
        "dz": ("BT",),
        "el": ("GR", "CY"),
        "en": (
            "AG",
            "AU",
            "BD",
            "BW",
            "CA",
            "DK",
            "GB",
            "HK",
            "IE",
            "IN",
            "NG",
            "NZ",
            "PH",
            "SG",
            "US",
            "ZA",
            "ZM",
            "ZW",
        ),
        "eo": ("US",),
        "es": (
            "AR",
            "BO",
            "CL",
            "CO",
            "CR",
            "CU",
            "DO",
            "EC",
            "ES",
            "GT",
            "HN",
            "MX",
            "NI",
            "PA",
            "PE",
            "PR",
            "PY",
            "SV",
            "US",
            "UY",
            "VE",
        ),
        "et": ("EE",),
        "eu": ("ES", "FR"),
        "fa": ("IR",),
        "ff": ("SN",),
        "fi": ("FI",),
        "fil": ("PH",),
        "fo": ("FO",),
        "fr": ("CA", "CH", "FR", "LU"),
        "fur": ("IT",),
        "fy": ("NL", "DE"),
        "ga": ("IE",),
        "gd": ("GB",),
        "gez": ("ER", "ET"),
        "gl": ("ES",),
        "gu": ("IN",),
        "gv": ("GB",),
        "ha": ("NG",),
        "hak": ("TW",),
        "he": ("IL",),
        "hi": ("IN",),
        "hne": ("IN",),
        "hr": ("HR",),
        "hsb": ("DE",),
        "ht": ("HT",),
        "hu": ("HU",),
        "hy": ("AM",),
        "ia": ("FR",),
        "id": ("ID",),
        "ig": ("NG",),
        "ik": ("CA",),
        "is": ("IS",),
        "it": ("CH", "IT"),
        "iu": ("CA",),
        "iw": ("IL",),
        "ja": ("JP",),
        "ka": ("GE",),
        "kk": ("KZ",),
        "kl": ("GL",),
        "km": ("KH",),
        "kn": ("IN",),
        "ko": ("KR",),
        "kok": ("IN",),
        "ks": ("IN",),
        "ku": ("TR",),
        "kw": ("GB",),
        "ky": ("KG",),
        "lb": ("LU",),
        "lg": ("UG",),
        "li": ("BE", "NL"),
        "lij": ("IT",),
        "ln": ("CD",),
        "lo": ("LA",),
        "lt": ("LT",),
        "lv": ("LV",),
        "lzh": ("TW",),
        "mag": ("IN",),
        "mai": ("IN",),
        "mg": ("MG",),
        "mhr": ("RU",),
        "mi": ("NZ",),
        "mk": ("MK",),
        "ml": ("IN",),
        "mn": ("MN",),
        "mni": ("IN",),
        "mr": ("IN",),
        "ms": ("MY",),
        "mt": ("MT",),
        "my": ("MM",),
        "nan": ("TW",),
        "nb": ("NO",),
        "nds": ("DE", "NL"),
        "ne": ("NP",),
        "nhn": ("MX",),
        "niu": ("NU", "NZ"),
        "nl": ("AW", "BE", "NL"),
        "nn": ("NO",),
        "nr": ("ZA",),
        "nso": ("ZA",),
        "oc": ("FR",),
        "om": ("ET", "KE"),
        "or": ("IN",),
        "os": ("RU",),
        "pa": ("IN", "PK"),
        "pap": ("AN", "AW", "CW"),
        "pl": ("PL",),
        "ps": ("AF",),
        "pt": ("BR", "PT"),
        "quz": ("PE",),
        "raj": ("IN",),
        "ro": ("RO",),
        "ru": ("RU", "UA"),
        "rw": ("RW",),
        "sa": ("IN",),
        "sat": ("IN",),
        "sc": ("IT",),
        "sd": ("IN", "PK"),
        "se": ("NO",),
        "shs": ("CA",),
        "si": ("LK",),
        "sid": ("ET",),
        "sk": ("SK",),
        "sl": ("SI",),
        "so": ("DJ", "ET", "KE", "SO"),
        # NOTE(review): "ML" is the ISO 3166 code for Mali; this was possibly
        # meant to be "MK" — confirm against the SUPPORTED list before changing.
        "sq": ("AL", "ML"),
        "sr": ("ME", "RS"),
        "ss": ("ZA",),
        "st": ("ZA",),
        "sv": ("FI", "SE"),
        "sw": ("KE", "TZ"),
        "szl": ("PL",),
        "ta": ("IN", "LK"),
        "tcy": ("IN",),
        "te": ("IN",),
        "tg": ("TJ",),
        "th": ("TH",),
        "the": ("NP",),
        "ti": ("ER", "ET"),
        "tig": ("ER",),
        "tk": ("TM",),
        "tl": ("PH",),
        "tn": ("ZA",),
        "tr": ("CY", "TR"),
        "ts": ("ZA",),
        "tt": ("RU",),
        "ug": ("CN",),
        "uk": ("UA",),
        "unm": ("US",),
        "ur": ("IN", "PK"),
        "uz": ("UZ",),
        "ve": ("ZA",),
        "vi": ("VN",),
        "wa": ("BE",),
        "wae": ("CH",),
        "wal": ("ET",),
        "wo": ("SN",),
        "xh": ("ZA",),
        "yi": ("US",),
        "yo": ("NG",),
        "yue": ("HK",),
        "zh": ("CN", "HK", "SG", "TW"),
        "zu": ("ZA",),
    }

    def __init__(self, generator: Any) -> None:
        """
        Base class for fake data providers
        :param generator: `Generator` instance
        """
        self.generator = generator

    def locale(self) -> str:
        """Generate a random underscored i18n locale code (e.g. en_US)."""

        language_code = self.language_code()
        return (
            language_code
            + "_"
            + self.random_element(
                BaseProvider.language_locale_codes[language_code],
            )
        )

    def language_code(self) -> str:
        """Generate a random i18n language code (e.g. en)."""

        return self.random_element(BaseProvider.language_locale_codes.keys())

    def random_int(self, min: int = 0, max: int = 9999, step: int = 1) -> int:
        """Generate a random integer between two integers ``min`` and ``max`` inclusive
        while observing the provided ``step`` value.

        This method is functionally equivalent to randomly sampling an integer
        from the sequence ``range(min, max + 1, step)``.

        :sample: min=0, max=15
        :sample: min=0, max=15, step=3
        """
        return self.generator.random.randrange(min, max + 1, step)

    def random_digit(self) -> int:
        """Generate a random digit (0 to 9)."""

        return self.generator.random.randint(0, 9)

    def random_digit_not_null(self) -> int:
        """Generate a random non-zero digit (1 to 9)."""

        return self.generator.random.randint(1, 9)

    def random_digit_above_two(self) -> int:
        """Generate a random digit above value two (2 to 9)."""

        return self.generator.random.randint(2, 9)

    def random_digit_or_empty(self) -> Union[int, str]:
        """Generate a random digit (0 to 9) or an empty string.

        This method will return an empty string 50% of the time,
        and each digit has a 1/20 chance of being generated.
        """

        if self.generator.random.randint(0, 1):
            return self.generator.random.randint(0, 9)
        else:
            return ""

    def random_digit_not_null_or_empty(self) -> Union[int, str]:
        """Generate a random non-zero digit (1 to 9) or an empty string.

        This method will return an empty string 50% of the time,
        and each digit has a 1/18 chance of being generated.
        """

        if self.generator.random.randint(0, 1):
            return self.generator.random.randint(1, 9)
        else:
            return ""

    def random_number(self, digits: Optional[int] = None, fix_len: bool = False) -> int:
        """Generate a random integer according to the following rules:

        - If ``digits`` is ``None`` (default), its value will be set to a random
          integer from 1 to 9.
        - If ``fix_len`` is ``False`` (default), all integers that do not exceed
          the number of ``digits`` can be generated.
        - If ``fix_len`` is ``True``, only integers with the exact number of
          ``digits`` can be generated.

        :sample: fix_len=False
        :sample: fix_len=True
        :sample: digits=3
        :sample: digits=3, fix_len=False
        :sample: digits=3, fix_len=True
        """
        if digits is None:
            digits = self.random_digit_not_null()
        if digits < 0:
            raise ValueError("The digit parameter must be greater than or equal to 0.")
        if fix_len:
            if digits > 0:
                return self.generator.random.randint(pow(10, digits - 1), pow(10, digits) - 1)
            else:
                raise ValueError("A number of fixed length cannot have less than 1 digit in it.")
        else:
            return self.generator.random.randint(0, pow(10, digits) - 1)

    def random_letter(self) -> str:
        """Generate a random ASCII letter (a-z and A-Z)."""

        return self.generator.random.choice(getattr(string, "letters", string.ascii_letters))

    def random_letters(self, length: int = 16) -> Sequence[str]:
        """Generate a list of random ASCII letters (a-z and A-Z) of the specified ``length``.

        :sample: length=10
        """
        return self.random_choices(
            getattr(string, "letters", string.ascii_letters),
            length=length,
        )

    def random_lowercase_letter(self) -> str:
        """Generate a random lowercase ASCII letter (a-z)."""

        return self.generator.random.choice(string.ascii_lowercase)

    def random_uppercase_letter(self) -> str:
        """Generate a random uppercase ASCII letter (A-Z)."""

        return self.generator.random.choice(string.ascii_uppercase)

    def random_elements(
        self,
        elements: ElementsType[T] = ("a", "b", "c"),  # type: ignore[assignment]
        length: Optional[int] = None,
        unique: bool = False,
        use_weighting: Optional[bool] = None,
    ) -> Sequence[T]:
        """Generate a list of randomly sampled objects from ``elements``.

        Set ``unique`` to ``False`` for random sampling with replacement, and set ``unique`` to
        ``True`` for random sampling without replacement.

        If ``length`` is set to ``None`` or is omitted, ``length`` will be set to a random
        integer from 1 to the size of ``elements``.

        The value of ``length`` cannot be greater than the number of objects
        in ``elements`` if ``unique`` is set to ``True``.

        The value of ``elements`` can be any sequence type (``list``, ``tuple``, ``set``,
        ``string``, etc) or an ``OrderedDict`` type. If it is the latter, the keys will be
        used as the objects for sampling, and the values will be used as weighted probabilities
        if ``unique`` is set to ``False``. For example:

        .. code-block:: python

            # Random sampling with replacement
            fake.random_elements(
                elements=OrderedDict([
                    ("variable_1", 0.5),        # Generates "variable_1" 50% of the time
                    ("variable_2", 0.2),        # Generates "variable_2" 20% of the time
                    ("variable_3", 0.2),        # Generates "variable_3" 20% of the time
                    ("variable_4": 0.1),        # Generates "variable_4" 10% of the time
                ]), unique=False
            )

            # Random sampling without replacement (defaults to uniform distribution)
            fake.random_elements(
                elements=OrderedDict([
                    ("variable_1", 0.5),
                    ("variable_2", 0.2),
                    ("variable_3", 0.2),
                    ("variable_4": 0.1),
                ]), unique=True
            )

        :sample: elements=('a', 'b', 'c', 'd'), unique=False
        :sample: elements=('a', 'b', 'c', 'd'), unique=True
        :sample: elements=('a', 'b', 'c', 'd'), length=10, unique=False
        :sample: elements=('a', 'b', 'c', 'd'), length=4, unique=True
        :sample: elements=OrderedDict([
                        ("a", 0.45),
                        ("b", 0.35),
                       ("c", 0.15),
                       ("d", 0.05),
                   ]), length=20, unique=False
        :sample: elements=OrderedDict([
                       ("a", 0.45),
                       ("b", 0.35),
                       ("c", 0.15),
                       ("d", 0.05),
                   ]), unique=True
        """
        use_weighting = use_weighting if use_weighting is not None else self.__use_weighting__

        if isinstance(elements, dict) and not isinstance(elements, OrderedDict):
            raise ValueError("Use OrderedDict only to avoid dependency on PYTHONHASHSEED (See #363).")

        fn = choices_distribution_unique if unique else choices_distribution

        if length is None:
            length = self.generator.random.randint(1, len(elements))

        if unique and length > len(elements):
            raise ValueError("Sample length cannot be longer than the number of unique elements to pick from.")

        if isinstance(elements, dict):
            if not hasattr(elements, "_key_cache"):
                elements._key_cache = tuple(elements.keys())  # type: ignore

            choices = elements._key_cache  # type: ignore[attr-defined, union-attr]
            probabilities = tuple(elements.values()) if use_weighting else None
        else:
            if unique:
                # shortcut
                return self.generator.random.sample(elements, length)
            choices = elements
            probabilities = None

        return fn(
            tuple(choices),
            probabilities,
            self.generator.random,
            length=length,
        )

    def random_choices(
        self,
        elements: ElementsType[T] = ("a", "b", "c"),  # type: ignore[assignment]
        length: Optional[int] = None,
    ) -> Sequence[T]:
        """Generate a list of objects randomly sampled from ``elements`` with replacement.

        For information on the ``elements`` and ``length`` arguments, please refer to
        :meth:`random_elements() <faker.providers.BaseProvider.random_elements>` which
        is used under the hood with the ``unique`` argument explicitly set to ``False``.

        :sample: elements=('a', 'b', 'c', 'd')
        :sample: elements=('a', 'b', 'c', 'd'), length=10
        :sample: elements=OrderedDict([
                     ("a", 0.45),
                     ("b", 0.35),
                     ("c", 0.15),
                     ("d", 0.05),
                 ])
        :sample: elements=OrderedDict([
                     ("a", 0.45),
                     ("b", 0.35),
                     ("c", 0.15),
                     ("d", 0.05),
                 ]), length=20
        """
        return self.random_elements(elements, length, unique=False)

    def random_element(self, elements: ElementsType[T] = ("a", "b", "c")) -> T:
        """Generate a randomly sampled object from ``elements``.

        For information on the ``elements`` argument, please refer to
        :meth:`random_elements() <faker.providers.BaseProvider.random_elements>` which
        is used under the hood with the ``unique`` argument set to ``False`` and the
        ``length`` argument set to ``1``.

        :sample: elements=('a', 'b', 'c', 'd')
        :sample size=10: elements=OrderedDict([
                     ("a", 0.45),
                     ("b", 0.35),
                     ("c", 0.15),
                     ("d", 0.05),
                 ])
        """

        return self.random_elements(elements, length=1)[0]

    def random_sample(
        self, elements: ElementsType[T] = ("a", "b", "c"), length: Optional[int] = None  # type: ignore[assignment]
    ) -> Sequence[T]:
        """Generate a list of objects randomly sampled from ``elements`` without replacement.

        For information on the ``elements`` and ``length`` arguments, please refer to
        :meth:`random_elements() <faker.providers.BaseProvider.random_elements>` which
        is used under the hood with the ``unique`` argument explicitly set to ``True``.

        :sample: elements=('a', 'b', 'c', 'd', 'e', 'f')
        :sample: elements=('a', 'b', 'c', 'd', 'e', 'f'), length=3
        """
        return self.random_elements(elements, length, unique=True)

    def randomize_nb_elements(
        self,
        number: int = 10,
        le: bool = False,
        ge: bool = False,
        min: Optional[int] = None,
        max: Optional[int] = None,
    ) -> int:
        """Generate a random integer near ``number`` according to the following rules:

        - If ``le`` is ``False`` (default), allow generation up to 140% of ``number``.
          If ``True``, upper bound generation is capped at 100%.
        - If ``ge`` is ``False`` (default), allow generation down to 60% of ``number``.
          If ``True``, lower bound generation is capped at 100%.
        - If a numerical value for ``min`` is provided, generated values less than ``min``
          will be clamped at ``min``.
        - If a numerical value for ``max`` is provided, generated values greater than
          ``max`` will be clamped at ``max``.
        - If both ``le`` and ``ge`` are ``True``, the value of ``number`` will automatically
          be returned, regardless of the values supplied for ``min`` and ``max``.

        :sample: number=100
        :sample: number=100, ge=True
        :sample: number=100, ge=True, min=120
        :sample: number=100, le=True
        :sample: number=100, le=True, max=80
        :sample: number=79, le=True, ge=True, min=80
        """
        if le and ge:
            return number
        _min = 100 if ge else 60
        _max = 100 if le else 140
        nb = int(number * self.generator.random.randint(_min, _max) / 100)
        if min is not None and nb < min:
            nb = min
        if max is not None and nb > max:
            nb = max
        return nb

    def numerify(self, text: str = "###") -> str:
        """Generate a string with each placeholder in ``text`` replaced according
        to the following rules:

        - Number signs ('#') are replaced with a random digit (0 to 9).
        - Percent signs ('%') are replaced with a random non-zero digit (1 to 9).
        - Dollar signs ('$') are replaced with a random digit above two (2 to 9).
        - Exclamation marks ('!') are replaced with a random digit or an empty string.
        - At symbols ('@') are replaced with a random non-zero digit or an empty string.

        Under the hood, this method uses :meth:`random_digit() <faker.providers.BaseProvider.random_digit>`,
        :meth:`random_digit_not_null() <faker.providers.BaseProvider.random_digit_not_null>`,
        :meth:`random_digit_or_empty() <faker.providers.BaseProvider.random_digit_or_empty>`,
        and :meth:`random_digit_not_null_or_empty() <faker.providers.BaseProvider.random_digit_not_null_or_empty>`
        to generate the random values.

        :sample: text='Intel Core i%-%%##K vs AMD Ryzen % %%##X'
        :sample: text='!!! !!@ !@! !@@ @!! @!@ @@! @@@'
        """
        text = _re_hash.sub(lambda x: str(self.random_digit()), text)
        text = _re_perc.sub(lambda x: str(self.random_digit_not_null()), text)
        text = _re_dol.sub(lambda x: str(self.random_digit_above_two()), text)
        text = _re_excl.sub(lambda x: str(self.random_digit_or_empty()), text)
        text = _re_at.sub(lambda x: str(self.random_digit_not_null_or_empty()), text)
        return text

    def lexify(self, text: str = "????", letters: str = string.ascii_letters) -> str:
        """Generate a string with each question mark ('?') in ``text``
        replaced with a random character from ``letters``.

        By default, ``letters`` contains all ASCII letters, uppercase and lowercase.

        :sample: text='Random Identifier: ??????????'
        :sample: text='Random Identifier: ??????????', letters='ABCDE'
        """
        return _re_qm.sub(lambda x: self.random_element(letters), text)

    def bothify(self, text: str = "## ??", letters: str = string.ascii_letters) -> str:
        """Generate a string with each placeholder in ``text`` replaced according to the following rules:

        - Number signs ('#') are replaced with a random digit (0 to 9).
        - Question marks ('?') are replaced with a random character from ``letters``.

        By default, ``letters`` contains all ASCII letters, uppercase and lowercase.

        Under the hood, this method uses :meth:`numerify() <faker.providers.BaseProvider.numerify>` and
        and :meth:`lexify() <faker.providers.BaseProvider.lexify>` to generate random values for number
        signs and question marks respectively.

        :sample: letters='ABCDE'
        :sample: text='Product Number: ????-########'
        :sample: text='Product Number: ????-########', letters='ABCDE'
        """
        return self.lexify(self.numerify(text), letters=letters)

    def hexify(self, text: str = "^^^^", upper: bool = False) -> str:
        """Generate a string with each circumflex ('^') in ``text``
        replaced with a random hexadecimal character.

        By default, ``upper`` is set to False. If set to ``True``, output
        will be formatted using uppercase hexadecimal characters.

        :sample: text='MAC Address: ^^:^^:^^:^^:^^:^^'
        :sample: text='MAC Address: ^^:^^:^^:^^:^^:^^', upper=True
        """
        letters = string.hexdigits[:-6]
        if upper:
            letters = letters.upper()
        return _re_cir.sub(lambda x: self.random_element(letters), text)


class DynamicProvider(BaseProvider):
    """Provider that samples from a caller-supplied list of elements and exposes
    the sampler under a caller-chosen method name."""

    def __init__(
        self,
        provider_name: str,
        elements: Optional[List] = None,
        generator: Optional[Any] = None,
    ):
        """
        A faker Provider capable of getting a list of elements to randomly select from,
        instead of using the predefined list of elements which exist in the default providers in faker.

        :param provider_name: Name of provider, which would translate into the function name e.g. faker.my_fun().
        :param elements: List of values to randomly select from. The list is stored as is
            (not copied), so ``add_element()`` appends to the caller's list.
        :param generator: Generator object. If missing, the default Generator is used.
        :raises ValueError: if ``provider_name`` starts with ``__``.

        :example:
        >>> from faker import Faker
        >>> from faker.providers import DynamicProvider

        >>> medical_professions_provider = DynamicProvider(
        ...     provider_name="medical_profession",
        ...     elements=["dr.", "doctor", "nurse", "surgeon", "clerk"],
        ... )
        >>> fake = Faker()
        >>> fake.add_provider(medical_professions_provider)

        >>> fake.medical_profession()
        "dr."

        """

        if not generator:
            generator = Generator()
        super().__init__(generator)
        if provider_name.startswith("__"):
            raise ValueError("Provider name cannot start with __ as it would be ignored by Faker")

        self.provider_name = provider_name

        # Keep the caller's list when one is given; otherwise start empty.
        self.elements = elements if elements else []

        setattr(self, provider_name, self.get_random_value)  # Add a method for the provider_name value

    def add_element(self, element: str) -> None:
        """Add new element."""
        self.elements.append(element)

    def get_random_value(self, use_weighting: bool = True) -> Any:
        """Returns a random value for this provider.

        :param use_weighting: boolean option to use weighting. Defaults to True
        :raises ValueError: if the provider has no elements to sample from.
        """
        # An empty list is falsy, so a separate length check is unnecessary.
        if not self.elements:
            raise ValueError("Elements should be a list of values the provider samples from")

        return self.random_elements(self.elements, length=1, use_weighting=use_weighting)[0]