summaryrefslogtreecommitdiff
path: root/venv/lib/python3.11/site-packages/faker/utils/distribution.py
diff options
context:
space:
mode:
author cyfraeviolae <cyfraeviolae> 2024-04-03 03:17:55 -0400
committer cyfraeviolae <cyfraeviolae> 2024-04-03 03:17:55 -0400
commit 12cf076118570eebbff08c6b3090e0d4798447a1 (patch)
tree 3ba25e17e3c3a5e82316558ba3864b955919ff72 /venv/lib/python3.11/site-packages/faker/utils/distribution.py
parent c45662ff3923b34614ddcc8feb9195541166dcc5 (diff)
no venv
Diffstat (limited to 'venv/lib/python3.11/site-packages/faker/utils/distribution.py')
-rw-r--r-- venv/lib/python3.11/site-packages/faker/utils/distribution.py 88
1 files changed, 0 insertions, 88 deletions
diff --git a/venv/lib/python3.11/site-packages/faker/utils/distribution.py b/venv/lib/python3.11/site-packages/faker/utils/distribution.py
deleted file mode 100644
index 45580a5..0000000
--- a/venv/lib/python3.11/site-packages/faker/utils/distribution.py
+++ /dev/null
@@ -1,88 +0,0 @@
-import bisect
-import itertools
-
-from random import Random
-from typing import Generator, Iterable, Optional, Sequence, TypeVar
-
-from faker.generator import random as mod_random
-
-
def random_sample(random: Optional[Random] = None) -> float:
    """Draw one float with ``uniform(0.0, 1.0)`` from the given generator.

    :param random: generator to draw from; when ``None``, the ``random``
        object imported from ``faker.generator`` is used instead.
    :return: the value produced by ``uniform(0.0, 1.0)``.
    """
    rng = mod_random if random is None else random
    return rng.uniform(0.0, 1.0)
-
-
def cumsum(it: Iterable[float]) -> Generator[float, None, None]:
    """Lazily yield the running totals of *it*, one per input element.

    The sum starts from ``0``, so the first value yielded is ``0 + first``.
    An empty iterable yields nothing.

    :param it: values to accumulate.
    :return: generator of cumulative sums.
    """
    running = itertools.accumulate(it, initial=0)
    # ``initial`` makes accumulate emit the seed first; discard it so exactly
    # one total is produced per input element.
    next(running)
    yield from running
-
-
# Element type of the population handed to the samplers below; their
# signatures use it for both the input sequence and the returned sequence.
T = TypeVar("T")
-
-
def choices_distribution_unique(
    a: Sequence[T],
    p: Optional[Sequence[float]],
    random: Optional[Random] = None,
    length: int = 1,
) -> Sequence[T]:
    """Pick ``length`` distinct positions of ``a`` using the weights ``p``.

    Each round builds a normalised cumulative distribution over the entries
    that are still available, draws one value with ``uniform(0.0, 1.0)``,
    and removes the selected entry together with its weight so the same
    position cannot be picked twice.

    :param a: population to sample from.
    :param p: one weight per element of ``a``. Weights are divided by their
        running total, so they do not have to sum to 1.
    :param random: generator to draw from; when ``None``, the ``random``
        object imported from ``faker.generator`` is used.
    :param length: number of entries to return.
    :return: list of the selected entries, in the order they were drawn.
    :raises AssertionError: if ``p`` is ``None``, if ``a`` and ``p`` differ in
        length, or if ``length`` exceeds ``len(a)``.
    :raises ZeroDivisionError: if the weights still available sum to zero
        when another draw is needed.
    """
    # As of Python 3.7, there isn't a way to sample unique elements that takes
    # weight into account.
    if random is None:
        random = mod_random

    assert p is not None
    assert len(a) == len(p)
    assert len(a) >= length, "You can't request more unique samples than elements in the dataset."

    choices = []
    items = list(a)
    probabilities = list(p)
    for _ in range(length):
        cdf = list(itertools.accumulate(probabilities))
        total = cdf[-1]
        normalized = [c / total for c in cdf]
        uniform_sample = random.uniform(0.0, 1.0)
        # bisect_right returns len(normalized) when the draw equals 1.0
        # (uniform may return its upper bound); clamp to the last entry
        # instead of indexing past the end.
        idx = min(bisect.bisect_right(normalized, uniform_sample), len(items) - 1)
        choices.append(items.pop(idx))
        probabilities.pop(idx)
    return choices
-
-
def choices_distribution(
    a: Sequence[T],
    p: Optional[Sequence[float]],
    random: Optional[Random] = None,
    length: int = 1,
) -> Sequence[T]:
    """Pick ``length`` elements of ``a`` with replacement, weighted by ``p``.

    When the generator exposes a ``choices`` method the work is delegated to
    it (or to ``choice`` for a single draw without weights). Otherwise the
    draw is done by hand against a normalised cumulative distribution.

    :param a: population to sample from.
    :param p: one weight per element of ``a``, or ``None`` for equal weights.
    :param random: generator to draw from; when ``None``, the ``random``
        object imported from ``faker.generator`` is used.
    :param length: number of elements to return.
    :return: list of the selected elements.
    :raises AssertionError: if ``p`` is given and differs in length from ``a``.
    """
    if random is None:
        random = mod_random

    if p is not None:
        assert len(a) == len(p)

    if hasattr(random, "choices"):
        if length == 1 and p is None:
            return [random.choice(a)]
        return random.choices(a, weights=p, k=length)

    # Fallback for generator objects that do not provide ``choices``.
    weights = itertools.repeat(1, len(a)) if p is None else p
    cdf = list(itertools.accumulate(weights))
    total = cdf[-1]
    normalized = [c / total for c in cdf]
    # bisect_right returns len(normalized) when the draw equals 1.0 (uniform
    # may return its upper bound); clamp so the lookup stays inside ``a``.
    last = len(a) - 1
    return [
        a[min(bisect.bisect_right(normalized, random.uniform(0.0, 1.0)), last)]
        for _ in range(length)
    ]