From 6d7ba58f880be618ade07f8ea080fe8c4bf8a896 Mon Sep 17 00:00:00 2001
From: cyfraeviolae <cyfraeviolae>
Date: Wed, 3 Apr 2024 03:10:44 -0400
Subject: venv

---
 .../site-packages/faker/utils/distribution.py      | 88 ++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 venv/lib/python3.11/site-packages/faker/utils/distribution.py

(limited to 'venv/lib/python3.11/site-packages/faker/utils/distribution.py')
diff --git a/venv/lib/python3.11/site-packages/faker/utils/distribution.py b/venv/lib/python3.11/site-packages/faker/utils/distribution.py
new file mode 100644
index 0000000..45580a5
--- /dev/null
+++ b/venv/lib/python3.11/site-packages/faker/utils/distribution.py
@@ -0,0 +1,88 @@
+import bisect
+import itertools
+
+from random import Random
+from typing import Generator, Iterable, Optional, Sequence, TypeVar
+
+from faker.generator import random as mod_random
+
+
+def random_sample(random: Optional[Random] = None) -> float:
+    """Draw a single float by calling ``uniform(0.0, 1.0)`` on a generator.
+
+    :param random: generator to draw from; when ``None``, the module-level
+        ``mod_random`` imported from ``faker.generator`` is used instead.
+    :return: the value produced by ``uniform(0.0, 1.0)``.
+    """
+    rng = mod_random if random is None else random
+    return rng.uniform(0.0, 1.0)
+
+
+def cumsum(it: Iterable[float]) -> Generator[float, None, None]:
+    """Lazily yield the running total of ``it``, one partial sum per element.
+
+    The total starts from ``0`` and each element is added in iteration order;
+    an empty iterable yields nothing.
+    """
+    running = itertools.accumulate(it, initial=0)
+    # Discard the seed value so the first yielded item is ``0 + first element``.
+    next(running)
+    yield from running
+
+
+# Element type shared by the input sequence and the returned selection in the
+# sampling helpers below.
+T = TypeVar("T")
+
+
+def choices_distribution_unique(
+    a: Sequence[T],
+    p: Optional[Sequence[float]],
+    random: Optional[Random] = None,
+    length: int = 1,
+) -> Sequence[T]:
+    """Pick ``length`` elements of ``a`` without replacement, weighted by ``p``.
+
+    Each round rebuilds the normalised cumulative weights of the elements that
+    are still available, draws one uniform sample, takes the element whose
+    cumulative bucket contains that sample, and removes the element together
+    with its weight before the next round. Uniqueness is by position in ``a``.
+
+    :param a: candidate elements.
+    :param p: one weight per element of ``a``; required despite the
+        ``Optional`` annotation.
+    :param random: generator used for the draws; ``mod_random`` when ``None``.
+    :param length: number of elements to pick.
+    :return: list of the picked elements, in draw order.
+    :raises AssertionError: if ``p`` is ``None``, if ``a`` and ``p`` differ in
+        length, or if ``length`` exceeds ``len(a)``.
+    :raises ZeroDivisionError: if the weights still available sum to zero.
+    """
+    # As of Python 3.7, there isn't a way to sample unique elements that takes
+    # weight into account.
+    if random is None:
+        random = mod_random
+
+    # Raised explicitly rather than through ``assert`` so the checks are not
+    # stripped under ``python -O``; the exception type callers see is unchanged.
+    if p is None:
+        raise AssertionError("p must not be None")
+    if len(a) != len(p):
+        raise AssertionError("a and p must have the same length")
+    if len(a) < length:
+        raise AssertionError("You can't request more unique samples than elements in the dataset.")
+
+    choices = []
+    items = list(a)
+    probabilities = list(p)
+    for _ in range(length):
+        cdf = tuple(cumsum(probabilities))
+        normal = cdf[-1]
+        normalized_cdf = [total / normal for total in cdf]
+        uniform_sample = random_sample(random=random)
+        # bisect_right gives the first position whose cumulative value is
+        # strictly greater than the sample.
+        idx = bisect.bisect_right(normalized_cdf, uniform_sample)
+        choices.append(items.pop(idx))
+        probabilities.pop(idx)
+    return choices
+
+
+def choices_distribution(
+    a: Sequence[T],
+    p: Optional[Sequence[float]],
+    random: Optional[Random] = None,
+    length: int = 1,
+) -> Sequence[T]:
+    """Pick ``length`` elements of ``a`` with replacement, optionally weighted by ``p``.
+
+    When the generator offers a ``choices`` method the work is delegated to it
+    (or to ``choice`` for a single unweighted pick). Otherwise elements are
+    selected by bisecting a normalised cumulative-weight table with uniform
+    samples.
+
+    :param a: candidate elements.
+    :param p: one weight per element of ``a``, or ``None`` for equal weights.
+    :param random: generator used for the draws; ``mod_random`` when ``None``.
+    :param length: number of elements to pick.
+    :return: list of the picked elements, in draw order.
+    :raises AssertionError: if ``p`` is given and its length differs from ``a``.
+    """
+    if random is None:
+        random = mod_random
+
+    # Raised explicitly rather than through ``assert`` so the check is not
+    # stripped under ``python -O``; the exception type callers see is unchanged.
+    if p is not None and len(a) != len(p):
+        raise AssertionError("a and p must have the same length")
+
+    if hasattr(random, "choices"):
+        if length == 1 and p is None:
+            return [random.choice(a)]
+        return random.choices(a, weights=p, k=length)
+
+    # Fallback for generator objects that do not provide ``choices``.
+    if p is None:
+        p = itertools.repeat(1, len(a))  # type: ignore
+
+    cdf = list(cumsum(p))  # type: ignore
+    normal = cdf[-1]
+    normalized_cdf = [total / normal for total in cdf]
+    choices = []
+    for _ in range(length):
+        uniform_sample = random_sample(random=random)
+        # bisect_right gives the first position whose cumulative value is
+        # strictly greater than the sample.
+        idx = bisect.bisect_right(normalized_cdf, uniform_sample)
+        choices.append(a[idx])
+    return choices
-- 
cgit v1.2.3