3 # © Copyright 2021-2022, Scott Gasch
5 """Mathematical helpers."""
10 from heapq import heappop, heappush
11 from typing import Dict, List, Optional, Tuple
13 from pyutils import dict_utils
class NumericPopulation(object):
    """A numeric population with some statistics such as median, mean, pN,
    stdev, etc...

    The members are kept in two heaps split around the median:
    ``lowers`` stores the smaller half as negated values (so its root is
    the largest of them) and ``highers`` stores the larger half as a
    plain min-heap.  The two heaps never differ in size by more than one.

    >>> pop = NumericPopulation()
    >>> pop.add_number(1)
    >>> pop.add_number(10)
    >>> pop.add_number(3)
    >>> pop.get_median()
    3
    >>> pop.add_number(7)
    >>> pop.add_number(5)
    >>> pop.get_median()
    5
    >>> pop.get_mean()
    5.2
    >>> round(pop.get_stdev(), 1)
    3.1
    >>> pop.get_percentile(20)
    3
    >>> pop.get_percentile(60)
    7

    """

    # NOTE(review): only the assignments to lowers/highers and sorted_copy
    # were visible in the reviewed excerpt; the initial values of aggregate,
    # maximum and minimum are reconstructed from how add_number uses them --
    # confirm against the full file.
    def __init__(self):
        self.lowers: List[float] = []  # negated values: acts as a max heap
        self.highers: List[float] = []  # min heap of the larger half
        self.aggregate = 0.0  # running sum of every number added
        self.sorted_copy: Optional[List[float]] = None  # percentile cache
        self.maximum: Optional[float] = None
        self.minimum: Optional[float] = None

    def add_number(self, number: float):
        """Adds a number to the population.

        Performs one heap push plus at most one pop/push pair to
        rebalance, so the cost is logarithmic in the population size.

        Args:
            number: the value to add.
        """
        if not self.highers or number > self.highers[0]:
            heappush(self.highers, number)
        else:
            heappush(self.lowers, -number)  # for lowers we need a max heap
        self.aggregate += number
        self._rebalance()

        # Compare against None explicitly: a truthiness test would treat a
        # legitimate extreme of 0 as "not set yet" and overwrite it.
        if self.maximum is None or number > self.maximum:
            self.maximum = number
        if self.minimum is None or number < self.minimum:
            self.minimum = number

    # NOTE(review): this method's `def` line was not visible in the reviewed
    # excerpt; the name is reconstructed from its docstring -- confirm.
    def __len__(self):
        """Return the population size."""
        return len(self.lowers) + len(self.highers)

    # NOTE(review): this method's `def` line was not visible in the reviewed
    # excerpt; the name is reconstructed -- confirm.
    def _rebalance(self):
        """Move one element across if the heaps differ in size by more
        than one."""
        if len(self.lowers) - len(self.highers) > 1:
            heappush(self.highers, -heappop(self.lowers))
        elif len(self.highers) - len(self.lowers) > 1:
            heappush(self.lowers, -heappop(self.highers))

    def get_median(self) -> float:
        """Returns the approximate median (p50) so far in O(1) time.

        With an even number of members the larger element of the lower
        half is returned; the two middle elements are not averaged.

        Raises:
            IndexError: if the population is empty.
        """
        if len(self.lowers) >= len(self.highers):
            return -self.lowers[0]
        return self.highers[0]

    def get_mean(self) -> float:
        """Returns the mean (arithmetic mean) so far in O(1) time.

        Raises:
            ZeroDivisionError: if the population is empty.
        """
        count = len(self.lowers) + len(self.highers)
        return self.aggregate / count

    def get_mode(self) -> Tuple[float, int]:
        """Returns the mode (most common member in the population).

        Tallies every member in one pass and hands the tally to
        ``dict_utils.item_with_max_value``; the result (presumably a
        ``(value, occurrences)`` pair, per the annotated return type) is
        whatever that helper returns.
        """
        count: Dict[float, int] = collections.defaultdict(int)
        for n in self.lowers:
            count[-n] += 1  # undo the negation used by the max heap
        for n in self.highers:
            count[n] += 1
        return dict_utils.item_with_max_value(count)

    def get_stdev(self) -> float:
        """Returns the (population) standard deviation so far in O(n) time.

        Raises:
            ZeroDivisionError: if the population is empty.
        """
        mean = self.get_mean()
        squared_error = sum((-n - mean) ** 2 for n in self.lowers)
        squared_error += sum((n - mean) ** 2 for n in self.highers)
        count = len(self.lowers) + len(self.highers)
        # The square root applies to the mean squared deviation, not to
        # the raw sum of squares.
        return math.sqrt(squared_error / count)

    def _create_sorted_copy_if_needed(self, count: int):
        """Rebuild the cached sorted member list unless it already holds
        exactly ``count`` elements."""
        if not self.sorted_copy or count != len(self.sorted_copy):
            self.sorted_copy = sorted([-x for x in self.lowers] + self.highers)

    def get_percentile(self, n: float) -> float:
        """Returns the number at approximately pn% (i.e. the nth percentile)
        of the distribution in O(n log n) time.  Not thread-safe;
        does caching across multiple calls without an invocation to
        add_number for perf reasons.

        Args:
            n: the percentile to look up, e.g. 20 for p20.  The computed
                position is clamped to the valid index range.

        Raises:
            AssertionError: if the population is empty (and n is not 50).
        """
        if n == 50:
            return self.get_median()
        count = len(self.lowers) + len(self.highers)
        self._create_sorted_copy_if_needed(count)
        assert self.sorted_copy
        index = round(count * (n / 100.0))
        index = max(0, min(count - 1, index))
        return self.sorted_copy[index]
145 def gcd_floats(a: float, b: float) -> float:
146 """Returns the greatest common divisor of a and b."""
148 return gcd_floats(b, a)
153 return gcd_floats(b, a - math.floor(a / b) * b)
def gcd_float_sequence(lst: List[float]) -> float:
    """Returns the greatest common divisor of a list of floats.

    The result is built by folding ``gcd_floats`` over the list from
    left to right.

    Args:
        lst: the numbers to reduce; must contain at least one element.

    Raises:
        ValueError: if lst is empty.
    """
    if not lst:
        raise ValueError("Need at least one number")

    # NOTE(review): the single-element branch was not visible in the
    # reviewed excerpt; returning the lone element is assumed from the
    # error message above -- confirm against the full file.
    result = lst[0]
    for value in lst[1:]:
        result = gcd_floats(result, value)
    return result
def truncate_float(n: float, decimals: int = 2) -> float:
    """Truncate a float to a particular number of decimals.

    Digits beyond ``decimals`` are dropped (truncation toward zero), not
    rounded.

    >>> truncate_float(3.1415927, 3)
    3.141

    Args:
        n: the number to truncate.
        decimals: how many decimal places to keep; must be between 1 and
            9 inclusive.

    Raises:
        AssertionError: if decimals is outside 1..9.
    """
    assert 0 < decimals < 10
    multiplier = 10**decimals
    # int() discards the fractional part of the scaled value, which is
    # what makes this a truncation rather than a rounding.
    return int(n * multiplier) / multiplier
181 def percentage_to_multiplier(percent: float) -> float:
182 """Given a percentage (e.g. 155%), return a factor needed to scale a
183 number by that percentage.
185 >>> percentage_to_multiplier(155)
187 >>> percentage_to_multiplier(45)
189 >>> percentage_to_multiplier(-25)
192 multiplier = percent / 100
197 def multiplier_to_percent(multiplier: float) -> float:
198 """Convert a multiplicative factor into a percent change.
200 >>> multiplier_to_percent(0.75)
202 >>> multiplier_to_percent(1.0)
204 >>> multiplier_to_percent(1.99)
211 percent = 1.0 - percent
@functools.lru_cache(maxsize=1024, typed=True)
def is_prime(n: int) -> bool:
    """
    Returns True if n is prime and False otherwise.  Obviously(?) very slow for
    very large input numbers.

    Uses trial division by 2, 3 and then by candidates of the form
    6k - 1 and 6k + 1 up to the square root of n.  Results are memoized
    by the lru_cache decorator.

    >>> is_prime(13)
    True
    >>> is_prime(22)
    False

    Args:
        n: the integer to test.

    Raises:
        TypeError: if n is not an int.
    """
    if not isinstance(n, int):
        raise TypeError("argument passed to is_prime is not of 'int' type")

    # Corner cases: nothing at or below 1 is prime; 2 and 3 are.
    if n <= 1:
        return False
    if n <= 3:
        return True

    # This is checked so that we can skip middle five numbers in below
    # loop
    if n % 2 == 0 or n % 3 == 0:
        return False

    # Every remaining prime candidate has the form 6k - 1 or 6k + 1, so
    # test i and i + 2 and then step by 6.
    i = 5
    while i * i <= n:
        if n % i == 0 or n % (i + 2) == 0:
            return False
        i += 6
    return True
251 if __name__ == '__main__':