3 """Mathematical helpers."""
7 from heapq import heappop, heappush
8 from typing import List, Optional
class NumericPopulation(object):
    """A numeric population with running statistics (median, mean, stdev, pN).

    Values are stored in two heaps.  ``lowers`` holds the smaller half of
    the population as a max-heap (values are stored negated because
    :mod:`heapq` only implements min-heaps) and ``highers`` holds the
    larger half as a min-heap.  The heaps are rebalanced after every
    insertion so that their sizes never differ by more than one, which
    is what lets :meth:`get_median` answer in O(1).

    >>> pop = NumericPopulation()
    >>> for value in (1, 10, 3, 7, 5):
    ...     pop.add_number(value)
    >>> pop.get_median()
    5
    >>> pop.get_mean()
    5.2
    >>> round(pop.get_stdev(), 2)
    3.49
    >>> pop.get_percentile(20)
    3
    >>> pop.get_percentile(60)
    7
    >>> pop.get_percentile(100)
    10
    """

    def __init__(self):
        # Max-heap of the smaller half (negated) / min-heap of the larger half.
        self.lowers: List[float] = []
        self.highers: List[float] = []
        # Running sum of every number added; used by get_mean().
        self.aggregate = 0.0
        # Fully sorted snapshot, built lazily by get_percentile().
        self.sorted_copy: Optional[List[float]] = None

    def add_number(self, number: float) -> None:
        """Add ``number`` to the population in O(log n) time."""
        if not self.highers or number > self.highers[0]:
            heappush(self.highers, number)
        else:
            heappush(self.lowers, -number)  # for lowers we need a max heap
        self.aggregate += number
        # Any cached sorted snapshot is stale once the population changes.
        self.sorted_copy = None
        self._rebalance()

    def _rebalance(self) -> None:
        """Move one element across so the heap sizes differ by at most one."""
        if len(self.lowers) - len(self.highers) > 1:
            heappush(self.highers, -heappop(self.lowers))
        elif len(self.highers) - len(self.lowers) > 1:
            heappush(self.lowers, -heappop(self.highers))

    def get_median(self) -> float:
        """Return the approximate median (p50) so far in O(1) time.

        When the population has an even size the lower of the two middle
        values is returned (no averaging), hence "approximate".

        Raises:
            IndexError: if the population is empty.
        """
        if len(self.lowers) >= len(self.highers):
            return -self.lowers[0]
        return self.highers[0]

    def get_mean(self) -> float:
        """Return the arithmetic mean so far in O(1) time.

        Raises:
            ZeroDivisionError: if the population is empty.
        """
        count = len(self.lowers) + len(self.highers)
        return self.aggregate / count

    def get_stdev(self) -> float:
        """Return the sample standard deviation so far in O(n) time.

        This is the square root of the sum of squared deviations from
        the mean divided by ``count - 1``.

        Raises:
            ZeroDivisionError: if the population holds fewer than two
                numbers.
        """
        mean = self.get_mean()
        # lowers stores negated values, so undo the negation first.
        squared_deviations = sum((-n - mean) ** 2 for n in self.lowers)
        squared_deviations += sum((n - mean) ** 2 for n in self.highers)
        count = len(self.lowers) + len(self.highers) - 1
        # Divide before taking the root: sqrt(sum / (n - 1)).
        return math.sqrt(squared_deviations / count)

    def get_percentile(self, n: float) -> float:
        """Return the number at approximately the nth percentile.

        Takes O(n log n) time the first time it is called after an
        insertion because it requires a complete sort.  The sorted copy
        is cached and reused across calls until :meth:`add_number` is
        invoked again.  Not thread safe.

        Args:
            n: the percentile to fetch, from 0 to 100 inclusive.

        Raises:
            AssertionError: if the computed position falls outside the
                population (e.g. ``n`` above 100 or an empty population).
        """
        if n == 50:
            return self.get_median()
        count = len(self.lowers) + len(self.highers)
        if self.sorted_copy is None or len(self.sorted_copy) != count:
            self.sorted_copy = sorted([-x for x in self.lowers] + self.highers)
        index = round(count * (n / 100.0))
        if n <= 100:
            # p100 (and near-top percentiles of small populations) round
            # to one past the end; they mean "the largest value".
            index = min(index, count - 1)
        assert 0 <= index < count
        return self.sorted_copy[index]
110 def gcd_floats(a: float, b: float) -> float:
112 return gcd_floats(b, a)
117 return gcd_floats(b, a - math.floor(a / b) * b)
120 def gcd_float_sequence(lst: List[float]) -> float:
122 raise ValueError("Need at least one number")
126 gcd = gcd_floats(lst[0], lst[1])
127 for i in range(2, len(lst)):
128 gcd = gcd_floats(gcd, lst[i])
def truncate_float(n: float, decimals: int = 2) -> float:
    """Truncate a float to a particular number of decimals.

    Digits beyond ``decimals`` are dropped (rounded toward zero), never
    rounded up.  The truncation is performed on the shortest decimal
    representation of ``n`` (its ``repr``) so that binary floating point
    multiplication error cannot knock a digit off, e.g. ``0.29`` stays
    ``0.29`` rather than becoming ``0.28``.

    >>> truncate_float(3.1415927, 3)
    3.141
    >>> truncate_float(0.29, 2)
    0.29
    >>> truncate_float(-2.719, 2)
    -2.71

    Args:
        n: the number to truncate.
        decimals: how many digits to keep after the decimal point; must
            be between 1 and 9 inclusive.

    Returns:
        ``n`` with at most ``decimals`` digits after the decimal point.

    Raises:
        AssertionError: if ``decimals`` is out of range.
        ValueError: if ``n`` is NaN.
        OverflowError: if ``n`` is infinite.
    """
    import math
    from decimal import ROUND_DOWN, Decimal, localcontext

    assert 0 < decimals < 10
    value = float(n)
    if not math.isfinite(value):
        # int() always raises here (ValueError for NaN, OverflowError
        # for infinities), the same exception types the multiply-and-int
        # implementation produced.
        return float(int(value))
    with localcontext() as ctx:
        # Enough digits for the largest finite float plus nine decimals,
        # so quantize() never fails on precision.
        ctx.prec = 350
        truncated = Decimal(repr(value)).quantize(
            Decimal(1).scaleb(-decimals), rounding=ROUND_DOWN
        )
    # Adding 0.0 turns a negative zero (e.g. from -0.001) into plain 0.0.
    return float(truncated) + 0.0
145 def percentage_to_multiplier(percent: float) -> float:
146 """Given a percentage (e.g. 155%), return a factor needed to scale a
147 number by that percentage.
149 >>> percentage_to_multiplier(155)
151 >>> percentage_to_multiplier(45)
153 >>> percentage_to_multiplier(-25)
157 multiplier = percent / 100
162 def multiplier_to_percent(multiplier: float) -> float:
163 """Convert a multiplicative factor into a percent change.
165 >>> multiplier_to_percent(0.75)
167 >>> multiplier_to_percent(1.0)
169 >>> multiplier_to_percent(1.99)
177 percent = 1.0 - percent
182 @functools.lru_cache(maxsize=1024, typed=True)
183 def is_prime(n: int) -> bool:
185 Returns True if n is prime and False otherwise. Obviously(?) very slow for
186 very large input numbers.
192 >>> is_prime(51602981)
196 if not isinstance(n, int):
197 raise TypeError("argument passed to is_prime is not of 'int' type")
205 # This is checked so that we can skip middle five numbers in below
207 if n % 2 == 0 or n % 3 == 0:
212 if n % i == 0 or n % (i + 2) == 0:
218 if __name__ == '__main__':