math_utils.py

   1 #!/usr/bin/env python3
   2
   3 # © Copyright 2021-2022, Scott Gasch
   4
   5 """Mathematical helpers."""
   6
   7 import collections
   8 import functools
   9 import math
  10 from heapq import heappop, heappush
  11 from typing import Dict, List, Optional, Tuple
  12
  13 import dict_utils
  14
  15
  16 class NumericPopulation(object):
  17     """A numeric population with some statistics such as median, mean, pN,
  18     stdev, etc...
  19
  20     >>> pop = NumericPopulation()
  21     >>> pop.add_number(1)
  22     >>> pop.add_number(10)
  23     >>> pop.add_number(3)
  24     >>> pop.get_median()
  25     3
  26     >>> pop.add_number(7)
  27     >>> pop.add_number(5)
  28     >>> pop.get_median()
  29     5
  30     >>> pop.get_mean()
  31     5.2
  32     >>> round(pop.get_stdev(), 2)
  33     1.75
  34     >>> pop.get_percentile(20)
  35     3
  36     >>> pop.get_percentile(60)
  37     7
  38     """
  39
  40     def __init__(self):
  41         self.lowers, self.highers = [], []
  42         self.aggregate = 0.0
  43         self.sorted_copy: Optional[List[float]] = None
  44
  45     def add_number(self, number: float):
  46         """Adds a number to the population.  Runtime complexity of this
  47         operation is :math:`O(2 log_2 n)`"""
  48
  49         if not self.highers or number > self.highers[0]:
  50             heappush(self.highers, number)
  51         else:
  52             heappush(self.lowers, -number)  # for lowers we need a max heap
  53         self.aggregate += number
  54         self._rebalance()
  55
  56     def _rebalance(self):
  57         if len(self.lowers) - len(self.highers) > 1:
  58             heappush(self.highers, -heappop(self.lowers))
  59         elif len(self.highers) - len(self.lowers) > 1:
  60             heappush(self.lowers, -heappop(self.highers))
  61
  62     def get_median(self) -> float:
  63         """Returns the approximate median (p50) so far in O(1) time."""
  64
  65         if len(self.lowers) == len(self.highers):
  66             return -self.lowers[0]
  67         elif len(self.lowers) > len(self.highers):
  68             return -self.lowers[0]
  69         else:
  70             return self.highers[0]
  71
  72     def get_mean(self) -> float:
  73         """Returns the mean (arithmetic mean) so far in O(1) time."""
  74
  75         count = len(self.lowers) + len(self.highers)
  76         return self.aggregate / count
  77
  78     def get_mode(self) -> Tuple[float, int]:
  79         """Returns the mode (most common member in the population)
  80         in O(n) time."""
  81
  82         count: Dict[float, int] = collections.defaultdict(int)
  83         for n in self.lowers:
  84             count[-n] += 1
  85         for n in self.highers:
  86             count[n] += 1
  87         return dict_utils.item_with_max_value(count)
  88
  89     def get_stdev(self) -> float:
  90         """Returns the stdev so far in O(n) time."""
  91
  92         mean = self.get_mean()
  93         variance = 0.0
  94         for n in self.lowers:
  95             n = -n
  96             variance += (n - mean) ** 2
  97         for n in self.highers:
  98             variance += (n - mean) ** 2
  99         count = len(self.lowers) + len(self.highers) - 1
 100         return math.sqrt(variance) / count
 101
 102     def get_percentile(self, n: float) -> float:
 103         """Returns the number at approximately pn% (i.e. the nth percentile)
 104         of the distribution in O(n log n) time.  Not thread-safe;
 105         does caching across multiple calls without an invocation to
 106         add_number for perf reasons.
 107         """
 108         if n == 50:
 109             return self.get_median()
 110         count = len(self.lowers) + len(self.highers)
 111         if self.sorted_copy is not None:
 112             if count == len(self.sorted_copy):
 113                 index = round(count * (n / 100.0))
 114                 assert 0 <= index < count
 115                 return self.sorted_copy[index]
 116         self.sorted_copy = [-x for x in self.lowers]
 117         for x in self.highers:
 118             self.sorted_copy.append(x)
 119         self.sorted_copy = sorted(self.sorted_copy)
 120         index = round(count * (n / 100.0))
 121         assert 0 <= index < count
 122         return self.sorted_copy[index]
 123
 124
 125 def gcd_floats(a: float, b: float) -> float:
 126     """Returns the greatest common divisor of a and b."""
 127     if a < b:
 128         return gcd_floats(b, a)
 129
 130     # base case
 131     if abs(b) < 0.001:
 132         return a
 133     return gcd_floats(b, a - math.floor(a / b) * b)
 134
 135
 136 def gcd_float_sequence(lst: List[float]) -> float:
 137     """Returns the greatest common divisor of a list of floats."""
 138     if len(lst) <= 0:
 139         raise ValueError("Need at least one number")
 140     elif len(lst) == 1:
 141         return lst[0]
 142     assert len(lst) >= 2
 143     gcd = gcd_floats(lst[0], lst[1])
 144     for i in range(2, len(lst)):
 145         gcd = gcd_floats(gcd, lst[i])
 146     return gcd
 147
 148
 149 def truncate_float(n: float, decimals: int = 2):
 150     """Truncate a float to a particular number of decimals.
 151
 152     >>> truncate_float(3.1415927, 3)
 153     3.141
 154
 155     """
 156     assert 0 < decimals < 10
 157     multiplier = 10**decimals
 158     return int(n * multiplier) / multiplier
 159
 160
 161 def percentage_to_multiplier(percent: float) -> float:
 162     """Given a percentage (e.g. 155%), return a factor needed to scale a
 163     number by that percentage.
 164
 165     >>> percentage_to_multiplier(155)
 166     2.55
 167     >>> percentage_to_multiplier(45)
 168     1.45
 169     >>> percentage_to_multiplier(-25)
 170     0.75
 171     """
 172     multiplier = percent / 100
 173     multiplier += 1.0
 174     return multiplier
 175
 176
 177 def multiplier_to_percent(multiplier: float) -> float:
 178     """Convert a multiplicative factor into a percent change.
 179
 180     >>> multiplier_to_percent(0.75)
 181     -25.0
 182     >>> multiplier_to_percent(1.0)
 183     0.0
 184     >>> multiplier_to_percent(1.99)
 185     99.0
 186     """
 187     percent = multiplier
 188     if percent > 0.0:
 189         percent -= 1.0
 190     else:
 191         percent = 1.0 - percent
 192     percent *= 100.0
 193     return percent
 194
 195
 196 @functools.lru_cache(maxsize=1024, typed=True)
 197 def is_prime(n: int) -> bool:
 198     """
 199     Returns True if n is prime and False otherwise.  Obviously(?) very slow for
 200     very large input numbers.
 201
 202     >>> is_prime(13)
 203     True
 204     >>> is_prime(22)
 205     False
 206     >>> is_prime(51602981)
 207     True
 208     """
 209     if not isinstance(n, int):
 210         raise TypeError("argument passed to is_prime is not of 'int' type")
 211
 212     # Corner cases
 213     if n <= 1:
 214         return False
 215     if n <= 3:
 216         return True
 217
 218     # This is checked so that we can skip middle five numbers in below
 219     # loop
 220     if n % 2 == 0 or n % 3 == 0:
 221         return False
 222
 223     i = 5
 224     while i * i <= n:
 225         if n % i == 0 or n % (i + 2) == 0:
 226             return False
 227         i = i + 6
 228     return True
 229
 230
# When this file is executed directly (rather than imported), run the
# doctest examples embedded in the docstrings above.
if __name__ == '__main__':
    import doctest

    doctest.testmod()