X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=math_utils.py;h=270df8ccb3e8a1fcd5ea3f955438f800fcb69807;hb=3ca9b4d16433af8da5d2de7f4a2338b56b5428d5;hp=31610ba5fd2a0726b5b1f151dc87cc362b7389d4;hpb=14b42faebd598dc14cec6eaef77f06845e500b4b;p=python_utils.git diff --git a/math_utils.py b/math_utils.py index 31610ba..270df8c 100644 --- a/math_utils.py +++ b/math_utils.py @@ -1,11 +1,16 @@ #!/usr/bin/env python3 +# © Copyright 2021-2022, Scott Gasch + """Mathematical helpers.""" +import collections import functools import math from heapq import heappop, heappush -from typing import List, Optional +from typing import Dict, List, Optional, Tuple + +import dict_utils class NumericPopulation(object): @@ -25,12 +30,11 @@ class NumericPopulation(object): >>> pop.get_mean() 5.2 >>> round(pop.get_stdev(), 2) - 6.99 + 1.75 >>> pop.get_percentile(20) 3 >>> pop.get_percentile(60) 7 - """ def __init__(self): @@ -39,7 +43,8 @@ class NumericPopulation(object): self.sorted_copy: Optional[List[float]] = None def add_number(self, number: float): - """O(2 log2 n)""" + """Adds a number to the population. Runtime complexity of this + operation is :math:`O(2 log_2 n)`""" if not self.highers or number > self.highers[0]: heappush(self.highers, number) @@ -70,6 +75,17 @@ class NumericPopulation(object): count = len(self.lowers) + len(self.highers) return self.aggregate / count + def get_mode(self) -> Tuple[float, int]: + """Returns the mode (most common member in the population) + in O(n) time.""" + + count: Dict[float, int] = collections.defaultdict(int) + for n in self.lowers: + count[-n] += 1 + for n in self.highers: + count[n] += 1 + return dict_utils.item_with_max_value(count) + def get_stdev(self) -> float: """Returns the stdev so far in O(n) time.""" @@ -80,14 +96,14 @@ class NumericPopulation(object): variance += (n - mean) ** 2 for n in self.highers: variance += (n - mean) ** 2 - return math.sqrt(variance) + count = len(self.lowers) + len(self.highers) - 1 + return math.sqrt(variance) / count def get_percentile(self, n: float) -> float: """Returns the number at approximately pn% (i.e. the nth percentile) - of the distribution in O(n log n) time (expensive, requires a - complete sort). Not thread safe. Caching does across - multiple calls without an invocation to add_number. - + of the distribution in O(n log n) time. Not thread-safe; + does caching across multiple calls without an invocation to + add_number for perf reasons. """ if n == 50: return self.get_median() @@ -107,6 +123,7 @@ class NumericPopulation(object): def gcd_floats(a: float, b: float) -> float: + """Returns the greatest common divisor of a and b.""" if a < b: return gcd_floats(b, a) @@ -117,6 +134,7 @@ def gcd_floats(a: float, b: float) -> float: def gcd_float_sequence(lst: List[float]) -> float: + """Returns the greatest common divisor of a list of floats.""" if len(lst) <= 0: raise ValueError("Need at least one number") elif len(lst) == 1: @@ -129,8 +147,7 @@ def gcd_float_sequence(lst: List[float]) -> float: def truncate_float(n: float, decimals: int = 2): - """ - Truncate a float to a particular number of decimals. + """Truncate a float to a particular number of decimals. >>> truncate_float(3.1415927, 3) 3.141 @@ -151,7 +168,6 @@ def percentage_to_multiplier(percent: float) -> float: 1.45 >>> percentage_to_multiplier(-25) 0.75 - """ multiplier = percent / 100 multiplier += 1.0 @@ -167,7 +183,6 @@ def multiplier_to_percent(multiplier: float) -> float: 0.0 >>> multiplier_to_percent(1.99) 99.0 - """ percent = multiplier if percent > 0.0: @@ -190,7 +205,6 @@ def is_prime(n: int) -> bool: False >>> is_prime(51602981) True - """ if not isinstance(n, int): raise TypeError("argument passed to is_prime is not of 'int' type")