X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=math_utils.py;h=ed9c2f450f0fb0a9b121069a19d4a0b1fe7acfba;hb=e106f3e4aa1e038cd52ea966a961c2493ab56096;hp=270df8ccb3e8a1fcd5ea3f955438f800fcb69807;hpb=02302bbd9363facb59c4df2c1f4013087702cfa6;p=python_utils.git diff --git a/math_utils.py b/math_utils.py index 270df8c..ed9c2f4 100644 --- a/math_utils.py +++ b/math_utils.py @@ -41,6 +41,8 @@ class NumericPopulation(object): self.lowers, self.highers = [], [] self.aggregate = 0.0 self.sorted_copy: Optional[List[float]] = None + self.maximum = None + self.minimum = None def add_number(self, number: float): """Adds a number to the population. Runtime complexity of this @@ -52,6 +54,10 @@ class NumericPopulation(object): heappush(self.lowers, -number) # for lowers we need a max heap self.aggregate += number self._rebalance() + if not self.maximum or number > self.maximum: + self.maximum = number + if not self.minimum or number < self.minimum: + self.minimum = number def _rebalance(self): if len(self.lowers) - len(self.highers) > 1: @@ -96,9 +102,18 @@ class NumericPopulation(object): variance += (n - mean) ** 2 for n in self.highers: variance += (n - mean) ** 2 - count = len(self.lowers) + len(self.highers) - 1 + count = len(self.lowers) + len(self.highers) return math.sqrt(variance) / count + def _create_sorted_copy_if_needed(self, count: int): + if not self.sorted_copy or count != len(self.sorted_copy): + self.sorted_copy = [] + for x in self.lowers: + self.sorted_copy.append(-x) + for x in self.highers: + self.sorted_copy.append(x) + self.sorted_copy = sorted(self.sorted_copy) + def get_percentile(self, n: float) -> float: """Returns the number at approximately pn% (i.e. the nth percentile) of the distribution in O(n log n) time. Not thread-safe; @@ -108,17 +123,11 @@ class NumericPopulation(object): if n == 50: return self.get_median() count = len(self.lowers) + len(self.highers) - if self.sorted_copy is not None: - if count == len(self.sorted_copy): - index = round(count * (n / 100.0)) - assert 0 <= index < count - return self.sorted_copy[index] - self.sorted_copy = [-x for x in self.lowers] - for x in self.highers: - self.sorted_copy.append(x) - self.sorted_copy = sorted(self.sorted_copy) + self._create_sorted_copy_if_needed(count) + assert self.sorted_copy index = round(count * (n / 100.0)) - assert 0 <= index < count + index = max(0, index) + index = min(count - 1, index) return self.sorted_copy[index]