From a4b50bb62e2653d3d084c6c7e0574abb9277b8d7 Mon Sep 17 00:00:00 2001
From: Scott Gasch
Date: Thu, 10 Feb 2022 08:13:08 -0800
Subject: [PATCH] Add some useful stats to histogram.

---
 histogram.py  | 11 +++++++++--
 math_utils.py | 27 +++++++++++++++++++++++++--
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/histogram.py b/histogram.py
index 2657c0b..9c07df9 100644
--- a/histogram.py
+++ b/histogram.py
@@ -40,7 +40,7 @@ class SimpleHistogram(Generic[T]):
                 raise Exception("Buckets overlap?!")
             self.buckets[start_end] = 0
         self.sigma: float = 0.0
-        self.median: RunningMedian = RunningMedian()
+        self.stats: RunningMedian = RunningMedian()
         self.maximum: Optional[T] = None
         self.minimum: Optional[T] = None
         self.count: Count = 0
@@ -74,7 +74,7 @@ class SimpleHistogram(Generic[T]):
         self.count += 1
         self.buckets[bucket] += 1
         self.sigma += item
-        self.median.add_number(item)
+        self.stats.add_number(item)
         if self.maximum is None or item > self.maximum:
             self.maximum = item
         if self.minimum is None or item < self.minimum:
@@ -142,4 +142,11 @@ class SimpleHistogram(Generic[T]):
         txt += sigma_label.rjust(details.max_label_width)
         txt += ' ' * (bar_width - 2)
         txt += f'Σ=(100.00% n={self.count})\n'
+        txt += ' ' * (bar_width + details.max_label_width - 2)
+        txt += f'mean(μ)={self.stats.get_mean():.3f}\n'
+        txt += ' ' * (bar_width + details.max_label_width - 2)
+        txt += f'p50(η)={self.stats.get_median():.3f}\n'
+        txt += ' ' * (bar_width + details.max_label_width - 2)
+        txt += f'stdev(σ)={self.stats.get_stdev():.3f}\n'
+        txt += '\n'
         return txt
diff --git a/math_utils.py b/math_utils.py
index 37fcec5..28b8e6b 100644
--- a/math_utils.py
+++ b/math_utils.py
@@ -25,12 +25,14 @@ class RunningMedian(object):
 
     def __init__(self):
         self.lowers, self.highers = [], []
+        self.aggregate = 0.0
 
-    def add_number(self, number):
+    def add_number(self, number: float):
         if not self.highers or number > self.highers[0]:
             heappush(self.highers, number)
         else:
             heappush(self.lowers, -number)  # for lowers we need a max heap
+        self.aggregate += number
         self.rebalance()
 
     def rebalance(self):
@@ -39,7 +41,7 @@ class RunningMedian(object):
         elif len(self.highers) - len(self.lowers) > 1:
             heappush(self.lowers, -heappop(self.highers))
 
-    def get_median(self):
+    def get_median(self) -> float:
         if len(self.lowers) == len(self.highers):
             return (-self.lowers[0] + self.highers[0]) / 2
         elif len(self.lowers) > len(self.highers):
@@ -47,6 +49,27 @@ class RunningMedian(object):
         else:
             return self.highers[0]
 
+    def get_mean(self) -> float:
+        """Return the arithmetic mean of all numbers added so far.
+
+        Raises ZeroDivisionError if no numbers have been added.
+        """
+        count = len(self.lowers) + len(self.highers)
+        return self.aggregate / count
+
+    def get_stdev(self) -> float:
+        """Return the population standard deviation of the numbers added.
+
+        Raises ZeroDivisionError if no numbers have been added.
+        """
+        mean = self.get_mean()
+        count = len(self.lowers) + len(self.highers)
+        # self.lowers holds negated values (max heap emulation), so
+        # flip the sign back before measuring distance from the mean.
+        variance = sum((-n - mean) ** 2 for n in self.lowers)
+        variance += sum((n - mean) ** 2 for n in self.highers)
+        return math.sqrt(variance / count)
+
 
 def gcd_floats(a: float, b: float) -> float:
     if a < b:
-- 
2.45.2