Since this thing is on the innerwebs I suppose it should have a copyright notice.
[python_utils.git] / histogram.py
index 2657c0bbd6c9f679808b80843b7c4c36c2f2eb1c..52a0d1fad558a493c6e303abdd07a6933053a045 100644 (file)
@@ -1,6 +1,8 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
+# © Copyright 2021-2022, Scott Gasch
+
 """A text-based simple histogram helper class."""
 
 import math
@@ -32,7 +34,7 @@ class SimpleHistogram(Generic[T]):
     NEGATIVE_INFINITY = -math.inf
 
     def __init__(self, buckets: List[Tuple[Bound, Bound]]):
-        from math_utils import RunningMedian
+        from math_utils import NumericPopulation
 
         self.buckets: Dict[Tuple[Bound, Bound], Count] = {}
         for start_end in buckets:
@@ -40,7 +42,7 @@ class SimpleHistogram(Generic[T]):
                 raise Exception("Buckets overlap?!")
             self.buckets[start_end] = 0
         self.sigma: float = 0.0
-        self.median: RunningMedian = RunningMedian()
+        self.stats: NumericPopulation = NumericPopulation()
         self.maximum: Optional[T] = None
         self.minimum: Optional[T] = None
         self.count: Count = 0
@@ -74,7 +76,7 @@ class SimpleHistogram(Generic[T]):
         self.count += 1
         self.buckets[bucket] += 1
         self.sigma += item
-        self.median.add_number(item)
+        self.stats.add_number(item)
         if self.maximum is None or item > self.maximum:
             self.maximum = item
         if self.minimum is None or item < self.minimum:
@@ -122,9 +124,11 @@ class SimpleHistogram(Generic[T]):
         )
         if len(sigma_label) > details.max_label_width:
             details.max_label_width = len(sigma_label)
-        bar_width = width - (details.max_label_width + 16)
+        bar_width = width - (details.max_label_width + 17)
 
         for (start, end), pop in sorted(self.buckets.items(), key=lambda x: x[0]):
+            if start < details.lowest_start:
+                continue
             label = f'[{label_formatter}..{label_formatter}): ' % (start, end)
             bar = bar_graph(
                 (pop / details.max_population),
@@ -141,5 +145,14 @@ class SimpleHistogram(Generic[T]):
         txt += '-' * width + '\n'
         txt += sigma_label.rjust(details.max_label_width)
         txt += ' ' * (bar_width - 2)
-        txt += f'Σ=(100.00% n={self.count})\n'
+        txt += f'     pop(Σn)={self.count}\n'
+        txt += ' ' * (bar_width + details.max_label_width - 2)
+        txt += f'     mean(x̄)={self.stats.get_mean():.3f}\n'
+        txt += ' ' * (bar_width + details.max_label_width - 2)
+        txt += f' median(p50)={self.stats.get_median():.3f}\n'
+        txt += ' ' * (bar_width + details.max_label_width - 2)
+        txt += f'    mode(Mo)={self.stats.get_mode()[0]:.3f}\n'
+        txt += ' ' * (bar_width + details.max_label_width - 2)
+        txt += f'    stdev(σ)={self.stats.get_stdev():.3f}\n'
+        txt += '\n'
         return txt