Add some useful stats to histogram.
[python_utils.git] / math_utils.py
index 37fcec5f6c557cdf1a66d39b671fd8d9438ba29c..28b8e6b3b6d9a407c6f263220e314d9d4acacbc0 100644 (file)
@@ -25,12 +25,14 @@ class RunningMedian(object):
 
     def __init__(self):
         self.lowers, self.highers = [], []
+        self.aggregate = 0.0  # running sum of all numbers added; read by get_mean()
 
-    def add_number(self, number):
+    def add_number(self, number: float):
         if not self.highers or number > self.highers[0]:
             heappush(self.highers, number)
         else:
             heappush(self.lowers, -number)  # for lowers we need a max heap
+        self.aggregate += number  # running sum; get_mean() divides it by the count
         self.rebalance()
 
     def rebalance(self):
@@ -39,7 +41,7 @@ class RunningMedian(object):
         elif len(self.highers) - len(self.lowers) > 1:
             heappush(self.lowers, -heappop(self.highers))
 
-    def get_median(self):
+    def get_median(self) -> float:
         if len(self.lowers) == len(self.highers):
             return (-self.lowers[0] + self.highers[0]) / 2
         elif len(self.lowers) > len(self.highers):
@@ -47,6 +49,19 @@ class RunningMedian(object):
         else:
             return self.highers[0]
 
+    def get_mean(self) -> float:
+        """Return the arithmetic mean of every number added so far."""
+        return self.aggregate / (len(self.lowers) + len(self.highers))
+
+    def get_stdev(self) -> float:
+        """Return the population standard deviation of the numbers added."""
+        mean = self.get_mean()
+        # lowers is a max heap built from negated values; undo the negation.
+        values = [-n for n in self.lowers] + self.highers
+        # Average the squared deviations; a bare sum is not a variance.
+        variance = sum((v - mean) ** 2 for v in values) / len(values)
+        return math.sqrt(variance)
+
 
 def gcd_floats(a: float, b: float) -> float:
     if a < b: