projects
/
python_utils.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
c974b8c
)
Add some useful stats to histogram.
author
Scott Gasch
<
[email protected]
>
Thu, 10 Feb 2022 16:13:08 +0000
(08:13 -0800)
committer
Scott Gasch
<
[email protected]
>
Thu, 10 Feb 2022 16:13:08 +0000
(08:13 -0800)
histogram.py
patch
|
blob
|
history
math_utils.py
patch
|
blob
|
history
diff --git a/histogram.py b/histogram.py
index 2657c0bbd6c9f679808b80843b7c4c36c2f2eb1c..9c07df9b588aef626ecf8217a70ac4fd9676eb9d 100644
(file)
--- a/histogram.py
+++ b/histogram.py
@@ -40,7 +40,7 @@ class SimpleHistogram(Generic[T]):
raise Exception("Buckets overlap?!")
self.buckets[start_end] = 0
self.sigma: float = 0.0
raise Exception("Buckets overlap?!")
self.buckets[start_end] = 0
self.sigma: float = 0.0
- self.median: RunningMedian = RunningMedian()
+ self.stats: RunningMedian = RunningMedian()
self.maximum: Optional[T] = None
self.minimum: Optional[T] = None
self.count: Count = 0
self.maximum: Optional[T] = None
self.minimum: Optional[T] = None
self.count: Count = 0
@@ -74,7 +74,7 @@ class SimpleHistogram(Generic[T]):
self.count += 1
self.buckets[bucket] += 1
self.sigma += item
self.count += 1
self.buckets[bucket] += 1
self.sigma += item
- self.median.add_number(item)
+ self.stats.add_number(item)
if self.maximum is None or item > self.maximum:
self.maximum = item
if self.minimum is None or item < self.minimum:
if self.maximum is None or item > self.maximum:
self.maximum = item
if self.minimum is None or item < self.minimum:
@@ -142,4 +142,11 @@ class SimpleHistogram(Generic[T]):
txt += sigma_label.rjust(details.max_label_width)
txt += ' ' * (bar_width - 2)
txt += f'Σ=(100.00% n={self.count})\n'
txt += sigma_label.rjust(details.max_label_width)
txt += ' ' * (bar_width - 2)
txt += f'Σ=(100.00% n={self.count})\n'
+ txt += ' ' * (bar_width + details.max_label_width - 2)
+ txt += f'mean(μ)={self.stats.get_mean():.3f}\n'
+ txt += ' ' * (bar_width + details.max_label_width - 2)
+ txt += f'p50(η)={self.stats.get_median():.3f}\n'
+ txt += ' ' * (bar_width + details.max_label_width - 2)
+ txt += f'stdev(σ)={self.stats.get_stdev():.3f}\n'
+ txt += '\n'
return txt
return txt
diff --git a/math_utils.py b/math_utils.py
index 37fcec5f6c557cdf1a66d39b671fd8d9438ba29c..28b8e6b3b6d9a407c6f263220e314d9d4acacbc0 100644
(file)
--- a/math_utils.py
+++ b/math_utils.py
@@ -25,12 +25,14 @@ class RunningMedian(object):
def __init__(self):
self.lowers, self.highers = [], []
def __init__(self):
self.lowers, self.highers = [], []
+ self.aggregate = 0.0
- def add_number(self, number):
+ def add_number(self, number: float):
if not self.highers or number > self.highers[0]:
heappush(self.highers, number)
else:
heappush(self.lowers, -number) # for lowers we need a max heap
if not self.highers or number > self.highers[0]:
heappush(self.highers, number)
else:
heappush(self.lowers, -number) # for lowers we need a max heap
+ self.aggregate += number
self.rebalance()
def rebalance(self):
self.rebalance()
def rebalance(self):
@@ -39,7 +41,7 @@ class RunningMedian(object):
elif len(self.highers) - len(self.lowers) > 1:
heappush(self.lowers, -heappop(self.highers))
elif len(self.highers) - len(self.lowers) > 1:
heappush(self.lowers, -heappop(self.highers))
- def get_median(self):
+ def get_median(self) -> float:
if len(self.lowers) == len(self.highers):
return (-self.lowers[0] + self.highers[0]) / 2
elif len(self.lowers) > len(self.highers):
if len(self.lowers) == len(self.highers):
return (-self.lowers[0] + self.highers[0]) / 2
elif len(self.lowers) > len(self.highers):
@@ -47,6 +49,19 @@ class RunningMedian(object):
else:
return self.highers[0]
else:
return self.highers[0]
+ def get_mean(self) -> float:
+ count = len(self.lowers) + len(self.highers)
+ return self.aggregate / count
+
+ def get_stdev(self) -> float:
+ mean = self.get_mean()
+ variance = 0.0
+ for n in self.lowers:
+ variance += (n - mean) ** 2
+ for n in self.highers:
+ variance += (n - mean) ** 2
+ return math.sqrt(variance)
+
def gcd_floats(a: float, b: float) -> float:
if a < b:
def gcd_floats(a: float, b: float) -> float:
if a < b: