2 # -*- coding: utf-8 -*-
4 # © Copyright 2021-2022, Scott Gasch
7 This is a text-based histogram class. It creates output like this:
9 A Histogram helper class. Creates outputs like this::
11 [4..5): ▏ ( 0.16% n=1)
12 [5..6): ██▍ ( 0.64% n=4)
13 [6..7): ██████▏ ( 1.60% n=10)
14 [7..8): ████████████▍ ( 3.20% n=20)
15 [8..9): █████████████████████▊ ( 5.60% n=35)
16 [9..10): ████████████████████████████████▍ ( 8.32% n=52)
17 [10..11): ██████████████████████████████████████████▍ (10.88% n=68)
18 [11..12): █████████████████████████████████████████████████▉ (12.80% n=80)
19 [12..13): ████████████████████████████████████████████████████▉ (13.60% n=85)
20 [13..14): █████████████████████████████████████████████████▉ (12.80% n=80)
21 [14..15): ██████████████████████████████████████████▍ (10.88% n=68)
22 [15..16): ████████████████████████████████▍ ( 8.32% n=52)
23 [16..17): █████████████████████▊ ( 5.60% n=35)
24 [17..18): ████████████▍ ( 3.20% n=20)
25 [18..19): ██████▏ ( 1.60% n=10)
26 [19..20): ██▍ ( 0.64% n=4)
27 [20..21): ▏ ( 0.16% n=1)
28 --------------------------------------------------------------------------------
38 from dataclasses import dataclass
39 from typing import Dict, Generic, Iterable, List, Optional, Tuple, TypeVar
41 T = TypeVar("T", int, float)
48 """A collection of details about the internal histogram buckets."""
50 num_populated_buckets: int = 0
51 """Count of populated buckets"""
53 max_population: Optional[int] = None
54 """The max population in a bucket currently"""
56 last_bucket_start: Optional[int] = None
57 """The last bucket starting point"""
59 lowest_start: Optional[int] = None
60 """The lowest populated bucket's starting point"""
62 highest_end: Optional[int] = None
63 """The highest populated bucket's ending point"""
65 max_label_width: Optional[int] = None
66 """The maximum label width (for display purposes)"""
69 class SimpleHistogram(Generic[T]):
70 """A simple histogram."""
72 # Useful in defining wide open bottom/top bucket bounds:
73 POSITIVE_INFINITY = math.inf
74 NEGATIVE_INFINITY = -math.inf
76 def __init__(self, buckets: List[Tuple[Bound, Bound]]):
80 buckets: a list of [start..end) tuples that define the
81 buckets we are counting population in. See also
82 :meth:`n_evenly_spaced_buckets` to generate these
85 from pyutils.math_utils import NumericPopulation
87 self.buckets: Dict[Tuple[Bound, Bound], Count] = {}
88 for start_end in buckets:
89 if self._get_bucket(start_end[0]) is not None:
90 raise Exception("Buckets overlap?!")
91 self.buckets[start_end] = 0
92 self.sigma: float = 0.0
93 self.stats: NumericPopulation = NumericPopulation()
94 self.maximum: Optional[T] = None
95 self.minimum: Optional[T] = None
99 def n_evenly_spaced_buckets(
103 ) -> List[Tuple[int, int]]:
104 """A helper method for generating the buckets argument to
105 our c'tor provided that you want N evenly spaced buckets.
108 min_bound: the minimum possible value
109 max_bound: the maximum possible value
110 n: how many buckets to create
113 A list of bounds that define N evenly spaced buckets
115 ret: List[Tuple[int, int]] = []
116 stride = int((max_bound - min_bound) / n)
118 raise Exception("Min must be < Max")
119 imax = math.ceil(max_bound)
120 imin = math.floor(min_bound)
121 for bucket_start in range(imin, imax, stride):
122 ret.append((bucket_start, bucket_start + stride))
125 def _get_bucket(self, item: T) -> Optional[Tuple[int, int]]:
126 """Given an item, what bucket is it in?"""
127 for start_end in self.buckets:
128 if start_end[0] <= item < start_end[1]:
132 def add_item(self, item: T) -> bool:
133 """Adds a single item to the histogram (resulting in us incrementing
134 the population in the correct bucket).
137 item: the item to be added
140 True if the item was successfully added or False if the item
141 is not within the bounds established during class construction.
143 bucket = self._get_bucket(item)
147 self.buckets[bucket] += 1
149 self.stats.add_number(item)
150 if self.maximum is None or item > self.maximum:
152 if self.minimum is None or item < self.minimum:
156 def add_items(self, lst: Iterable[T]) -> bool:
157 """Adds a collection of items to the histogram and increments
158 the correct bucket's population for each item.
161 lst: An iterable of items to be added
164 True if all items were added successfully or False if any
165 item was not able to be added because it was not within the
166 bounds established at object construction.
170 all_true = all_true and self.add_item(item)
173 def _get_bucket_details(self, label_formatter: str) -> BucketDetails:
174 """Get summary details about all of the buckets."""
175 details = BucketDetails()
176 for (start, end), pop in sorted(self.buckets.items(), key=lambda x: x[0]):
178 details.num_populated_buckets += 1
179 details.last_bucket_start = start
180 if details.max_population is None or pop > details.max_population:
181 details.max_population = pop
182 if details.lowest_start is None or start < details.lowest_start:
183 details.lowest_start = start
184 if details.highest_end is None or end > details.highest_end:
185 details.highest_end = end
186 label = f'[{label_formatter}..{label_formatter}): ' % (start, end)
187 label_width = len(label)
189 details.max_label_width is None
190 or label_width > details.max_label_width
192 details.max_label_width = label_width
195 def __repr__(self, *, width: int = 80, label_formatter: str = '%d') -> str:
196 """Returns a pretty (text) representation of the histogram and
197 some vital stats about the population in it (min, max, mean,
198 median, mode, stdev, etc...)
200 from pyutils.text_utils import BarGraphText, bar_graph_string
202 details = self._get_bucket_details(label_formatter)
204 if details.num_populated_buckets == 0:
206 assert details.max_label_width is not None
207 assert details.lowest_start is not None
208 assert details.highest_end is not None
209 assert details.max_population is not None
210 sigma_label = f'[{label_formatter}..{label_formatter}): ' % (
211 details.lowest_start,
214 if len(sigma_label) > details.max_label_width:
215 details.max_label_width = len(sigma_label)
216 bar_width = width - (details.max_label_width + 17)
218 for (start, end), pop in sorted(self.buckets.items(), key=lambda x: x[0]):
219 if start < details.lowest_start:
221 label = f'[{label_formatter}..{label_formatter}): ' % (start, end)
222 bar = bar_graph_string(
224 details.max_population,
225 text=BarGraphText.NONE,
230 txt += label.rjust(details.max_label_width)
232 txt += f"({pop/self.count*100.0:5.2f}% n={pop})\n"
233 if start == details.last_bucket_start:
235 txt += '-' * width + '\n'
236 txt += sigma_label.rjust(details.max_label_width)
237 txt += ' ' * (bar_width - 2)
238 txt += f' pop(Σn)={self.count}\n'
239 txt += ' ' * (bar_width + details.max_label_width - 2)
240 txt += f' mean(x̄)={self.stats.get_mean():.3f}\n'
241 txt += ' ' * (bar_width + details.max_label_width - 2)
242 txt += f' median(p50)={self.stats.get_median():.3f}\n'
243 txt += ' ' * (bar_width + details.max_label_width - 2)
244 txt += f' mode(Mo)={self.stats.get_mode()[0]:.3f}\n'
245 txt += ' ' * (bar_width + details.max_label_width - 2)
246 txt += f' stdev(σ)={self.stats.get_stdev():.3f}\n'