2 # -*- coding: utf-8 -*-
4 # © Copyright 2021-2023, Scott Gasch
7 This is a text-based histogram class. It creates output like this:
9 A Histogram helper class. Creates outputs like this::
11 [5..6): ▏ ( 0.10% n=1)
12 [6..7): █▋ ( 0.49% n=5)
13 [7..8): █████▏ ( 1.46% n=15)
14 [8..9): ███████████▉ ( 3.42% n=35)
15 [9..10): ██████████████████████▏ ( 6.35% n=65)
16 [10..11): ██████████████████████████████████▌ ( 9.86% n=101)
17 [11..12): ██████████████████████████████████████████████▏ (13.18% n=135)
18 [12..13): ████████████████████████████████████████████████████▉ (15.14% n=155)
19 [13..14): ████████████████████████████████████████████████████▉ (15.14% n=155)
20 [14..15): ██████████████████████████████████████████████▏ (13.18% n=135)
21 [15..16): ██████████████████████████████████▌ ( 9.86% n=101)
22 [16..17): ██████████████████████▏ ( 6.35% n=65)
23 [17..18): ███████████▉ ( 3.42% n=35)
24 [18..19): █████▏ ( 1.46% n=15)
25 [19..20): █▋ ( 0.49% n=5)
26 [20..21): ▏ ( 0.10% n=1)
27 --------------------------------------------------------------------------------
36 from dataclasses import dataclass
37 from typing import Dict, Generic, Iterable, List, Optional, Tuple, TypeVar
39 T = TypeVar("T", int, float)
46 """A collection of details about the internal histogram buckets."""
48 num_populated_buckets: int = 0
49 """Count of populated buckets"""
51 max_population: Optional[int] = None
52 """The max population in a bucket currently"""
54 last_bucket_start: Optional[int] = None
55 """The last bucket starting point"""
57 lowest_start: Optional[int] = None
58 """The lowest populated bucket's starting point"""
60 highest_end: Optional[int] = None
61 """The highest populated bucket's ending point"""
63 max_label_width: Optional[int] = None
64 """The maximum label width (for display purposes)"""
67 class SimpleHistogram(Generic[T]):
68 """A simple histogram."""
70 # Useful in defining wide open bottom/top bucket bounds:
71 POSITIVE_INFINITY = math.inf
72 NEGATIVE_INFINITY = -math.inf
74 def __init__(self, buckets: List[Tuple[Bound, Bound]]):
78 buckets: a list of [start..end] tuples that define the
79 buckets we are counting population in. See also
80 :meth:`n_evenly_spaced_buckets` to generate these
84 ValueError: buckets overlap
86 from pyutils.math_utils import NumericPopulation
88 self.buckets: Dict[Tuple[Bound, Bound], Count] = {}
89 for start_end in buckets:
90 if self._get_bucket(start_end[0]) is not None:
91 raise ValueError("Buckets overlap?!")
92 self.buckets[start_end] = 0
93 self.sigma: float = 0.0
94 self.stats: NumericPopulation = NumericPopulation()
95 self.maximum: Optional[T] = None
96 self.minimum: Optional[T] = None
100 def n_evenly_spaced_buckets(
104 ) -> List[Tuple[int, int]]:
105 """A helper method for generating the buckets argument to
106 our c'tor provided that you want N evenly spaced buckets.
109 min_bound: the minimum possible value
110 max_bound: the maximum possible value
111 n: how many buckets to create
114 A list of bounds that define N evenly spaced buckets
117 ValueError: min is not < max
119 ret: List[Tuple[int, int]] = []
120 stride = int((max_bound - min_bound) / n)
122 raise ValueError("Min must be < Max")
123 imax = math.ceil(max_bound)
124 imin = math.floor(min_bound)
125 for bucket_start in range(imin, imax, stride):
126 ret.append((bucket_start, bucket_start + stride))
129 def _get_bucket(self, item: T) -> Optional[Tuple[int, int]]:
130 """Given an item, what bucket is it in?"""
131 for start_end in self.buckets:
132 if start_end[0] <= item < start_end[1]:
136 def add_item(self, item: T) -> bool:
137 """Adds a single item to the histogram (reculting in us incrementing
138 the population in the correct bucket.
141 item: the item to be added
144 True if the item was successfully added or False if the item
145 is not within the bounds established during class construction.
147 bucket = self._get_bucket(item)
151 self.buckets[bucket] += 1
153 self.stats.add_number(item)
154 if self.maximum is None or item > self.maximum:
156 if self.minimum is None or item < self.minimum:
160 def add_items(self, lst: Iterable[T]) -> bool:
161 """Adds a collection of items to the histogram and increments
162 the correct bucket's population for each item.
165 lst: An iterable of items to be added
168 True if all items were added successfully or False if any
169 item was not able to be added because it was not within the
170 bounds established at object construction.
174 all_true = all_true and self.add_item(item)
177 def _get_bucket_details(self, label_formatter: str) -> BucketDetails:
178 """Get the details about one bucket."""
179 details = BucketDetails()
180 for (start, end), pop in sorted(self.buckets.items(), key=lambda x: x[0]):
182 details.num_populated_buckets += 1
183 details.last_bucket_start = start
184 if details.max_population is None or pop > details.max_population:
185 details.max_population = pop
186 if details.lowest_start is None or start < details.lowest_start:
187 details.lowest_start = start
188 if details.highest_end is None or end > details.highest_end:
189 details.highest_end = end
190 label = f"[{label_formatter}..{label_formatter}): " % (start, end)
191 label_width = len(label)
193 details.max_label_width is None
194 or label_width > details.max_label_width
196 details.max_label_width = label_width
199 def __repr__(self, *, width: int = 80, label_formatter: str = "%d") -> str:
200 """Returns a pretty (text) representation of the histogram and
201 some vital stats about the population in it (min, max, mean,
202 median, mode, stdev, etc...)
204 from pyutils.text_utils import BarGraphText, bar_graph_string
206 details = self._get_bucket_details(label_formatter)
208 if details.num_populated_buckets == 0:
210 assert details.max_label_width is not None
211 assert details.lowest_start is not None
212 assert details.highest_end is not None
213 assert details.max_population is not None
214 sigma_label = f"[{label_formatter}..{label_formatter}): " % (
215 details.lowest_start,
218 if len(sigma_label) > details.max_label_width:
219 details.max_label_width = len(sigma_label)
220 bar_width = width - (details.max_label_width + 17)
222 for (start, end), pop in sorted(self.buckets.items(), key=lambda x: x[0]):
223 if start < details.lowest_start:
225 label = f"[{label_formatter}..{label_formatter}): " % (start, end)
226 bar = bar_graph_string(
228 details.max_population,
229 text=BarGraphText.NONE,
234 txt += label.rjust(details.max_label_width)
236 txt += f"({pop/self.count*100.0:5.2f}% n={pop})\n"
237 if start == details.last_bucket_start:
239 txt += "-" * width + "\n"
240 txt += sigma_label.rjust(details.max_label_width)
241 txt += " " * (bar_width - 2)
242 txt += f" pop(Σn)={self.count}\n"
243 txt += " " * (bar_width + details.max_label_width - 2)
244 txt += f" mean(μ)={self.stats.get_mean():.3f}\n"
245 txt += " " * (bar_width + details.max_label_width - 2)
246 txt += f" median(p50)={self.stats.get_median():.3f}\n"
247 txt += " " * (bar_width + details.max_label_width - 2)
248 txt += f" mode(Mo)={self.stats.get_mode()[0]:.3f}\n"
249 txt += " " * (bar_width + details.max_label_width - 2)
250 txt += f" stdev(σ)={self.stats.get_stdev():.3f}\n"