2 # -*- coding: utf-8 -*-
4 # © Copyright 2021-2022, Scott Gasch
7 This is a text-based histogram class. It creates output like this:
9 A Histogram helper class. Creates outputs like this::
11 [5..6): ▏ ( 0.10% n=1)
12 [6..7): █▋ ( 0.49% n=5)
13 [7..8): █████▏ ( 1.46% n=15)
14 [8..9): ███████████▉ ( 3.42% n=35)
15 [9..10): ██████████████████████▏ ( 6.35% n=65)
16 [10..11): ██████████████████████████████████▌ ( 9.86% n=101)
17 [11..12): ██████████████████████████████████████████████▏ (13.18% n=135)
18 [12..13): ████████████████████████████████████████████████████▉ (15.14% n=155)
19 [13..14): ████████████████████████████████████████████████████▉ (15.14% n=155)
20 [14..15): ██████████████████████████████████████████████▏ (13.18% n=135)
21 [15..16): ██████████████████████████████████▌ ( 9.86% n=101)
22 [16..17): ██████████████████████▏ ( 6.35% n=65)
23 [17..18): ███████████▉ ( 3.42% n=35)
24 [18..19): █████▏ ( 1.46% n=15)
25 [19..20): █▋ ( 0.49% n=5)
26 [20..21): ▏ ( 0.10% n=1)
27 --------------------------------------------------------------------------------
36 from dataclasses import dataclass
37 from typing import Dict, Generic, Iterable, List, Optional, Tuple, TypeVar
39 T = TypeVar("T", int, float)
46 """A collection of details about the internal histogram buckets."""
48 num_populated_buckets: int = 0
49 """Count of populated buckets"""
51 max_population: Optional[int] = None
52 """The max population in a bucket currently"""
54 last_bucket_start: Optional[int] = None
55 """The last bucket starting point"""
57 lowest_start: Optional[int] = None
58 """The lowest populated bucket's starting point"""
60 highest_end: Optional[int] = None
61 """The highest populated bucket's ending point"""
63 max_label_width: Optional[int] = None
64 """The maximum label width (for display purposes)"""
67 class SimpleHistogram(Generic[T]):
68 """A simple histogram."""
70 # Useful in defining wide open bottom/top bucket bounds:
71 POSITIVE_INFINITY = math.inf
72 NEGATIVE_INFINITY = -math.inf
74 def __init__(self, buckets: List[Tuple[Bound, Bound]]):
78 buckets: a list of [start..end] tuples that define the
79 buckets we are counting population in. See also
80 :meth:`n_evenly_spaced_buckets` to generate these
83 from pyutils.math_utils import NumericPopulation
85 self.buckets: Dict[Tuple[Bound, Bound], Count] = {}
86 for start_end in buckets:
87 if self._get_bucket(start_end[0]) is not None:
88 raise Exception("Buckets overlap?!")
89 self.buckets[start_end] = 0
90 self.sigma: float = 0.0
91 self.stats: NumericPopulation = NumericPopulation()
92 self.maximum: Optional[T] = None
93 self.minimum: Optional[T] = None
97 def n_evenly_spaced_buckets(
101 ) -> List[Tuple[int, int]]:
102 """A helper method for generating the buckets argument to
103 our c'tor provided that you want N evenly spaced buckets.
106 min_bound: the minimum possible value
107 max_bound: the maximum possible value
108 n: how many buckets to create
111 A list of bounds that define N evenly spaced buckets
113 ret: List[Tuple[int, int]] = []
114 stride = int((max_bound - min_bound) / n)
116 raise Exception("Min must be < Max")
117 imax = math.ceil(max_bound)
118 imin = math.floor(min_bound)
119 for bucket_start in range(imin, imax, stride):
120 ret.append((bucket_start, bucket_start + stride))
123 def _get_bucket(self, item: T) -> Optional[Tuple[int, int]]:
124 """Given an item, what bucket is it in?"""
125 for start_end in self.buckets:
126 if start_end[0] <= item < start_end[1]:
130 def add_item(self, item: T) -> bool:
131 """Adds a single item to the histogram (reculting in us incrementing
132 the population in the correct bucket.
135 item: the item to be added
138 True if the item was successfully added or False if the item
139 is not within the bounds established during class construction.
141 bucket = self._get_bucket(item)
145 self.buckets[bucket] += 1
147 self.stats.add_number(item)
148 if self.maximum is None or item > self.maximum:
150 if self.minimum is None or item < self.minimum:
154 def add_items(self, lst: Iterable[T]) -> bool:
155 """Adds a collection of items to the histogram and increments
156 the correct bucket's population for each item.
159 lst: An iterable of items to be added
162 True if all items were added successfully or False if any
163 item was not able to be added because it was not within the
164 bounds established at object construction.
168 all_true = all_true and self.add_item(item)
171 def _get_bucket_details(self, label_formatter: str) -> BucketDetails:
172 """Get the details about one bucket."""
173 details = BucketDetails()
174 for (start, end), pop in sorted(self.buckets.items(), key=lambda x: x[0]):
176 details.num_populated_buckets += 1
177 details.last_bucket_start = start
178 if details.max_population is None or pop > details.max_population:
179 details.max_population = pop
180 if details.lowest_start is None or start < details.lowest_start:
181 details.lowest_start = start
182 if details.highest_end is None or end > details.highest_end:
183 details.highest_end = end
184 label = f"[{label_formatter}..{label_formatter}): " % (start, end)
185 label_width = len(label)
187 details.max_label_width is None
188 or label_width > details.max_label_width
190 details.max_label_width = label_width
193 def __repr__(self, *, width: int = 80, label_formatter: str = "%d") -> str:
194 """Returns a pretty (text) representation of the histogram and
195 some vital stats about the population in it (min, max, mean,
196 median, mode, stdev, etc...)
198 from pyutils.text_utils import BarGraphText, bar_graph_string
200 details = self._get_bucket_details(label_formatter)
202 if details.num_populated_buckets == 0:
204 assert details.max_label_width is not None
205 assert details.lowest_start is not None
206 assert details.highest_end is not None
207 assert details.max_population is not None
208 sigma_label = f"[{label_formatter}..{label_formatter}): " % (
209 details.lowest_start,
212 if len(sigma_label) > details.max_label_width:
213 details.max_label_width = len(sigma_label)
214 bar_width = width - (details.max_label_width + 17)
216 for (start, end), pop in sorted(self.buckets.items(), key=lambda x: x[0]):
217 if start < details.lowest_start:
219 label = f"[{label_formatter}..{label_formatter}): " % (start, end)
220 bar = bar_graph_string(
222 details.max_population,
223 text=BarGraphText.NONE,
228 txt += label.rjust(details.max_label_width)
230 txt += f"({pop/self.count*100.0:5.2f}% n={pop})\n"
231 if start == details.last_bucket_start:
233 txt += "-" * width + "\n"
234 txt += sigma_label.rjust(details.max_label_width)
235 txt += " " * (bar_width - 2)
236 txt += f" pop(Σn)={self.count}\n"
237 txt += " " * (bar_width + details.max_label_width - 2)
238 txt += f" mean(μ)={self.stats.get_mean():.3f}\n"
239 txt += " " * (bar_width + details.max_label_width - 2)
240 txt += f" median(p50)={self.stats.get_median():.3f}\n"
241 txt += " " * (bar_width + details.max_label_width - 2)
242 txt += f" mode(Mo)={self.stats.get_mode()[0]:.3f}\n"
243 txt += " " * (bar_width + details.max_label_width - 2)
244 txt += f" stdev(σ)={self.stats.get_stdev():.3f}\n"