2 # -*- coding: utf-8 -*-
4 # © Copyright 2021-2022, Scott Gasch
6 """A text-based simple histogram helper class."""
9 from dataclasses import dataclass
10 from typing import Dict, Generic, Iterable, List, Optional, Tuple, TypeVar
12 T = TypeVar("T", int, float)
19 """A collection of details about the internal histogram buckets."""
21 num_populated_buckets: int = 0
22 """Count of populated buckets"""
24 max_population: Optional[int] = None
25 """The max population in a bucket currently"""
27 last_bucket_start: Optional[int] = None
28 """The last bucket starting point"""
30 lowest_start: Optional[int] = None
31 """The lowest populated bucket's starting point"""
33 highest_end: Optional[int] = None
34 """The highest populated bucket's ending point"""
36 max_label_width: Optional[int] = None
37 """The maximum label width (for display purposes)"""
40 class SimpleHistogram(Generic[T]):
41 """A simple histogram."""
43 # Useful in defining wide open bottom/top bucket bounds:
44 POSITIVE_INFINITY = math.inf
45 NEGATIVE_INFINITY = -math.inf
47 def __init__(self, buckets: List[Tuple[Bound, Bound]]):
51 buckets: a list of [start..end] tuples that define the
52 buckets we are counting population in. See also
53 :meth:`n_evenly_spaced_buckets` to generate these
56 from pyutils.math_utils import NumericPopulation
58 self.buckets: Dict[Tuple[Bound, Bound], Count] = {}
59 for start_end in buckets:
60 if self._get_bucket(start_end[0]) is not None:
61 raise Exception("Buckets overlap?!")
62 self.buckets[start_end] = 0
63 self.sigma: float = 0.0
64 self.stats: NumericPopulation = NumericPopulation()
65 self.maximum: Optional[T] = None
66 self.minimum: Optional[T] = None
70 def n_evenly_spaced_buckets(
74 ) -> List[Tuple[int, int]]:
75 """A helper method for generating the buckets argument to
76 our c'tor provided that you want N evenly spaced buckets.
79 min_bound: the minimum possible value
80 max_bound: the maximum possible value
81 n: how many buckets to create
84 A list of bounds that define N evenly spaced buckets
86 ret: List[Tuple[int, int]] = []
87 stride = int((max_bound - min_bound) / n)
89 raise Exception("Min must be < Max")
90 imax = math.ceil(max_bound)
91 imin = math.floor(min_bound)
92 for bucket_start in range(imin, imax, stride):
93 ret.append((bucket_start, bucket_start + stride))
96 def _get_bucket(self, item: T) -> Optional[Tuple[int, int]]:
97 """Given an item, what bucket is it in?"""
98 for start_end in self.buckets:
99 if start_end[0] <= item < start_end[1]:
103 def add_item(self, item: T) -> bool:
104 """Adds a single item to the histogram (reculting in us incrementing
105 the population in the correct bucket.
108 item: the item to be added
111 True if the item was successfully added or False if the item
112 is not within the bounds established during class construction.
114 bucket = self._get_bucket(item)
118 self.buckets[bucket] += 1
120 self.stats.add_number(item)
121 if self.maximum is None or item > self.maximum:
123 if self.minimum is None or item < self.minimum:
127 def add_items(self, lst: Iterable[T]) -> bool:
128 """Adds a collection of items to the histogram and increments
129 the correct bucket's population for each item.
132 lst: An iterable of items to be added
135 True if all items were added successfully or False if any
136 item was not able to be added because it was not within the
137 bounds established at object construction.
141 all_true = all_true and self.add_item(item)
144 def _get_bucket_details(self, label_formatter: str) -> BucketDetails:
145 """Get the details about one bucket."""
146 details = BucketDetails()
147 for (start, end), pop in sorted(self.buckets.items(), key=lambda x: x[0]):
149 details.num_populated_buckets += 1
150 details.last_bucket_start = start
151 if details.max_population is None or pop > details.max_population:
152 details.max_population = pop
153 if details.lowest_start is None or start < details.lowest_start:
154 details.lowest_start = start
155 if details.highest_end is None or end > details.highest_end:
156 details.highest_end = end
157 label = f'[{label_formatter}..{label_formatter}): ' % (start, end)
158 label_width = len(label)
160 details.max_label_width is None
161 or label_width > details.max_label_width
163 details.max_label_width = label_width
166 def __repr__(self, *, width: int = 80, label_formatter: str = '%d') -> str:
167 """Returns a pretty (text) representation of the histogram and
168 some vital stats about the population in it (min, max, mean,
169 median, mode, stdev, etc...)
171 from pyutils.text_utils import BarGraphText, bar_graph_string
173 details = self._get_bucket_details(label_formatter)
175 if details.num_populated_buckets == 0:
177 assert details.max_label_width is not None
178 assert details.lowest_start is not None
179 assert details.highest_end is not None
180 assert details.max_population is not None
181 sigma_label = f'[{label_formatter}..{label_formatter}): ' % (
182 details.lowest_start,
185 if len(sigma_label) > details.max_label_width:
186 details.max_label_width = len(sigma_label)
187 bar_width = width - (details.max_label_width + 17)
189 for (start, end), pop in sorted(self.buckets.items(), key=lambda x: x[0]):
190 if start < details.lowest_start:
192 label = f'[{label_formatter}..{label_formatter}): ' % (start, end)
193 bar = bar_graph_string(
195 details.max_population,
196 text=BarGraphText.NONE,
201 txt += label.rjust(details.max_label_width)
203 txt += f"({pop/self.count*100.0:5.2f}% n={pop})\n"
204 if start == details.last_bucket_start:
206 txt += '-' * width + '\n'
207 txt += sigma_label.rjust(details.max_label_width)
208 txt += ' ' * (bar_width - 2)
209 txt += f' pop(Σn)={self.count}\n'
210 txt += ' ' * (bar_width + details.max_label_width - 2)
211 txt += f' mean(x̄)={self.stats.get_mean():.3f}\n'
212 txt += ' ' * (bar_width + details.max_label_width - 2)
213 txt += f' median(p50)={self.stats.get_median():.3f}\n'
214 txt += ' ' * (bar_width + details.max_label_width - 2)
215 txt += f' mode(Mo)={self.stats.get_mode()[0]:.3f}\n'
216 txt += ' ' * (bar_width + details.max_label_width - 2)
217 txt += f' stdev(σ)={self.stats.get_stdev():.3f}\n'