3 # © Copyright 2021-2023, Scott Gasch
5 """This module contains helper functions for dealing with Python lists."""
8 from collections import Counter
9 from itertools import chain, combinations, islice
22 def shard(lst: List[Any], size: int) -> Iterator[Any]:
24 Shards (i.e. splits) a list into sublists of size `size` whcih,
25 together, contain all items in the original unsharded list.
28 lst: the original input list to shard
29 size: the ideal shard size (number of elements per shard)
32 A generator that yields successive shards.
36 If `len(lst)` is not an even multiple of `size` then the last
37 shard will not have `size` items in it. It will have
38 `len(lst) % size` items instead.
40 >>> for sublist in shard([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 3):
41 ... [_ for _ in sublist]
47 for x in range(0, len(lst), size):
48 yield islice(lst, x, x + size)
51 def flatten(lst: List[Any]) -> List[Any]:
53 Flatten out a list. That is, for each item in list that contains
54 a list, remove the nested list and replace it with its items.
57 lst: the list to flatten
60 The flattened list. See example.
62 >>> flatten([ 1, [2, 3, 4, [5], 6], 7, [8, [9]]])
63 [1, 2, 3, 4, 5, 6, 7, 8, 9]
67 if isinstance(lst[0], list):
68 return flatten(lst[0]) + flatten(lst[1:])
69 return lst[:1] + flatten(lst[1:])
72 def prepend(item: Any, lst: List[Any]) -> List[Any]:
74 Prepend an item to a list. An alias for `list.insert(0, item)`.
75 The opposite of `list.append()`.
78 item: the item to be prepended
79 lst: the list on which to prepend
82 The list with item prepended.
84 >>> prepend('foo', ['bar', 'baz'])
91 def remove_list_if_one_element(lst: List[Any]) -> Any:
93 Remove the list and return the 0th element iff its length is one.
96 lst: the List to check
99 Either `lst` (if `len(lst) > 1`) or `lst[0]` (if `len(lst) == 1`).
101 >>> remove_list_if_one_element([1234])
104 >>> remove_list_if_one_element([1, 2, 3, 4])
113 def population_counts(lst: Sequence[Any]) -> Counter:
115 Return a population count mapping for the list (i.e. the keys are
116 list items and the values are the number of occurrances of that
117 list item in the original list). Note: this is used internally
118 to implement :meth:`most_common` and :meth:`least_common`.
121 lst: the list whose population should be counted
124 a `Counter` containing the population count of `lst` items.
126 >>> population_counts([1, 1, 1, 2, 2, 3, 3, 3, 4])
127 Counter({1: 3, 3: 3, 2: 2, 4: 1})
132 def most_common(lst: List[Any], *, count: int = 1) -> Any:
134 Return the N most common item in the list.
137 lst: the list to find the most common item in
138 count: the number of most common items to return
141 The most common item in `lst`.
145 In the case of ties for most common item, which most common
146 item is returned is undefined.
148 >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
151 >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4], count=2)
155 p = population_counts(lst)
156 return remove_list_if_one_element([_[0] for _ in p.most_common()[0:count]])
159 def least_common(lst: List[Any], *, count: int = 1) -> Any:
161 Return the N least common item in the list.
164 lst: the list to find the least common item in
165 count: the number of least common items to return
168 The least common item in `lst`
172 In the case of ties, which least common item is returned
175 >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4])
178 >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4], count=2)
181 p = population_counts(lst)
182 mc = p.most_common()[-count:]
184 return remove_list_if_one_element([_[0] for _ in mc])
187 def dedup_list(lst: List[Any]) -> List[Any]:
189 Remove duplicates from the list.
192 lst: the list to de-duplicate
195 The de-duplicated input list. That is, the same list with
196 all extra duplicate items removed. The list composed of
197 the set of unique items from the input `lst`
199 >>> dedup_list([1, 2, 1, 3, 3, 4, 2, 3, 4, 5, 1])
202 return list(set(lst))
205 def uniq(lst: List[Any]) -> List[Any]:
207 Alias for :meth:`dedup_list`.
209 return dedup_list(lst)
212 def contains_duplicates(lst: List[Any]) -> bool:
214 Does the list contain duplicate elements or not?
217 lst: the list to check for duplicates
220 True if the input `lst` contains duplicated items and
223 >>> lst = [1, 2, 1, 3, 3, 4, 4, 5, 6, 1, 3, 4]
224 >>> contains_duplicates(lst)
227 >>> contains_duplicates(dedup_list(lst))
238 def all_unique(lst: List[Any]) -> bool:
240 Inverted alias for :meth:`contains_duplicates`.
242 return not contains_duplicates(lst)
245 def transpose(lst: List[Any]) -> List[Any]:
247 Transpose a list of lists.
250 lst: the list of lists to be transposed.
253 The transposed result. See example.
255 >>> lst = [[1, 2], [3, 4], [5, 6]]
257 [[1, 3, 5], [2, 4, 6]]
260 transposed = zip(*lst)
261 return [list(_) for _ in transposed]
267 def ngrams(lst: Sequence[T], n: int) -> Generator[Sequence[T], T, None]:
269 Return the ngrams in the sequence.
272 lst: the list in which to find ngrams
273 n: the size of each ngram to return
276 A generator that yields all ngrams of size `n` in `lst`.
278 >>> seq = 'encyclopedia'
279 >>> for _ in ngrams(seq, 3):
292 >>> seq = ['this', 'is', 'an', 'awesome', 'test']
293 >>> for _ in ngrams(seq, 3):
296 ['is', 'an', 'awesome']
297 ['an', 'awesome', 'test']
299 for i in range(len(lst) - n + 1):
303 def permute(seq: str) -> Generator[str, str, None]:
305 Returns all permutations of a sequence.
308 seq: the sequence to permute
311 All permutations creatable by shuffling items in `seq`.
315 Takes O(N!) time, beware of large inputs.
317 >>> for x in permute('cat'):
326 yield from _permute(seq, "")
329 def _permute(seq: str, path: str) -> Generator[str, str, None]:
330 """Internal helper to permute items recursively."""
335 for i in range(seq_len):
340 yield from _permute(cdr, path + car)
343 def shuffle(seq: MutableSequence[Any]) -> MutableSequence[Any]:
344 """Shuffles a sequence into a random order.
347 seq: a sequence to shuffle
350 The shuffled sequence.
353 >>> shuffle([1, 2, 3, 4, 5])
356 >>> shuffle('example')
359 if isinstance(seq, str):
360 from pyutils import string_utils
362 return string_utils.shuffle(seq)
368 def scramble(seq: MutableSequence[Any]) -> MutableSequence[Any]:
369 """An alias for :meth:`shuffle`."""
373 def binary_search(lst: Sequence[Any], target: Any) -> Tuple[bool, int]:
374 """Performs a binary search on lst (which must already be sorted).
377 lst: the (already sorted!) list in which to search
378 target: the item value to be found
381 A Tuple composed of a bool which indicates whether the
382 target was found and an int which indicates the index closest to
383 target whether it was found or not.
385 >>> a = [1, 4, 5, 6, 7, 9, 10, 11]
386 >>> binary_search(a, 4)
389 >>> binary_search(a, 12)
392 >>> binary_search(a, 3)
395 >>> binary_search(a, 2)
399 >>> binary_search(a, 4)
400 Traceback (most recent call last):
409 assert x >= last # This asserts iff the list isn't sorted
410 last = x # in ascending order.
411 return _binary_search(lst, target, 0, len(lst) - 1)
415 lst: Sequence[Any], target: Any, low: int, high: int
416 ) -> Tuple[bool, int]:
417 """Internal helper to perform a binary search recursively."""
419 mid = (high + low) // 2
420 if lst[mid] == target:
422 elif lst[mid] > target:
423 return _binary_search(lst, target, low, mid - 1)
425 return _binary_search(lst, target, mid + 1, high)
430 def powerset(seq: Sequence[Any]) -> Iterator[Sequence[Any]]:
431 """Returns the powerset of the items in the input sequence. That is,
432 return the set containing every set constructable using items from
433 seq (including the empty set and the "full" set: `seq` itself).
436 seq: the sequence whose items will be used to construct the powerset.
439 The powerset composed of all sets possible to create with items from `seq`.
440 See: https://en.wikipedia.org/wiki/Power_set.
442 >>> for x in powerset([1, 2, 3]):
453 return chain.from_iterable(combinations(seq, r) for r in range(len(seq) + 1))
456 if __name__ == "__main__":