3 # © Copyright 2021-2022, Scott Gasch
5 """This module contains helper functions for dealing with Python lists."""
8 from collections import Counter
9 from itertools import chain, combinations, islice
10 from typing import Any, Iterator, List, MutableSequence, Sequence, Tuple
13 def shard(lst: List[Any], size: int) -> Iterator[Any]:
15 Shards (i.e. splits) a list into sublists of size `size` whcih,
16 together, contain all items in the original unsharded list.
19 lst: the original input list to shard
20 size: the ideal shard size (number of elements per shard)
23 A generator that yields successive shards.
27 If `len(lst)` is not an even multiple of `size` then the last
28 shard will not have `size` items in it. It will have
29 `len(lst) % size` items instead.
31 >>> for sublist in shard([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 3):
32 ... [_ for _ in sublist]
38 for x in range(0, len(lst), size):
39 yield islice(lst, x, x + size)
42 def flatten(lst: List[Any]) -> List[Any]:
44 Flatten out a list. That is, for each item in list that contains
45 a list, remove the nested list and replace it with its items.
48 lst: the list to flatten
51 The flattened list. See example.
53 >>> flatten([ 1, [2, 3, 4, [5], 6], 7, [8, [9]]])
54 [1, 2, 3, 4, 5, 6, 7, 8, 9]
58 if isinstance(lst[0], list):
59 return flatten(lst[0]) + flatten(lst[1:])
60 return lst[:1] + flatten(lst[1:])
63 def prepend(item: Any, lst: List[Any]) -> List[Any]:
65 Prepend an item to a list. An alias for `list.insert(0, item)`.
66 The opposite of `list.append()`.
69 item: the item to be prepended
70 lst: the list on which to prepend
73 The list with item prepended.
75 >>> prepend('foo', ['bar', 'baz'])
82 def remove_list_if_one_element(lst: List[Any]) -> Any:
84 Remove the list and return the 0th element iff its length is one.
87 lst: the List to check
90 Either `lst` (if `len(lst) > 1`) or `lst[0]` (if `len(lst) == 1`).
92 >>> remove_list_if_one_element([1234])
95 >>> remove_list_if_one_element([1, 2, 3, 4])
104 def population_counts(lst: Sequence[Any]) -> Counter:
106 Return a population count mapping for the list (i.e. the keys are
107 list items and the values are the number of occurrances of that
108 list item in the original list). Note: this is used internally
109 to implement :meth:`most_common` and :meth:`least_common`.
112 lst: the list whose population should be counted
115 a `Counter` containing the population count of `lst` items.
117 >>> population_counts([1, 1, 1, 2, 2, 3, 3, 3, 4])
118 Counter({1: 3, 3: 3, 2: 2, 4: 1})
123 def most_common(lst: List[Any], *, count=1) -> Any:
125 Return the N most common item in the list.
128 lst: the list to find the most common item in
129 count: the number of most common items to return
132 The most common item in `lst`.
136 In the case of ties for most common item, which most common
137 item is returned is undefined.
139 >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
142 >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4], count=2)
146 p = population_counts(lst)
147 return remove_list_if_one_element([_[0] for _ in p.most_common()[0:count]])
150 def least_common(lst: List[Any], *, count=1) -> Any:
152 Return the N least common item in the list.
155 lst: the list to find the least common item in
156 count: the number of least common items to return
159 The least common item in `lst`
163 In the case of ties, which least common item is returned
166 >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4])
169 >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4], count=2)
172 p = population_counts(lst)
173 mc = p.most_common()[-count:]
175 return remove_list_if_one_element([_[0] for _ in mc])
178 def dedup_list(lst: List[Any]) -> List[Any]:
180 Remove duplicates from the list.
183 lst: the list to de-duplicate
186 The de-duplicated input list. That is, the same list with
187 all extra duplicate items removed. The list composed of
188 the set of unique items from the input `lst`
190 >>> dedup_list([1, 2, 1, 3, 3, 4, 2, 3, 4, 5, 1])
193 return list(set(lst))
196 def uniq(lst: List[Any]) -> List[Any]:
198 Alias for :meth:`dedup_list`.
200 return dedup_list(lst)
203 def contains_duplicates(lst: List[Any]) -> bool:
205 Does the list contain duplicate elements or not?
208 lst: the list to check for duplicates
211 True if the input `lst` contains duplicated items and
214 >>> lst = [1, 2, 1, 3, 3, 4, 4, 5, 6, 1, 3, 4]
215 >>> contains_duplicates(lst)
218 >>> contains_duplicates(dedup_list(lst))
229 def all_unique(lst: List[Any]) -> bool:
231 Inverted alias for :meth:`contains_duplicates`.
233 return not contains_duplicates(lst)
236 def transpose(lst: List[Any]) -> List[Any]:
238 Transpose a list of lists.
241 lst: the list of lists to be transposed.
244 The transposed result. See example.
246 >>> lst = [[1, 2], [3, 4], [5, 6]]
248 [[1, 3, 5], [2, 4, 6]]
251 transposed = zip(*lst)
252 return [list(_) for _ in transposed]
255 def ngrams(lst: Sequence[Any], n: int):
257 Return the ngrams in the sequence.
260 lst: the list in which to find ngrams
261 n: the size of each ngram to return
264 A generator that yields all ngrams of size `n` in `lst`.
266 >>> seq = 'encyclopedia'
267 >>> for _ in ngrams(seq, 3):
280 >>> seq = ['this', 'is', 'an', 'awesome', 'test']
281 >>> for _ in ngrams(seq, 3):
284 ['is', 'an', 'awesome']
285 ['an', 'awesome', 'test']
287 for i in range(len(lst) - n + 1):
291 def permute(seq: str):
293 Returns all permutations of a sequence.
296 seq: the sequence to permute
299 All permutations creatable by shuffling items in `seq`.
303 Takes O(N!) time, beware of large inputs.
305 >>> for x in permute('cat'):
314 yield from _permute(seq, "")
317 def _permute(seq: str, path: str):
318 """Internal helper to permute items recursively."""
323 for i in range(seq_len):
328 yield from _permute(cdr, path + car)
331 def shuffle(seq: MutableSequence[Any]) -> MutableSequence[Any]:
332 """Shuffles a sequence into a random order.
335 seq: a sequence to shuffle
338 The shuffled sequence.
341 >>> shuffle([1, 2, 3, 4, 5])
344 >>> shuffle('example')
347 if isinstance(seq, str):
348 from pyutils import string_utils
350 return string_utils.shuffle(seq)
356 def scramble(seq: MutableSequence[Any]) -> MutableSequence[Any]:
357 """An alias for :meth:`shuffle`."""
361 def binary_search(lst: Sequence[Any], target: Any) -> Tuple[bool, int]:
362 """Performs a binary search on lst (which must already be sorted).
365 lst: the (already sorted!) list in which to search
366 target: the item value to be found
369 A Tuple composed of a bool which indicates whether the
370 target was found and an int which indicates the index closest to
371 target whether it was found or not.
373 >>> a = [1, 4, 5, 6, 7, 9, 10, 11]
374 >>> binary_search(a, 4)
377 >>> binary_search(a, 12)
380 >>> binary_search(a, 3)
383 >>> binary_search(a, 2)
387 >>> binary_search(a, 4)
388 Traceback (most recent call last):
397 assert x >= last # This asserts iff the list isn't sorted
398 last = x # in ascending order.
399 return _binary_search(lst, target, 0, len(lst) - 1)
403 lst: Sequence[Any], target: Any, low: int, high: int
404 ) -> Tuple[bool, int]:
405 """Internal helper to perform a binary search recursively."""
407 mid = (high + low) // 2
408 if lst[mid] == target:
410 elif lst[mid] > target:
411 return _binary_search(lst, target, low, mid - 1)
413 return _binary_search(lst, target, mid + 1, high)
418 def powerset(seq: Sequence[Any]) -> Iterator[Sequence[Any]]:
419 """Returns the powerset of the items in the input sequence. That is,
420 return the set containing every set constructable using items from
421 seq (including the empty set and the "full" set: `seq` itself).
424 seq: the sequence whose items will be used to construct the powerset.
427 The powerset composed of all sets possible to create with items from `seq`.
428 See: https://en.wikipedia.org/wiki/Power_set.
430 >>> for x in powerset([1, 2, 3]):
441 return chain.from_iterable(combinations(seq, r) for r in range(len(seq) + 1))
444 if __name__ == '__main__':