X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=src%2Fpyutils%2Flist_utils.py;h=f0a4e826a28f27e57ee06acb9668499853dbd512;hb=HEAD;hp=c67db7d19cdeffbc35f8e8c1b0d1a000069574a7;hpb=69566c003b4f1c3a4905f37d3735d7921502d14a;p=pyutils.git diff --git a/src/pyutils/list_utils.py b/src/pyutils/list_utils.py index c67db7d..f0a4e82 100644 --- a/src/pyutils/list_utils.py +++ b/src/pyutils/list_utils.py @@ -1,18 +1,41 @@ #!/usr/bin/env python3 -# © Copyright 2021-2022, Scott Gasch +# © Copyright 2021-2023, Scott Gasch -"""Some useful(?) utilities for dealing with Lists.""" +"""This module contains helper functions for dealing with Python lists.""" import random from collections import Counter from itertools import chain, combinations, islice -from typing import Any, Iterator, List, MutableSequence, Sequence, Tuple +from typing import ( + Any, + Generator, + Iterator, + List, + MutableSequence, + Sequence, + Tuple, + TypeVar, +) def shard(lst: List[Any], size: int) -> Iterator[Any]: """ - Yield successive size-sized shards from lst. + Shards (i.e. splits) a list into sublists of size `size` whcih, + together, contain all items in the original unsharded list. + + Args: + lst: the original input list to shard + size: the ideal shard size (number of elements per shard) + + Returns: + A generator that yields successive shards. + + .. note:: + + If `len(lst)` is not an even multiple of `size` then the last + shard will not have `size` items in it. It will have + `len(lst) % size` items instead. >>> for sublist in shard([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 3): ... [_ for _ in sublist] @@ -20,7 +43,6 @@ def shard(lst: List[Any], size: int) -> Iterator[Any]: [4, 5, 6] [7, 8, 9] [10, 11, 12] - """ for x in range(0, len(lst), size): yield islice(lst, x, x + size) @@ -28,11 +50,17 @@ def shard(lst: List[Any], size: int) -> Iterator[Any]: def flatten(lst: List[Any]) -> List[Any]: """ - Flatten out a list: + Flatten out a list. That is, for each item in list that contains + a list, remove the nested list and replace it with its items. + + Args: + lst: the list to flatten + + Returns: + The flattened list. See example. >>> flatten([ 1, [2, 3, 4, [5], 6], 7, [8, [9]]]) [1, 2, 3, 4, 5, 6, 7, 8, 9] - """ if len(lst) == 0: return lst @@ -43,11 +71,18 @@ def flatten(lst: List[Any]) -> List[Any]: def prepend(item: Any, lst: List[Any]) -> List[Any]: """ - Prepend an item to a list. + Prepend an item to a list. An alias for `list.insert(0, item)`. + The opposite of `list.append()`. + + Args: + item: the item to be prepended + lst: the list on which to prepend + + Returns: + The list with item prepended. >>> prepend('foo', ['bar', 'baz']) ['foo', 'bar', 'baz'] - """ lst.insert(0, item) return lst @@ -57,12 +92,17 @@ def remove_list_if_one_element(lst: List[Any]) -> Any: """ Remove the list and return the 0th element iff its length is one. + Args: + lst: the List to check + + Returns: + Either `lst` (if `len(lst) > 1`) or `lst[0]` (if `len(lst) == 1`). + >>> remove_list_if_one_element([1234]) 1234 >>> remove_list_if_one_element([1, 2, 3, 4]) [1, 2, 3, 4] - """ if len(lst) == 1: return lst[0] @@ -74,20 +114,36 @@ def population_counts(lst: Sequence[Any]) -> Counter: """ Return a population count mapping for the list (i.e. the keys are list items and the values are the number of occurrances of that - list item in the original list. + list item in the original list). Note: this is used internally + to implement :meth:`most_common` and :meth:`least_common`. + + Args: + lst: the list whose population should be counted + + Returns: + a `Counter` containing the population count of `lst` items. >>> population_counts([1, 1, 1, 2, 2, 3, 3, 3, 4]) Counter({1: 3, 3: 3, 2: 2, 4: 1}) - """ return Counter(lst) -def most_common(lst: List[Any], *, count=1) -> Any: - +def most_common(lst: List[Any], *, count: int = 1) -> Any: """ - Return the most common item in the list. In the case of ties, - which most common item is returned will be random. + Return the N most common item in the list. + + Args: + lst: the list to find the most common item in + count: the number of most common items to return + + Returns: + The most common item in `lst`. + + .. warning:: + + In the case of ties for most common item, which most common + item is returned is undefined. >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4]) 3 @@ -100,17 +156,27 @@ def most_common(lst: List[Any], *, count=1) -> Any: return remove_list_if_one_element([_[0] for _ in p.most_common()[0:count]]) -def least_common(lst: List[Any], *, count=1) -> Any: +def least_common(lst: List[Any], *, count: int = 1) -> Any: """ - Return the least common item in the list. In the case of - ties, which least common item is returned will be random. + Return the N least common item in the list. + + Args: + lst: the list to find the least common item in + count: the number of least common items to return + + Returns: + The least common item in `lst` + + .. warning:: + + In the case of ties, which least common item is returned + is undefined. >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4]) 4 >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4], count=2) [4, 2] - """ p = population_counts(lst) mc = p.most_common()[-count:] @@ -120,25 +186,39 @@ def least_common(lst: List[Any], *, count=1) -> Any: def dedup_list(lst: List[Any]) -> List[Any]: """ - Remove duplicates from the list performantly. + Remove duplicates from the list. + + Args: + lst: the list to de-duplicate + + Returns: + The de-duplicated input list. That is, the same list with + all extra duplicate items removed. The list composed of + the set of unique items from the input `lst` >>> dedup_list([1, 2, 1, 3, 3, 4, 2, 3, 4, 5, 1]) [1, 2, 3, 4, 5] - """ return list(set(lst)) def uniq(lst: List[Any]) -> List[Any]: """ - Alias for dedup_list. + Alias for :meth:`dedup_list`. """ return dedup_list(lst) def contains_duplicates(lst: List[Any]) -> bool: """ - Does the list contian duplicate elements or not? + Does the list contain duplicate elements or not? + + Args: + lst: the list to check for duplicates + + Returns: + True if the input `lst` contains duplicated items and + False otherwise. >>> lst = [1, 2, 1, 3, 3, 4, 4, 5, 6, 1, 3, 4] >>> contains_duplicates(lst) @@ -146,7 +226,6 @@ def contains_duplicates(lst: List[Any]) -> bool: >>> contains_duplicates(dedup_list(lst)) False - """ seen = set() for _ in lst: @@ -158,7 +237,7 @@ def contains_duplicates(lst: List[Any]) -> bool: def all_unique(lst: List[Any]) -> bool: """ - Inverted alias for contains_duplicates. + Inverted alias for :meth:`contains_duplicates`. """ return not contains_duplicates(lst) @@ -167,6 +246,12 @@ def transpose(lst: List[Any]) -> List[Any]: """ Transpose a list of lists. + Args: + lst: the list of lists to be transposed. + + Returns: + The transposed result. See example. + >>> lst = [[1, 2], [3, 4], [5, 6]] >>> transpose(lst) [[1, 3, 5], [2, 4, 6]] @@ -176,10 +261,20 @@ def transpose(lst: List[Any]) -> List[Any]: return [list(_) for _ in transposed] -def ngrams(lst: Sequence[Any], n): +T = TypeVar('T') + + +def ngrams(lst: Sequence[T], n: int) -> Generator[Sequence[T], T, None]: """ Return the ngrams in the sequence. + Args: + lst: the list in which to find ngrams + n: the size of each ngram to return + + Returns: + A generator that yields all ngrams of size `n` in `lst`. + >>> seq = 'encyclopedia' >>> for _ in ngrams(seq, 3): ... _ @@ -205,9 +300,19 @@ def ngrams(lst: Sequence[Any], n): yield lst[i : i + n] -def permute(seq: str): +def permute(seq: str) -> Generator[str, str, None]: """ - Returns all permutations of a sequence; takes O(N!) time. + Returns all permutations of a sequence. + + Args: + seq: the sequence to permute + + Returns: + All permutations creatable by shuffling items in `seq`. + + .. warning:: + + Takes O(N!) time, beware of large inputs. >>> for x in permute('cat'): ... print(x) @@ -217,12 +322,12 @@ def permute(seq: str): atc tca tac - """ yield from _permute(seq, "") -def _permute(seq: str, path: str): +def _permute(seq: str, path: str) -> Generator[str, str, None]: + """Internal helper to permute items recursively.""" seq_len = len(seq) if seq_len == 0: yield path @@ -238,13 +343,18 @@ def _permute(seq: str, path: str): def shuffle(seq: MutableSequence[Any]) -> MutableSequence[Any]: """Shuffles a sequence into a random order. + Args: + seq: a sequence to shuffle + + Returns: + The shuffled sequence. + >>> random.seed(22) >>> shuffle([1, 2, 3, 4, 5]) [3, 4, 1, 5, 2] >>> shuffle('example') 'empaelx' - """ if isinstance(seq, str): from pyutils import string_utils @@ -256,14 +366,21 @@ def shuffle(seq: MutableSequence[Any]) -> MutableSequence[Any]: def scramble(seq: MutableSequence[Any]) -> MutableSequence[Any]: + """An alias for :meth:`shuffle`.""" return shuffle(seq) def binary_search(lst: Sequence[Any], target: Any) -> Tuple[bool, int]: """Performs a binary search on lst (which must already be sorted). - Returns a Tuple composed of a bool which indicates whether the - target was found and an int which indicates the index closest to - target whether it was found or not. + + Args: + lst: the (already sorted!) list in which to search + target: the item value to be found + + Returns: + A Tuple composed of a bool which indicates whether the + target was found and an int which indicates the index closest to + target whether it was found or not. >>> a = [1, 4, 5, 6, 7, 9, 10, 11] >>> binary_search(a, 4) @@ -297,6 +414,7 @@ def binary_search(lst: Sequence[Any], target: Any) -> Tuple[bool, int]: def _binary_search( lst: Sequence[Any], target: Any, low: int, high: int ) -> Tuple[bool, int]: + """Internal helper to perform a binary search recursively.""" if high >= low: mid = (high + low) // 2 if lst[mid] == target: @@ -309,8 +427,17 @@ def _binary_search( return (False, low) -def powerset(lst: Sequence[Any]) -> Iterator[Sequence[Any]]: - """Returns the powerset of the items in the input sequence. +def powerset(seq: Sequence[Any]) -> Iterator[Sequence[Any]]: + """Returns the powerset of the items in the input sequence. That is, + return the set containing every set constructable using items from + seq (including the empty set and the "full" set: `seq` itself). + + Args: + seq: the sequence whose items will be used to construct the powerset. + + Returns: + The powerset composed of all sets possible to create with items from `seq`. + See: https://en.wikipedia.org/wiki/Power_set. >>> for x in powerset([1, 2, 3]): ... print(x) @@ -323,10 +450,10 @@ def powerset(lst: Sequence[Any]) -> Iterator[Sequence[Any]]: (2, 3) (1, 2, 3) """ - return chain.from_iterable(combinations(lst, r) for r in range(len(lst) + 1)) + return chain.from_iterable(combinations(seq, r) for r in range(len(seq) + 1)) -if __name__ == '__main__': +if __name__ == "__main__": import doctest doctest.testmod()