list_utils.py

   1 #!/usr/bin/env python3
   2
   3 from collections import Counter
   4 from itertools import islice
   5 from typing import Any, Iterator, List, Mapping, Sequence
   6
   7
   8 def shard(lst: List[Any], size: int) -> Iterator[Any]:
   9     """
  10     Yield successive size-sized shards from lst.
  11
  12     >>> for sublist in shard([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 3):
  13     ...     [_ for _ in sublist]
  14     [1, 2, 3]
  15     [4, 5, 6]
  16     [7, 8, 9]
  17     [10, 11, 12]
  18
  19     """
  20     for x in range(0, len(lst), size):
  21         yield islice(lst, x, x + size)
  22
  23
  24 def flatten(lst: List[Any]) -> List[Any]:
  25     """
  26     Flatten out a list:
  27
  28     >>> flatten([ 1, [2, 3, 4, [5], 6], 7, [8, [9]]])
  29     [1, 2, 3, 4, 5, 6, 7, 8, 9]
  30
  31     """
  32     if len(lst) == 0:
  33         return lst
  34     if isinstance(lst[0], list):
  35         return flatten(lst[0]) + flatten(lst[1:])
  36     return lst[:1] + flatten(lst[1:])
  37
  38
  39 def prepend(item: Any, lst: List[Any]) -> List[Any]:
  40     """
  41     Prepend an item to a list.
  42
  43     >>> prepend('foo', ['bar', 'baz'])
  44     ['foo', 'bar', 'baz']
  45
  46     """
  47     lst.insert(0, item)
  48     return lst
  49
  50
  51 def population_counts(lst: List[Any]) -> Mapping[Any, int]:
  52     """
  53     Return a population count mapping for the list (i.e. the keys are
  54     list items and the values are the number of occurrances of that
  55     list item in the original list.
  56
  57     >>> population_counts([1, 1, 1, 2, 2, 3, 3, 3, 4])
  58     Counter({1: 3, 3: 3, 2: 2, 4: 1})
  59
  60     """
  61     return Counter(lst)
  62
  63
  64 def most_common_item(lst: List[Any]) -> Any:
  65
  66     """
  67     Return the most common item in the list.  In the case of ties,
  68     which most common item is returned will be random.
  69
  70     >>> most_common_item([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
  71     3
  72
  73     """
  74     return population_counts(lst).most_common(1)[0][0]
  75
  76
  77 def least_common_item(lst: List[Any]) -> Any:
  78     """
  79     Return the least common item in the list.  In the case of
  80     ties, which least common item is returned will be random.
  81
  82     >>> least_common_item([1, 1, 1, 2, 2, 3, 3, 3, 4])
  83     4
  84
  85     """
  86     return population_counts(lst).most_common()[-1][0]
  87
  88
  89 def dedup_list(lst: List[Any]) -> List[Any]:
  90     """
  91     Remove duplicates from the list performantly.
  92
  93     >>> dedup_list([1, 2, 1, 3, 3, 4, 2, 3, 4, 5, 1])
  94     [1, 2, 3, 4, 5]
  95
  96     """
  97     return list(set(lst))
  98
  99
 100 def uniq(lst: List[Any]) -> List[Any]:
 101     """
 102     Alias for dedup_list.
 103     """
 104     return dedup_list(lst)
 105
 106
 107 def ngrams(lst: Sequence[Any], n):
 108     """
 109     Return the ngrams in the sequence.
 110
 111     >>> seq = 'encyclopedia'
 112     >>> for _ in ngrams(seq, 3):
 113     ...     _
 114     'enc'
 115     'ncy'
 116     'cyc'
 117     'ycl'
 118     'clo'
 119     'lop'
 120     'ope'
 121     'ped'
 122     'edi'
 123     'dia'
 124
 125     >>> seq = ['this', 'is', 'an', 'awesome', 'test']
 126     >>> for _ in ngrams(seq, 3):
 127     ...     _
 128     ['this', 'is', 'an']
 129     ['is', 'an', 'awesome']
 130     ['an', 'awesome', 'test']
 131     """
 132     for i in range(len(lst) - n + 1):
 133         yield lst[i:i + n]
 134
 135
 136 if __name__ == '__main__':
 137     import doctest
 138     doctest.testmod()