+
+
def strip_ansi_sequences(in_str: str) -> str:
    """
    Args:
        in_str: the string to strip

    Returns:
        in_str with recognized ANSI escape sequences removed.

    .. warning::
        This method works by using a regular expression.
        It matches CSI sequences per ECMA-48 (parameter, intermediate,
        and final bytes) but other escape families (e.g. OSC titles)
        may slip through; caveat emptor.

    >>> import ansi as a
    >>> s = a.fg('blue') + 'blue!' + a.reset()
    >>> len(s)  # '\x1b[38;5;21mblue!\x1b[m'
    18
    >>> len(strip_ansi_sequences(s))
    5
    >>> strip_ansi_sequences(s)
    'blue!'

    """
    # A CSI sequence is ESC '[' + parameter bytes (0x30-0x3F) +
    # intermediate bytes (0x20-0x2F) + one final byte (0x40-0x7E).
    # The previous pattern only accepted lowercase final bytes, so
    # sequences like '\x1b[2J' (clear) and '\x1b[1A' (cursor up)
    # were left in the output.
    return re.sub(r'\x1b\[[0-?]*[ -/]*[@-~]', '', in_str)
+
+
class SprintfStdout(contextlib.AbstractContextManager):
    """
    A context manager that diverts everything printed to stdout into an
    in-memory buffer instead of the terminal.  The value bound by the
    ``with`` statement is a zero-argument callable that returns the
    captured text.

    >>> with SprintfStdout() as buf:
    ...     print("test")
    ...     print("1, 2, 3")
    ...
    >>> print(buf(), end='')
    test
    1, 2, 3

    """

    def __init__(self) -> None:
        # Accumulates everything written to stdout while active.
        self.destination = io.StringIO()
        self.recorder: contextlib.redirect_stdout

    def __enter__(self) -> Callable[[], str]:
        self.recorder = contextlib.redirect_stdout(self.destination)
        self.recorder.__enter__()
        # The bound getvalue method is itself the () -> str accessor.
        return self.destination.getvalue

    def __exit__(self, *args) -> Literal[False]:
        self.recorder.__exit__(*args)
        self.destination.seek(0)
        # Never suppress exceptions raised inside the with-block.
        return False
+
+
def capitalize_first_letter(in_str: str) -> str:
    """
    Args:
        in_str: the string to capitalize

    Returns:
        in_str with the first character capitalized; the empty string
        is returned unchanged.

    >>> capitalize_first_letter('test')
    'Test'
    >>> capitalize_first_letter("ALREADY!")
    'ALREADY!'
    >>> capitalize_first_letter('')
    ''

    """
    # Slicing (rather than in_str[0]) makes the empty string safe;
    # indexing raised IndexError on ''.
    return in_str[:1].upper() + in_str[1:]
+
+
def it_they(n: int) -> str:
    """
    Args:
        n: how many of them are there?

    Returns:
        'it' if n is one or 'they' otherwise.

    Suggested usage::

        n = num_files_saved_to_tmp()
        print(f'Saved file{pluralize(n)} successfully.')
        print(f'{it_they(n)} {is_are(n)} located in /tmp.')

    >>> it_they(1)
    'it'
    >>> it_they(100)
    'they'
    """
    return "it" if n == 1 else "they"
+
+
def is_are(n: int) -> str:
    """
    Args:
        n: how many of them are there?

    Returns:
        'is' if n is one or 'are' otherwise.

    Suggested usage::

        n = num_files_saved_to_tmp()
        print(f'Saved file{pluralize(n)} successfully.')
        print(f'{it_they(n)} {is_are(n)} located in /tmp.')

    >>> is_are(1)
    'is'
    >>> is_are(2)
    'are'

    """
    return "is" if n == 1 else "are"
+
+
def pluralize(n: int) -> str:
    """
    Args:
        n: how many of them are there?

    Returns:
        's' unless n is exactly one, in which case ''.

    Suggested usage::

        n = num_files_saved_to_tmp()
        print(f'Saved file{pluralize(n)} successfully.')
        print(f'{it_they(n)} {is_are(n)} located in /tmp.')

    >>> pluralize(15)
    's'
    >>> count = 1
    >>> print(f'There {is_are(count)} {count} file{pluralize(count)}.')
    There is 1 file.
    >>> count = 4
    >>> print(f'There {is_are(count)} {count} file{pluralize(count)}.')
    There are 4 files.
    """
    return "" if n == 1 else "s"
+
+
def make_contractions(txt: str) -> str:
    """This code glues words in txt together to form (English)
    contractions.

    Args:
        txt: the input text to be contractionized.

    Returns:
        Output text identical to original input except for any
        recognized contractions are formed.

    .. note::
        The order in which we create contractions is defined by the
        implementation and what I thought made more sense when writing
        this code.

    >>> make_contractions('It is nice today.')
    "It's nice today."

    >>> make_contractions('I can not even...')
    "I can't even..."

    >>> make_contractions('She could not see!')
    "She couldn't see!"

    >>> make_contractions('But she will not go.')
    "But she won't go."

    >>> make_contractions('Verily, I shall not.')
    "Verily, I shan't."

    >>> make_contractions('No you cannot.')
    "No you can't."

    >>> make_contractions('I said you can not go.')
    "I said you can't go."
    """

    # Table of (first-word list, second-word pattern list) pairs.  In
    # the second-word patterns, parenthesized letters are regex capture
    # groups: captured letters survive into the contraction while the
    # unparenthesized ones are dropped (e.g. 'woul(d)' maps "I would"
    # to "I'd").  Matching is case-insensitive and backreferences echo
    # the matched text, so the input's capitalization is preserved.
    first_second = [
        (
            [
                'are',
                'could',
                'did',
                'has',
                'have',
                'is',
                'must',
                'should',
                'was',
                'were',
                'would',
            ],
            # "<verb> not" -> "<verb>n't"
            ['(n)o(t)'],
        ),
        (
            [
                "I",
                "you",
                "he",
                "she",
                "it",
                "we",
                "they",
                "how",
                "why",
                "when",
                "where",
                "who",
                "there",
            ],
            # "<pronoun> would/is/are/has/have/had/will" -> 'd/'s/'re/...
            ['woul(d)', 'i(s)', 'a(re)', 'ha(s)', 'ha(ve)', 'ha(d)', 'wi(ll)'],
        ),
    ]

    # Special cases: can't, shan't and won't.  These alter or drop
    # letters from the *first* word too, so the generic table above
    # cannot express them.  \s* (not \s+) lets one-word 'cannot' match
    # as well as 'can not'.
    txt = re.sub(r'\b(can)\s*no(t)\b', r"\1'\2", txt, count=0, flags=re.IGNORECASE)
    txt = re.sub(r'\b(sha)ll\s*(n)o(t)\b', r"\1\2'\3", txt, count=0, flags=re.IGNORECASE)
    txt = re.sub(
        r'\b(w)ill\s*(n)(o)(t)\b',
        # will not -> won't: the captured 'o' moves ahead of the 'n'.
        r"\1\3\2'\4",
        txt,
        count=0,
        flags=re.IGNORECASE,
    )

    for first_list, second_list in first_second:
        for first in first_list:
            for second in second_list:
                # Disallow there're/where're. They're valid English
                # but sound weird.
                if (first in ('there', 'where')) and second == 'a(re)':
                    continue

                pattern = fr'\b({first})\s+{second}\b'
                if second == '(n)o(t)':
                    # Three groups: verb, 'n', 't' -> "<verb>n't"
                    replacement = r"\1\2'\3"
                else:
                    # Two groups: word + kept letters -> "<word>'<letters>"
                    replacement = r"\1'\2"
                txt = re.sub(pattern, replacement, txt, count=0, flags=re.IGNORECASE)

    return txt
+
+
def thify(n: int) -> str:
    """
    Args:
        n: how many of them are there?

    Returns:
        The proper ordinal suffix for a number.

    Suggested usage::

        attempt_count = 0
        while True:
            attempt_count += 1
            if try_the_thing():
                break
            print(f'The {attempt_count}{thify(attempt_count)} failed, trying again.')

    >>> thify(1)
    'st'
    >>> thify(33)
    'rd'
    >>> thify(16)
    'th'
    >>> thify(12)
    'th'
    """
    # 11th, 12th and 13th are exceptions to the last-digit rule
    # (the old last-digit-only logic produced '11st', '12nd', '13rd').
    # abs() keeps negative inputs on the same rule as positives.
    tail = abs(n) % 100
    if tail in (11, 12, 13):
        return "th"
    return {1: "st", 2: "nd", 3: "rd"}.get(tail % 10, "th")
+
+
def ngrams(txt: str, n: int):
    """
    Args:
        txt: the string to create ngrams using
        n: how many words per ngram created?

    Returns:
        Generates the ngrams from the input string.

    >>> [x for x in ngrams('This is a test', 2)]
    ['This is', 'is a', 'a test']
    """
    # Words come from str.split, so they contain no whitespace and a
    # simple space-join reconstructs each ngram.
    for gram in ngrams_presplit(txt.split(), n):
        yield ' '.join(gram)
+
+
def ngrams_presplit(words: Sequence[str], n: int):
    """
    Same as :meth:`ngrams` but with the string pre-split.

    Args:
        words: the pre-split words to group into ngrams
        n: how many words per ngram

    Returns:
        Delegates to list_utils.ngrams (project helper); presumably an
        iterable of n-word groupings -- see that module for specifics.
    """
    return list_utils.ngrams(words, n)
+
+
def bigrams(txt: str):
    """Generates the bigrams (n=2) of the given string.

    Convenience wrapper; see :meth:`ngrams` for details.
    """
    return ngrams(txt, 2)
+
+
def trigrams(txt: str):
    """Generates the trigrams (n=3) of the given string.

    Convenience wrapper; see :meth:`ngrams` for details.
    """
    return ngrams(txt, 3)
+
+
def shuffle_columns_into_list(
    input_lines: Sequence[str], column_specs: Iterable[Iterable[int]], delim=''
) -> Iterable[str]:
    """Helper to shuffle / parse columnar data and return the results as a
    list.

    Args:
        input_lines: A sequence of strings that represents text that
            has been broken into columns by the caller
        column_specs: an iterable collection of numeric sequences that
            indicate one or more column numbers to copy to form the Nth
            position in the output list.  See example below.
        delim: for column_specs that indicate we should copy more than
            one column from the input into this position, use delim to
            separate source data.  Defaults to ''.

    Returns:
        A list of string created by following the instructions set forth
        in column_specs.

    >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul  9 11:34 acl_test.py'.split()
    >>> shuffle_columns_into_list(
    ...     cols,
    ...     [ [8], [2, 3], [5, 6, 7] ],
    ...     delim='!',
    ... )
    ['acl_test.py', 'scott!wheel', 'Jul!9!11:34']
    """
    # Each output slot is the delim-joined concatenation of the columns
    # its spec names.  str.join (instead of the old concat-then-strip)
    # no longer destroys delimiter characters that legitimately appear
    # at the edges of a column's own data.
    return [delim.join(input_lines[n] for n in spec) for spec in column_specs]
+
+
def shuffle_columns_into_dict(
    input_lines: Sequence[str],
    column_specs: Iterable[Tuple[str, Iterable[int]]],
    delim='',
) -> Dict[str, str]:
    """Helper to shuffle / parse columnar data and return the results
    as a dict.

    Args:
        input_lines: a sequence of strings that represents text that
            has been broken into columns by the caller
        column_specs: instructions for what dictionary keys to apply
            to individual or compound input column data.  See example
            below.
        delim: when forming compound output data by gluing more than
            one input column together, use this character to separate
            the source data.  Defaults to ''.

    Returns:
        A dict formed by applying the column_specs instructions.

    >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul  9 11:34 acl_test.py'.split()
    >>> shuffle_columns_into_dict(
    ...     cols,
    ...     [ ('filename', [8]), ('owner', [2, 3]), ('mtime', [5, 6, 7]) ],
    ...     delim='!',
    ... )
    {'filename': 'acl_test.py', 'owner': 'scott!wheel', 'mtime': 'Jul!9!11:34'}
    """
    # Each spec is ("key", [col1, col2...]); the value is the
    # delim-joined concatenation of those columns.  str.join (instead
    # of the old concat-then-strip) no longer destroys delimiter
    # characters that legitimately appear at a column's edges.
    return {
        key: delim.join(input_lines[n] for n in cols) for key, cols in column_specs
    }
+
+
def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str:
    """
    Interpolate a string with data from a dict.

    Args:
        txt: the mad libs template
        values: what you and your kids chose for each category.

    Returns:
        The interpolated string.

    >>> interpolate_using_dict('This is a {adjective} {noun}.',
    ...                        {'adjective': 'good', 'noun': 'example'})
    'This is a good example.'
    """
    # str.format performs the substitution; sprintf (defined elsewhere
    # in this module) with end='' presumably returns the formatted text
    # rather than printing it -- verify against sprintf's definition.
    return sprintf(txt.format(**values), end='')
+
+
def to_ascii(txt) -> bytes:
    """
    Args:
        txt: the input data to encode; a str or bytes.

    Returns:
        txt encoded as an ASCII byte string.

    Raises:
        TypeError: if txt is neither str nor bytes.

    >>> to_ascii('test')
    b'test'

    >>> to_ascii(b'1, 2, 3')
    b'1, 2, 3'
    """
    if isinstance(txt, str):
        return txt.encode('ascii')
    if isinstance(txt, bytes):
        # Already bytes; returned untouched (not validated as ASCII).
        return txt
    # TypeError (a subclass of Exception, so existing callers that
    # caught Exception still work) is the idiomatic error here.
    raise TypeError('to_ascii works with strings and bytes')
+
+
def to_base64(txt: str, *, encoding='utf-8', errors='surrogatepass') -> bytes:
    """
    Args:
        txt: the input data to encode
        encoding: codec used to turn txt into bytes first
        errors: error handler passed to str.encode

    Returns:
        txt encoded with a 64-chracter alphabet.  Similar to and compatible
        with uuencode/uudecode.

    >>> to_base64('hello?')
    b'aGVsbG8/\\n'
    """
    raw = txt.encode(encoding, errors)
    return base64.encodebytes(raw)
+
+
def is_base64(txt: str) -> bool:
    """
    Args:
        txt: the string to check

    Returns:
        True if txt is a valid base64 encoded string.  This assumes
        txt was encoded with Python's standard base64 alphabet which
        is the same as what uuencode/uudecode uses).

    >>> is_base64('test')    # all letters in the b64 alphabet
    True

    >>> is_base64('another test, how do you like this one?')
    False

    >>> is_base64(b'aGVsbG8/\\n')    # Ending newline is ok.
    True

    """
    # Iterating a bytes object yields ints, so the alphabet is a set of
    # byte values.
    b64_chars = string.ascii_uppercase + string.ascii_lowercase + string.digits + '+/'
    valid = set(b64_chars.encode('ascii'))
    return all(byte in valid for byte in to_ascii(txt.strip()))
+
+
def from_base64(b64: bytes, encoding='utf-8', errors='surrogatepass') -> str:
    """
    Args:
        b64: bytestring of 64-bit encoded data to decode / convert.
        encoding: codec used to turn the decoded bytes back into a str
        errors: error handler passed to bytes.decode

    Returns:
        The decoded form of b64 as a normal python string.  Similar to
        and compatible with uuencode / uudecode.

    >>> from_base64(b'aGVsbG8/\\n')
    'hello?'
    """
    raw = base64.decodebytes(b64)
    return raw.decode(encoding, errors)
+
+
def chunk(txt: str, chunk_size: int):
    """
    Args:
        txt: a string to be chunked into evenly spaced pieces.
        chunk_size: the size of each chunk to make

    Returns:
        The original string chunked into evenly spaced pieces; if the
        length is not an even multiple of chunk_size, a warning is
        emitted and the final piece is short.

    >>> ' '.join(chunk('010011011100010110101010101010101001111110101000', 8))
    '01001101 11000101 10101010 10101010 10011111 10101000'
    """
    if len(txt) % chunk_size != 0:
        # Fixed: the old message had an unbalanced '(' around the length.
        msg = (
            f"String to chunk's length ({len(txt)}) is not an even multiple "
            f"of chunk_size ({chunk_size})"
        )
        logger.warning(msg)
        warnings.warn(msg, stacklevel=2)
    for x in range(0, len(txt), chunk_size):
        yield txt[x : x + chunk_size]
+
+
def to_bitstring(txt: str, *, delimiter='') -> str:
    """
    Args:
        txt: the string to convert into a bitstring
        delimiter: character to insert between adjacent bytes.  Note that
            only bitstrings with delimiter='' are interpretable by
            :meth:`from_bitstring`.

    Returns:
        txt converted to ascii/binary and then chopped into bytes.

    >>> to_bitstring('hello?')
    '011010000110010101101100011011000110111100111111'

    >>> to_bitstring('test', delimiter=' ')
    '01110100 01100101 01110011 01110100'

    >>> to_bitstring(b'test')
    '01110100011001010111001101110100'
    """
    data = to_ascii(txt)
    # bin() drops leading zero bits, so pad back out to a whole number
    # of bytes before slicing into 8-bit groups.
    raw_bits = bin(int.from_bytes(data, 'big'))[2:]
    padded = raw_bits.zfill(((len(raw_bits) + 7) // 8) * 8)
    return delimiter.join(chunk(padded, 8))
+
+
def is_bitstring(txt: str) -> bool:
    """
    Args:
        txt: the string to check

    Returns:
        True if txt is a recognized bitstring and False otherwise.
        Note that if delimiter is non empty this code will not
        recognize the bitstring.

    >>> is_bitstring('011010000110010101101100011011000110111100111111')
    True

    >>> is_bitstring('1234')
    False
    """
    # Prefix the Python binary-literal marker and delegate to
    # is_binary_integer_number (defined elsewhere in this module);
    # presumably that helper validates strings shaped like '0b1010' --
    # verify against its definition.
    return is_binary_integer_number(f'0b{txt}')
+
+
def from_bitstring(bits: str, encoding='utf-8', errors='surrogatepass') -> str:
    """
    Args:
        bits: the bitstring to convert back into a python string
        encoding: the encoding to use

    Returns:
        The regular python string represented by bits.  Note that this
        code does not work with to_bitstring when delimiter is non-empty.

    >>> from_bitstring('011010000110010101101100011011000110111100111111')
    'hello?'
    """
    value = int(bits, 2)
    raw = value.to_bytes((value.bit_length() + 7) // 8, 'big')
    # All-zero bitstrings decode to '' above; map that to a NUL char,
    # matching to_bitstring('\0') round trips.
    return raw.decode(encoding, errors) or '\0'
+
+
def ip_v4_sort_key(txt: str) -> Optional[Tuple[int, ...]]:
    """
    Args:
        txt: an IP address to chunk up for sorting purposes

    Returns:
        A tuple of IP components arranged such that the sorting of
        IP addresses using a normal comparator will do something sane
        and desireable, or None if txt is not a valid IPv4 address.

    >>> ip_v4_sort_key('10.0.0.18')
    (10, 0, 0, 18)

    >>> ips = ['10.0.0.10', '100.0.0.1', '1.2.3.4', '10.0.0.9']
    >>> sorted(ips, key=lambda x: ip_v4_sort_key(x))
    ['1.2.3.4', '10.0.0.9', '10.0.0.10', '100.0.0.1']
    """
    if not is_ip_v4(txt):
        # Log instead of print()ing debug noise to stdout from a
        # library routine (consistent with logger usage elsewhere in
        # this module).
        logger.warning("not IP: %s", txt)
        return None
    return tuple(int(x) for x in txt.split('.'))
+
+
def path_ancestors_before_descendants_sort_key(volume: str) -> Tuple[str, ...]:
    """
    Args:
        volume: the string to chunk up for sorting purposes

    Returns:
        A tuple of volume's components such that the sorting of
        volumes using a normal comparator will do something sane
        and desireable.

    >>> path_ancestors_before_descendants_sort_key('/usr/local/bin')
    ('usr', 'local', 'bin')

    >>> paths = ['/usr/local', '/usr/local/bin', '/usr']
    >>> sorted(paths, key=lambda x: path_ancestors_before_descendants_sort_key(x))
    ['/usr', '/usr/local', '/usr/local/bin']
    """
    # Splitting on '/' yields '' for leading/trailing/doubled slashes;
    # the truthiness filter drops those empty components.
    parts = volume.split('/')
    return tuple(part for part in parts if part)
+
+
def replace_all(in_str: str, replace_set: str, replacement: str) -> str:
    """
    Execute several replace operations in a row.

    Args:
        in_str: the string in which to replace characters
        replace_set: the set of target characters to replace
        replacement: the character to replace any member of replace_set
            with

    Returns:
        The string with replacements executed.

    >>> s = 'this_is a-test!'
    >>> replace_all(s, ' _-!', '')
    'thisisatest'
    """
    # Replacements happen one target character at a time, in the order
    # the characters appear in replace_set.
    result = in_str
    for target_char in replace_set:
        result = result.replace(target_char, replacement)
    return result
+
+
def replace_nth(in_str: str, source: str, target: str, nth: int) -> str:
    """
    Replaces the nth occurrance of a substring within a string.

    Args:
        in_str: the string in which to run the replacement
        source: the (literal) substring to replace
        target: the replacement text
        nth: which occurrance of source to replace?

    Returns:
        in_str with the nth occurrance of source replaced by target.

    Raises:
        IndexError: if source occurs fewer than nth times in in_str.

    >>> replace_nth('this is a test', ' ', '-', 3)
    'this is a-test'
    """
    # re.escape so source is treated as a literal substring; previously
    # regex metacharacters in source (e.g. '.', '+') were interpreted
    # as a pattern and broke the search.
    where = [m.start() for m in re.finditer(re.escape(source), in_str)][nth - 1]
    before = in_str[:where]
    after = in_str[where:]
    return before + after.replace(source, target, 1)
+
+
if __name__ == '__main__':
    # Run this module's doctests when executed directly.
    import doctest

    doctest.testmod()