+
+
class SprintfStdout(contextlib.AbstractContextManager):
    """Context manager that records everything written to stdout.

    Entering yields a zero-argument callable that returns whatever has
    been captured so far::

        with SprintfStdout() as buf:
            print("test")
        print(buf())

        'test\n'
    """

    def __init__(self) -> None:
        # Accumulates captured stdout text while the manager is active.
        self.destination = io.StringIO()
        self.recorder: contextlib.redirect_stdout

    def __enter__(self) -> Callable[[], str]:
        self.recorder = contextlib.redirect_stdout(self.destination)
        self.recorder.__enter__()
        # Bound method is a callable returning the current buffer contents.
        return self.destination.getvalue

    def __exit__(self, *args) -> Literal[False]:
        self.recorder.__exit__(*args)
        # Rewind so the buffer can also be read as a stream afterwards.
        self.destination.seek(0)
        # Never swallow exceptions raised inside the with-block.
        return False
+
+
def capitalize_first_letter(txt: str) -> str:
    """Capitalize the first letter of a string, leaving the rest alone
    (unlike str.capitalize, which lowercases the tail).

    Fixed to tolerate the empty string instead of raising IndexError.

    >>> capitalize_first_letter('test')
    'Test'
    >>> capitalize_first_letter("ALREADY!")
    'ALREADY!'
    >>> capitalize_first_letter('')
    ''

    """
    # txt[:1] is '' for an empty string where txt[0] would raise.
    return txt[:1].upper() + txt[1:]
+
+
def it_they(n: int) -> str:
    """Return the pronoun, 'it' or 'they', appropriate for a count.

    >>> it_they(1)
    'it'
    >>> it_they(100)
    'they'

    """
    return "it" if n == 1 else "they"
+
+
def is_are(n: int) -> str:
    """Return the verb form, 'is' or 'are', appropriate for a count.

    >>> is_are(1)
    'is'
    >>> is_are(2)
    'are'

    """
    return "is" if n == 1 else "are"
+
+
def pluralize(n: int) -> str:
    """Return the plural suffix ('s' or '') appropriate for a count.

    >>> pluralize(15)
    's'
    >>> pluralize(1)
    ''

    """
    return "" if n == 1 else "s"
+
+
def make_contractions(txt: str) -> str:
    """Glue words together to form contractions.

    >>> make_contractions('It is nice today.')
    "It's nice today."

    >>> make_contractions('I can not even...')
    "I can't even..."

    >>> make_contractions('She could not see!')
    "She couldn't see!"

    >>> make_contractions('But she will not go.')
    "But she won't go."

    >>> make_contractions('Verily, I shall not.')
    "Verily, I shan't."

    >>> make_contractions('No you cannot.')
    "No you can't."

    >>> make_contractions('I said you can not go.')
    "I said you can't go."

    """
    # Irregular contractions first: can't, shan't and won't.
    irregular = (
        (r'\b(can)\s*no(t)\b', r"\1'\2"),
        (r'\b(sha)ll\s*(n)o(t)\b', r"\1\2'\3"),
        (r'\b(w)ill\s*(n)(o)(t)\b', r"\1\3\2'\4"),
    )
    for pattern, replacement in irregular:
        txt = re.sub(pattern, replacement, txt, count=0, flags=re.IGNORECASE)

    # Regular rules: each (word list, suffix list) pair contracts
    # "word suffix" into "word'<tail>" using the parenthesized groups.
    verbs = [
        'are',
        'could',
        'did',
        'has',
        'have',
        'is',
        'must',
        'should',
        'was',
        'were',
        'would',
    ]
    subjects = [
        "I",
        "you",
        "he",
        "she",
        "it",
        "we",
        "they",
        "how",
        "why",
        "when",
        "where",
        "who",
        "there",
    ]
    rules = [
        (verbs, ['(n)o(t)']),
        (subjects, ['woul(d)', 'i(s)', 'a(re)', 'ha(s)', 'ha(ve)', 'ha(d)', 'wi(ll)']),
    ]

    for words, suffixes in rules:
        for word in words:
            for suffix in suffixes:
                # Disallow there're/where're.  They're valid English
                # but sound weird.
                if suffix == 'a(re)' and word in ('there', 'where'):
                    continue

                if suffix == '(n)o(t)':
                    # e.g. "could not" -> "couldn't"
                    replacement = r"\1\2'\3"
                else:
                    # e.g. "it is" -> "it's"
                    replacement = r"\1'\2"
                txt = re.sub(
                    fr'\b({word})\s+{suffix}\b',
                    replacement,
                    txt,
                    count=0,
                    flags=re.IGNORECASE,
                )

    return txt
+
+
def thify(n: int) -> str:
    """Return the ordinal suffix ('st', 'nd', 'rd' or 'th') for a number.

    Fixed: numbers ending in 11, 12 or 13 are irregular and always take
    'th' (11th, 112th); the old last-digit-only logic produced "11st".
    The old ``assert is_integer_number(...)`` was redundant (n is an
    int) and is stripped under ``-O``, so it has been dropped.

    >>> thify(1)
    'st'
    >>> thify(11)
    'th'
    >>> thify(33)
    'rd'
    >>> thify(16)
    'th'

    """
    # abs() so negative numbers get the same suffix as their magnitude.
    if abs(n) % 100 in (11, 12, 13):
        return "th"
    return {1: "st", 2: "nd", 3: "rd"}.get(abs(n) % 10, "th")
+
+
def ngrams(txt: str, n: int):
    """Generate the n-grams of a string, n whitespace-split words at a time.

    >>> [x for x in ngrams('This is a test', 2)]
    ['This is', 'is a', 'a test']

    """
    # Words come from split() and thus contain no whitespace, so a
    # simple space-join reconstructs each n-gram.
    for gram in ngrams_presplit(txt.split(), n):
        yield ' '.join(gram)
+
+
def ngrams_presplit(words: Sequence[str], n: int):
    """Return the n-grams of an already-tokenized sequence of words.

    Thin wrapper around list_utils.ngrams; see that helper for the
    exact return type (presumably an iterable of word tuples -- confirm).
    """
    return list_utils.ngrams(words, n)
+
+
def bigrams(txt: str):
    """Generate the bigrams (adjacent word pairs) of a string."""
    return ngrams(txt, 2)
+
+
def trigrams(txt: str):
    """Generate the trigrams (adjacent word triples) of a string."""
    return ngrams(txt, 3)
+
+
def shuffle_columns_into_list(
    input_lines: Sequence[str], column_specs: Iterable[Iterable[int]], delim=''
) -> Iterable[str]:
    """Shuffle / parse columnar data and return the results as a list.
    Each entry of column_specs is a sequence of column indices whose
    values are concatenated (delim-separated) into one output element.

    >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul 9 11:34 acl_test.py'.split()
    >>> shuffle_columns_into_list(
    ...     cols,
    ...     [ [8], [2, 3], [5, 6, 7] ],
    ...     delim=' ',
    ... )
    ['acl_test.py', 'scott wheel', 'Jul 9 11:34']

    """
    results = []
    for spec in column_specs:
        # Join the requested columns, then trim stray delimiter
        # characters from both ends.
        joined = delim.join(input_lines[col] for col in spec)
        results.append(joined.strip(delim))
    return results
+
+
def shuffle_columns_into_dict(
    input_lines: Sequence[str],
    column_specs: Iterable[Tuple[str, Iterable[int]]],
    delim='',
) -> Dict[str, str]:
    """Shuffle / parse columnar data and return the results as a dict.
    Each spec is a (key, column-indices) pair; the named columns are
    concatenated (delim-separated) and stored under the key.

    >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul 9 11:34 acl_test.py'.split()
    >>> shuffle_columns_into_dict(
    ...     cols,
    ...     [ ('filename', [8]), ('owner', [2, 3]), ('mtime', [5, 6, 7]) ],
    ...     delim=' ',
    ... )
    {'filename': 'acl_test.py', 'owner': 'scott wheel', 'mtime': 'Jul 9 11:34'}

    """
    results = {}
    for key, columns in column_specs:
        # Join the requested columns, then trim stray delimiter
        # characters from both ends.
        joined = delim.join(input_lines[col] for col in columns)
        results[key] = joined.strip(delim)
    return results
+
+
def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str:
    """Interpolate a {placeholder}-style string with data from a dict.

    >>> interpolate_using_dict('This is a {adjective} {noun}.',
    ...                        {'adjective': 'good', 'noun': 'example'})
    'This is a good example.'

    """
    filled = txt.format(**values)
    # sprintf with end='' returns the formatted text unmodified.
    return sprintf(filled, end='')
+
+
def to_ascii(x: str):
    """Encode a str as an ASCII bytes string; pass bytes through as-is.

    >>> to_ascii('test')
    b'test'

    >>> to_ascii(b'1, 2, 3')
    b'1, 2, 3'

    Raises:
        TypeError: if x is neither str nor bytes.
    """
    if isinstance(x, str):
        return x.encode('ascii')
    if isinstance(x, bytes):
        return x
    # TypeError is the idiomatic exception for a bad argument type; it
    # is still a subclass of Exception, so existing broad handlers work.
    raise TypeError('to_ascii works with strings and bytes')
+
+
def to_base64(txt: str, *, encoding='utf-8', errors='surrogatepass') -> bytes:
    """Encode txt and then encode those bytes with the standard
    64-character alphabet.  This is compatible with uudecode.

    >>> to_base64('hello?')
    b'aGVsbG8/\\n'

    """
    raw = txt.encode(encoding, errors)
    return base64.encodebytes(raw)
+
+
def is_base64(txt: str) -> bool:
    """Determine whether a string is base64 encoded (with Python's standard
    base64 alphabet, which is the same as what uuencode uses).

    >>> is_base64('test')  # all letters in the b64 alphabet
    True

    >>> is_base64('another test, how do you like this one?')
    False

    >>> is_base64(b'aGVsbG8/\\n')  # Ending newline is ok.
    True

    """
    legal = string.ascii_uppercase + string.ascii_lowercase + string.digits + '+/'
    legal_bytes = set(legal.encode('ascii'))
    # to_ascii yields bytes; iterating bytes produces ints.
    return all(byte in legal_bytes for byte in to_ascii(txt.strip()))
+
+
def from_base64(b64: bytes, encoding='utf-8', errors='surrogatepass') -> str:
    """Convert a base64 encoded bytes string back to a normal string.

    >>> from_base64(b'aGVsbG8/\\n')
    'hello?'

    """
    raw = base64.decodebytes(b64)
    return raw.decode(encoding, errors)
+
+
def chunk(txt: str, chunk_size):
    """Yield successive chunk_size-character slices of a string.  Warns
    (but still proceeds; the last chunk is short) if the length is not
    an even multiple of chunk_size.

    >>> ' '.join(chunk('010011011100010110101010101010101001111110101000', 8))
    '01001101 11000101 10101010 10101010 10011111 10101000'

    """
    if len(txt) % chunk_size != 0:
        # Fixed: the old message was missing the closing parenthesis
        # after the length.
        msg = f'String to chunk\'s length ({len(txt)}) is not an even multiple of chunk_size ({chunk_size})'
        logger.warning(msg)
        warnings.warn(msg, stacklevel=2)
    for x in range(0, len(txt), chunk_size):
        yield txt[x : x + chunk_size]
+
+
def to_bitstring(txt: str, *, delimiter='', encoding='utf-8', errors='surrogatepass') -> str:
    """Encode txt (str or bytes) and render it as a string of bits,
    one byte at a time.  Note: only bitstrings with delimiter='' are
    interpretable by from_bitstring.

    NOTE(review): encoding/errors are accepted but unused -- to_ascii
    forces an ASCII encode.  Kept for interface compatibility.

    >>> to_bitstring('hello?')
    '011010000110010101101100011011000110111100111111'

    >>> to_bitstring('test', delimiter=' ')
    '01110100 01100101 01110011 01110100'

    >>> to_bitstring(b'test')
    '01110100011001010111001101110100'

    """
    data = to_ascii(txt)
    # bin() output carries a '0b' prefix; strip it off.
    raw_bits = bin(int.from_bytes(data, 'big'))[2:]
    # Left-pad with zeros up to a whole number of bytes.
    padded = raw_bits.zfill(8 * ((len(raw_bits) + 7) // 8))
    return delimiter.join(chunk(padded, 8))
+
+
def is_bitstring(txt: str) -> bool:
    """Is this a string composed only of binary digits?

    >>> is_bitstring('011010000110010101101100011011000110111100111111')
    True

    >>> is_bitstring('1234')
    False

    """
    # Prefix with '0b' and let the binary-literal validator decide.
    candidate = f'0b{txt}'
    return is_binary_integer_number(candidate)
+
+
def from_bitstring(bits: str, encoding='utf-8', errors='surrogatepass') -> str:
    """Convert a bitstring back to bytes, then decode it into a str.

    Fixed: the byte count is now derived from the length of the
    bitstring rather than from the integer's bit_length, which silently
    dropped leading zero bytes (e.g. '0000000001100001' lost its
    leading NUL).  This also subsumes the old ``or '\\0'`` hack for the
    all-zeros case.

    >>> from_bitstring('011010000110010101101100011011000110111100111111')
    'hello?'

    """
    n = int(bits, 2)
    # Round the bit count up to whole bytes so leading zeros survive.
    num_bytes = (len(bits) + 7) // 8
    return n.to_bytes(num_bytes, 'big').decode(encoding, errors)
+
+
def ip_v4_sort_key(txt: str) -> Optional[Tuple[int, ...]]:
    """Turn an IPv4 address into a tuple of its octets for sorting.
    Returns None (and logs a warning) when txt is not an IPv4 address.

    >>> ip_v4_sort_key('10.0.0.18')
    (10, 0, 0, 18)

    >>> ips = ['10.0.0.10', '100.0.0.1', '1.2.3.4', '10.0.0.9']
    >>> sorted(ips, key=lambda x: ip_v4_sort_key(x))
    ['1.2.3.4', '10.0.0.9', '10.0.0.10', '100.0.0.1']

    """
    if not is_ip_v4(txt):
        # Fixed: log the problem instead of printing debug noise to
        # stdout from library code.
        logger.warning("not IP: %s", txt)
        return None
    return tuple(int(octet) for octet in txt.split('.'))
+
+
def path_ancestors_before_descendants_sort_key(volume: str) -> Tuple[str, ...]:
    """Chunk up a file path so that parent/ancestor paths sort before
    children/descendant paths.

    >>> path_ancestors_before_descendants_sort_key('/usr/local/bin')
    ('usr', 'local', 'bin')

    >>> paths = ['/usr/local', '/usr/local/bin', '/usr']
    >>> sorted(paths, key=lambda x: path_ancestors_before_descendants_sort_key(x))
    ['/usr', '/usr/local', '/usr/local/bin']

    """
    # Drop the empty segments produced by leading / doubled slashes.
    return tuple(part for part in volume.split('/') if part)
+
+
def replace_all(in_str: str, replace_set: str, replacement: str) -> str:
    """Replace every occurrence of each character in replace_set with
    replacement, applied sequentially one character at a time.

    >>> s = 'this_is a-test!'
    >>> replace_all(s, ' _-!', '')
    'thisisatest'

    """
    result = in_str
    # Sequential replaces on purpose: a replacement containing a later
    # replace_set character would itself be replaced.
    for target in replace_set:
        result = result.replace(target, replacement)
    return result
+
+
# Run this module's doctests when executed directly.
if __name__ == '__main__':
    import doctest

    doctest.testmod()