Adds IntervalTree.

[pyutils.git] / src / pyutils / string_utils.py
diff --git a/src/pyutils/string_utils.py b/src/pyutils/string_utils.py

index 575e64e7ff8fbd151a936201385c05fc5b61555a..a990275541601a3c25378237aac3a271447729ae 100644 (file)
--- a/src/pyutils/string_utils.py
+++ b/src/pyutils/string_utils.py
@@ -4,6 +4,7 @@
  """The MIT License (MIT)
  
  Copyright (c) 2016-2020 Davide Zanotti
+
  Modifications Copyright (c) 2021-2022 Scott Gasch
  
  Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -24,9 +25,12 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
  
-This class is based on: https://github.com/daveoncode/python-string-utils.
-See NOTICE in the root of this module for a detailed enumeration of what
-work is Davide's and what work was added by Scott.
+This class is based on:
+https://github.com/daveoncode/python-string-utils.  See `NOTICE
+<https://wannabe.guru.org/gitweb/?p=pyutils.git;a=blob_plain;f=NOTICE;hb=HEAD>`__
+in the root of this module for a detailed enumeration of what work is
+Davide's and what work was added by Scott.
+
  """
  
  import base64
@@ -79,9 +83,9 @@ URLS_RAW_STRING = (
      r"(#\S*)?"  # hash
  )
  
-URL_RE = re.compile(r"^{}$".format(URLS_RAW_STRING), re.IGNORECASE)
+URL_RE = re.compile(rf"^{URLS_RAW_STRING}$", re.IGNORECASE)
  
-URLS_RE = re.compile(r"({})".format(URLS_RAW_STRING), re.IGNORECASE)
+URLS_RE = re.compile(rf"({URLS_RAW_STRING})", re.IGNORECASE)
  
  ESCAPED_AT_SIGN = re.compile(r'(?!"[^"]*)@+(?=[^"]*")|\\@')
  
@@ -89,9 +93,9 @@ EMAILS_RAW_STRING = (
      r"[a-zA-Z\d._\+\-'`!%#$&*/=\?\^\{\}\|~\\]+@[a-z\d-]+\.?[a-z\d-]+\.[a-z]{2,4}"
  )
  
-EMAIL_RE = re.compile(r"^{}$".format(EMAILS_RAW_STRING))
+EMAIL_RE = re.compile(rf"^{EMAILS_RAW_STRING}$")
  
-EMAILS_RE = re.compile(r"({})".format(EMAILS_RAW_STRING))
+EMAILS_RE = re.compile(rf"({EMAILS_RAW_STRING})")
  
  CAMEL_CASE_TEST_RE = re.compile(r"^[a-zA-Z]*([a-z]+[A-Z]+|[A-Z]+[a-z]+)[a-zA-Z\d]*$")
  
@@ -161,7 +165,7 @@ NO_LETTERS_OR_NUMBERS_RE = re.compile(r"[^\w\d]+|_+", re.IGNORECASE | re.UNICODE
  
  MARGIN_RE = re.compile(r"^[^\S\r\n]+")
  
-ESCAPE_SEQUENCE_RE = re.compile(r"\e\[[^A-Za-z]*[A-Za-z]")
+ESCAPE_SEQUENCE_RE = re.compile(r"\x1B\[[^A-Za-z]*[A-Za-z]")
  
  NUM_SUFFIXES = {
      "Pb": (1024**5),
@@ -176,7 +180,7 @@ NUM_SUFFIXES = {
      "K": (1024**1),
  }
  
-units = [
+UNIT_WORDS = [
      "zero",
      "one",
      "two",
@@ -199,7 +203,7 @@ units = [
      "nineteen",
  ]
  
-tens = [
+TENS_WORDS = [
      "",
      "",
      "twenty",
@@ -212,16 +216,26 @@ tens = [
      "ninety",
  ]
  
-scales = ["hundred", "thousand", "million", "billion", "trillion"]
+MAGNITUDE_SCALES = [
+    "hundred",
+    "thousand",
+    "million",
+    "billion",
+    "trillion",
+    "quadrillion",
+]
  
  NUM_WORDS = {}
  NUM_WORDS["and"] = (1, 0)
-for i, word in enumerate(units):
+for i, word in enumerate(UNIT_WORDS):
      NUM_WORDS[word] = (1, i)
-for i, word in enumerate(tens):
+for i, word in enumerate(TENS_WORDS):
      NUM_WORDS[word] = (1, i * 10)
-for i, word in enumerate(scales):
-    NUM_WORDS[word] = (10 ** (i * 3 or 2), 0)
+for i, word in enumerate(MAGNITUDE_SCALES):
+    if i == 0:
+        NUM_WORDS[word] = (100, 0)
+    else:
+        NUM_WORDS[word] = (10 ** (i * 3), 0)
  NUM_WORDS['score'] = (20, 0)
  
  
@@ -234,6 +248,8 @@ def is_none_or_empty(in_str: Optional[str]) -> bool:
          True if the input string is either None or an empty string,
          False otherwise.
  
+    See also :meth:`is_string` and :meth:`is_empty_string`.
+
      >>> is_none_or_empty("")
      True
      >>> is_none_or_empty(None)
@@ -246,7 +262,7 @@ def is_none_or_empty(in_str: Optional[str]) -> bool:
      return in_str is None or len(in_str.strip()) == 0
  
  
-def is_string(obj: Any) -> bool:
+def is_string(in_str: Any) -> bool:
      """
      Args:
          in_str: the object to test
@@ -254,6 +270,8 @@ def is_string(obj: Any) -> bool:
      Returns:
          True if the object is a string and False otherwise.
  
+    See also :meth:`is_empty_string`, :meth:`is_none_or_empty`.
+
      >>> is_string('test')
      True
      >>> is_string(123)
@@ -263,7 +281,7 @@ def is_string(obj: Any) -> bool:
      >>> is_string([1, 2, 3])
      False
      """
-    return isinstance(obj, str)
+    return isinstance(in_str, str)
  
  
  def is_empty_string(in_str: Any) -> bool:
@@ -273,6 +291,8 @@ def is_empty_string(in_str: Any) -> bool:
  
      Returns:
          True if the string is empty and False otherwise.
+
+    See also :meth:`is_none_or_empty`, :meth:`is_full_string`.
      """
      return is_empty(in_str)
  
@@ -285,6 +305,8 @@ def is_empty(in_str: Any) -> bool:
      Returns:
          True if the string is empty and false otherwise.
  
+    See also :meth:`is_none_or_empty`, :meth:`is_full_string`.
+
      >>> is_empty('')
      True
      >>> is_empty('    \t\t    ')
@@ -308,6 +330,8 @@ def is_full_string(in_str: Any) -> bool:
          True if the object is a string and is not empty ('') and
          is not only composed of whitespace.
  
+    See also :meth:`is_string`, :meth:`is_empty_string`, :meth:`is_none_or_empty`.
+
      >>> is_full_string('test!')
      True
      >>> is_full_string('')
@@ -331,6 +355,10 @@ def is_number(in_str: str) -> bool:
          True if the string contains a valid numberic value and
          False otherwise.
  
+    See also :meth:`is_integer_number`, :meth:`is_decimal_number`,
+    :meth:`is_hexidecimal_integer_number`, :meth:`is_octal_integer_number`,
+    etc...
+
      >>> is_number(100.5)
      Traceback (most recent call last):
      ...
@@ -361,6 +389,10 @@ def is_integer_number(in_str: str) -> bool:
          decimal, hex, or octal, regular or scientific) integral
          expression and False otherwise.
  
+    See also :meth:`is_number`, :meth:`is_decimal_number`,
+    :meth:`is_hexidecimal_integer_number`, :meth:`is_octal_integer_number`,
+    etc...
+
      >>> is_integer_number('42')
      True
      >>> is_integer_number('42.0')
@@ -382,6 +414,9 @@ def is_hexidecimal_integer_number(in_str: str) -> bool:
      Returns:
          True if the string is a hex integer number and False otherwise.
  
+    See also :meth:`is_integer_number`, :meth:`is_decimal_number`,
+    :meth:`is_octal_integer_number`, :meth:`is_binary_integer_number`, etc...
+
      >>> is_hexidecimal_integer_number('0x12345')
      True
      >>> is_hexidecimal_integer_number('0x1A3E')
@@ -418,6 +453,10 @@ def is_octal_integer_number(in_str: str) -> bool:
      Returns:
          True if the string is a valid octal integral number and False otherwise.
  
+    See also :meth:`is_integer_number`, :meth:`is_decimal_number`,
+    :meth:`is_hexidecimal_integer_number`, :meth:`is_binary_integer_number`,
+    etc...
+
      >>> is_octal_integer_number('0o777')
      True
      >>> is_octal_integer_number('-0O115')
@@ -442,6 +481,10 @@ def is_binary_integer_number(in_str: str) -> bool:
      Returns:
          True if the string contains a binary integral number and False otherwise.
  
+    See also :meth:`is_integer_number`, :meth:`is_decimal_number`,
+    :meth:`is_hexidecimal_integer_number`, :meth:`is_octal_integer_number`,
+    etc...
+
      >>> is_binary_integer_number('0b10111')
      True
      >>> is_binary_integer_number('-0b111')
@@ -468,8 +511,18 @@ def to_int(in_str: str) -> int:
      Returns:
          The integral value of the string or raises on error.
  
+    See also :meth:`is_integer_number`, :meth:`is_decimal_number`,
+    :meth:`is_hexidecimal_integer_number`, :meth:`is_octal_integer_number`,
+    :meth:`is_binary_integer_number`, etc...
+
      >>> to_int('1234')
      1234
+    >>> to_int('0x1234')
+    4660
+    >>> to_int('0b01101')
+    13
+    >>> to_int('0o777')
+    511
      >>> to_int('test')
      Traceback (most recent call last):
      ...
@@ -489,6 +542,18 @@ def to_int(in_str: str) -> int:
  def number_string_to_integer(in_str: str) -> int:
      """Convert a string containing a written-out number into an int.
  
+    Args:
+        in_str: the string containing the long-hand written out integer number
+            in English.  See examples below.
+
+    Returns:
+        The integer whose value was parsed from in_str.
+
+    See also :meth:`integer_to_number_string`.
+
+    .. warning::
+        This code only handles integers; it will not work with decimals / floats.
+
      >>> number_string_to_integer("one hundred fifty two")
      152
  
@@ -503,19 +568,19 @@ def number_string_to_integer(in_str: str) -> int:
      ...
      ValueError: Unknown word: xyzzy
      """
-    if type(in_str) == int:
-        return in_str
+    if isinstance(in_str, int):
+        return int(in_str)
  
      current = result = 0
      in_str = in_str.replace('-', ' ')
-    for word in in_str.split():
-        if word not in NUM_WORDS:
-            if is_integer_number(word):
-                current += int(word)
+    for w in in_str.split():
+        if w not in NUM_WORDS:
+            if is_integer_number(w):
+                current += int(w)
                  continue
              else:
-                raise ValueError("Unknown word: " + word)
-        scale, increment = NUM_WORDS[word]
+                raise ValueError("Unknown word: " + w)
+        scale, increment = NUM_WORDS[w]
          current = current * scale + increment
          if scale > 100:
              result += current
@@ -523,6 +588,62 @@ def number_string_to_integer(in_str: str) -> int:
      return result + current
  
  
+def integer_to_number_string(num: int) -> str:
+    """
+    Opposite of :meth:`number_string_to_integer`; converts a number to a written out
+    longhand format in English.
+
+    Args:
+        num: the non-negative integer number to convert
+
+    Returns:
+        The long-hand written out English form of the number.  See examples below.
+
+    Raises:
+        ValueError: if num is negative.
+
+    See also :meth:`number_string_to_integer`.
+
+    .. warning::
+        This method does not handle decimals or floats, only ints.
+
+    >>> integer_to_number_string(9)
+    'nine'
+
+    >>> integer_to_number_string(42)
+    'forty two'
+
+    >>> integer_to_number_string(123219982)
+    'one hundred twenty three million two hundred nineteen thousand nine hundred eighty two'
+
+    >>> integer_to_number_string(-1)
+    Traceback (most recent call last):
+    ...
+    ValueError: Cannot convert a negative number: -1
+    """
+    if num < 0:
+        # A negative index would silently pick a word from the end of UNIT_WORDS.
+        raise ValueError(f'Cannot convert a negative number: {num}')
+    if num < 20:
+        return UNIT_WORDS[num]
+    if num < 100:
+        tens, ones = divmod(num, 10)
+        return TENS_WORDS[tens] + (' ' + UNIT_WORDS[ones] if ones else '')
+
+    # num >= 100: pick the largest NUM_WORDS scale <= num (the tuple's first item,
+    # e.g. 'thousand' -> (1000, 0); the second item is an unused increment).  For
+    # 123,456 that's 'thousand': convert 123, append the name, recurse on the rest.
+    name, magnitude = max(
+        ((word, val[0]) for word, val in NUM_WORDS.items() if val[0] <= num),
+        key=lambda item: item[1],
+    )
+    count, leftover = divmod(num, magnitude)
+    ret = integer_to_number_string(count) + ' ' + name
+    if leftover:
+        ret += ' ' + integer_to_number_string(leftover)
+    return ret
+
+
  def is_decimal_number(in_str: str) -> bool:
      """
      Args:
@@ -533,6 +654,8 @@ def is_decimal_number(in_str: str) -> bool:
          otherwise.  A decimal may be signed or unsigned or use
          a "scientific notation".
  
+    See also :meth:`is_integer_number`.
+
      .. note::
          We do not consider integers without a decimal point
          to be decimals; they return False (see example).
@@ -553,19 +676,23 @@ def strip_escape_sequences(in_str: str) -> str:
      Returns:
          in_str with escape sequences removed.
  
+    See also: :mod:`pyutils.ansi`.
+
      .. note::
          What is considered to be an "escape sequence" is defined
          by a regular expression.  While this gets common ones,
          there may exist valid sequences that it doesn't match.
  
-    >>> strip_escape_sequences('\e[12;11;22mthis is a test!')
+    >>> strip_escape_sequences('\x1B[12;11;22mthis is a test!')
      'this is a test!'
      """
      in_str = ESCAPE_SEQUENCE_RE.sub("", in_str)
      return in_str
  
  
-def add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str:
+def add_thousands_separator(
+    in_str: str, *, separator_char: str = ',', places: int = 3
+) -> str:
      """
      Args:
          in_str: string or number to which to add thousands separator(s)
@@ -597,6 +724,7 @@ def add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str
  
  
  def _add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str:
+    """Internal helper"""
      decimal_part = ""
      if '.' in in_str:
          (in_str, decimal_part) = in_str.split('.')
@@ -693,6 +821,8 @@ def suffix_string_to_number(in_str: str) -> Optional[int]:
      Returns:
          An integer number of bytes or None to indicate an error.
  
+    See also :meth:`number_to_suffix_string`.
+
      >>> suffix_string_to_number('1Mb')
      1048576
      >>> suffix_string_to_number('13.1Gb')
@@ -734,6 +864,8 @@ def number_to_suffix_string(num: int) -> Optional[str]:
          A string with a suffix representing num bytes concisely or
          None to indicate an error.
  
+    See also: :meth:`suffix_string_to_number`.
+
      >>> number_to_suffix_string(14066017894)
      '13.1Gb'
      >>> number_to_suffix_string(1024 * 1024)
@@ -771,6 +903,13 @@ def is_credit_card(in_str: Any, card_type: str = None) -> bool:
  
      Returns:
          True if in_str is a valid credit card number.
+
+    .. warning::
+        This code is not verifying the authenticity of the credit card (i.e.
+        not checking whether it's a real card that can be charged); rather
+        it's only checking that the number follows the "rules" for numbering
+        established by credit card issuers.
+
      """
      if not is_full_string(in_str):
          return False
@@ -799,6 +938,8 @@ def is_camel_case(in_str: Any) -> bool:
          * it's composed only by letters ([a-zA-Z]) and optionally numbers ([0-9])
          * it contains both lowercase and uppercase letters
          * it does not start with a number
+
+    See also :meth:`is_snake_case`, :meth:`is_slug`, and :meth:`camel_case_to_snake_case`.
      """
      return is_full_string(in_str) and CAMEL_CASE_TEST_RE.match(in_str) is not None
  
@@ -807,6 +948,7 @@ def is_snake_case(in_str: Any, *, separator: str = "_") -> bool:
      """
      Args:
          in_str: the string to test
+        separator: the snake case separator character to use
  
      Returns: True if the string is snake case and False otherwise.  A
          string is considered snake case when:
@@ -815,6 +957,8 @@ def is_snake_case(in_str: Any, *, separator: str = "_") -> bool:
          * it contains at least one underscore (or provided separator)
          * it does not start with a number
  
+    See also :meth:`is_camel_case`, :meth:`is_slug`, and :meth:`snake_case_to_camel_case`.
+
      >>> is_snake_case('this_is_a_test')
      True
      >>> is_snake_case('___This_Is_A_Test_1_2_3___')
@@ -862,10 +1006,13 @@ def is_uuid(in_str: Any, allow_hex: bool = False) -> bool:
      """
      Args:
          in_str: the string to test
+        allow_hex: should we allow hexadecimal digits in valid uuids?
  
      Returns:
          True if the in_str contains a valid UUID and False otherwise.
  
+    See also :meth:`generate_uuid`.
+
      >>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf')
      True
      >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf')
@@ -888,6 +1035,9 @@ def is_ip_v4(in_str: Any) -> bool:
      Returns:
          True if in_str contains a valid IPv4 address and False otherwise.
  
+    See also :meth:`extract_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v6`,
+    and :meth:`is_ip`.
+
      >>> is_ip_v4('255.200.100.75')
      True
      >>> is_ip_v4('nope')
@@ -914,6 +1064,9 @@ def extract_ip_v4(in_str: Any) -> Optional[str]:
          The first extracted IPv4 address from in_str or None if
          none were found or an error occurred.
  
+    See also :meth:`is_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v6`,
+    and :meth:`is_ip`.
+
      >>> extract_ip_v4('   The secret IP address: 127.0.0.1 (use it wisely)   ')
      '127.0.0.1'
      >>> extract_ip_v4('Your mom dresses you funny.')
@@ -934,6 +1087,9 @@ def is_ip_v6(in_str: Any) -> bool:
      Returns:
          True if in_str contains a valid IPv6 address and False otherwise.
  
+    See also :meth:`is_ip_v4`, :meth:`extract_ip_v4`, :meth:`extract_ip_v6`,
+    and :meth:`is_ip`.
+
      >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334')
      True
      >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?')    # invalid "?"
@@ -951,6 +1107,9 @@ def extract_ip_v6(in_str: Any) -> Optional[str]:
          The first IPv6 address found in in_str or None if no address
          was found or an error occurred.
  
+    See also :meth:`is_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v4`,
+    and :meth:`is_ip`.
+
      >>> extract_ip_v6('IP: 2001:db8:85a3:0000:0000:8a2e:370:7334')
      '2001:db8:85a3:0000:0000:8a2e:370:7334'
      >>> extract_ip_v6("(and she's ugly too, btw)")
@@ -972,6 +1131,9 @@ def is_ip(in_str: Any) -> bool:
          True if in_str contains a valid IP address (either IPv4 or
          IPv6).
  
+    See also :meth:`is_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v6`,
+    and :meth:`extract_ip_v4`.
+
      >>> is_ip('255.200.100.75')
      True
      >>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334')
@@ -993,6 +1155,9 @@ def extract_ip(in_str: Any) -> Optional[str]:
          The first IP address (IPv4 or IPv6) found in in_str or
          None to indicate none found or an error condition.
  
+    See also :meth:`is_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v6`,
+    and :meth:`extract_ip_v4`.
+
      >>> extract_ip('Attacker: 255.200.100.75')
      '255.200.100.75'
      >>> extract_ip('Remote host: 2001:db8:85a3:0000:0000:8a2e:370:7334')
@@ -1013,6 +1178,8 @@ def is_mac_address(in_str: Any) -> bool:
      Returns:
          True if in_str is a valid MAC address False otherwise.
  
+    See also :meth:`extract_mac_address`, :meth:`is_ip`, etc...
+
      >>> is_mac_address("34:29:8F:12:0D:2F")
      True
      >>> is_mac_address('34:29:8f:12:0d:2f')
@@ -1034,6 +1201,8 @@ def extract_mac_address(in_str: Any, *, separator: str = ":") -> Optional[str]:
          The first MAC address found in in_str or None to indicate no
          match or an error.
  
+    See also :meth:`is_mac_address`, :meth:`is_ip`, and :meth:`extract_ip`.
+
      >>> extract_mac_address(' MAC Address: 34:29:8F:12:0D:2F')
      '34:29:8F:12:0D:2F'
  
@@ -1056,10 +1225,13 @@ def is_slug(in_str: Any, separator: str = "-") -> bool:
      """
      Args:
          in_str: string to test
+        separator: the slug character to use
  
      Returns:
          True if in_str is a slug string and False otherwise.
  
+    See also :meth:`is_camel_case`, :meth:`is_snake_case`, and :meth:`slugify`.
+
      >>> is_slug('my-blog-post-title')
      True
      >>> is_slug('My blog post title')
@@ -1080,6 +1252,8 @@ def contains_html(in_str: str) -> bool:
          True if the given string contains HTML/XML tags and False
          otherwise.
  
+    See also :meth:`strip_html`.
+
      .. warning::
          By design, this function matches ANY type of tag, so don't expect
          to use it as an HTML validator.  It's a quick sanity check at
@@ -1106,7 +1280,6 @@ def words_count(in_str: str) -> int:
          The number of words contained in the given string.
  
      .. note::
-
          This method is "smart" in that it does consider only sequences
          of one or more letter and/or numbers to be "words".  Thus a
          string like this: "! @ # % ... []" will return zero.  Moreover
@@ -1133,7 +1306,6 @@ def word_count(in_str: str) -> int:
          The number of words contained in the given string.
  
      .. note::
-
          This method is "smart" in that it does consider only sequences
          of one or more letter and/or numbers to be "words".  Thus a
          string like this: "! @ # % ... []" will return zero.  Moreover
@@ -1158,6 +1330,8 @@ def generate_uuid(omit_dashes: bool = False) -> str:
          A generated UUID string (using `uuid.uuid4()`) with or without
          dashes per the omit_dashes arg.
  
+    See also :meth:`is_uuid`, :meth:`generate_random_alphanumeric_string`.
+
      generate_uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
      generate_uuid(omit_dashes=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
      """
@@ -1176,6 +1350,8 @@ def generate_random_alphanumeric_string(size: int) -> str:
          A string of the specified size containing random characters
          (uppercase/lowercase ascii letters and digits).
  
+    See also :meth:`asciify`, :meth:`generate_uuid`.
+
      >>> random.seed(22)
      >>> generate_random_alphanumeric_string(9)
      '96ipbNClS'
@@ -1203,16 +1379,19 @@ def reverse(in_str: str) -> str:
      return in_str[::-1]
  
  
-def camel_case_to_snake_case(in_str, *, separator="_"):
+def camel_case_to_snake_case(in_str: str, *, separator: str = "_"):
      """
      Args:
          in_str: the camel case string to convert
+        separator: the snake case separator character to use
  
      Returns:
          A snake case string equivalent to the camel case input or the
          original string if it is not a valid camel case string or some
          other error occurs.
  
+    See also :meth:`is_camel_case`, :meth:`is_snake_case`, and :meth:`is_slug`.
+
      >>> camel_case_to_snake_case('MacAddressExtractorFactory')
      'mac_address_extractor_factory'
      >>> camel_case_to_snake_case('Luke Skywalker')
@@ -1231,12 +1410,16 @@ def snake_case_to_camel_case(
      """
      Args:
          in_str: the snake case string to convert
+        upper_case_first: should we capitalize the first letter?
+        separator: the separator character to use
  
      Returns:
          A camel case string that is equivalent to the snake case string
          provided or the original string back again if it is not valid
          snake case or another error occurs.
  
+    See also :meth:`is_camel_case`, :meth:`is_snake_case`, and :meth:`is_slug`.
+
      >>> snake_case_to_camel_case('this_is_a_test')
      'ThisIsATest'
      >>> snake_case_to_camel_case('Han Solo')
@@ -1260,6 +1443,8 @@ def to_char_list(in_str: str) -> List[str]:
      Returns:
          A list of strings of length one each.
  
+    See also :meth:`from_char_list`.
+
      >>> to_char_list('test')
      ['t', 'e', 's', 't']
      """
@@ -1277,6 +1462,8 @@ def from_char_list(in_list: List[str]) -> str:
          The string resulting from gluing the characters in in_list
          together.
  
+    See also :meth:`to_char_list`.
+
      >>> from_char_list(['t', 'e', 's', 't'])
      'test'
      """
@@ -1316,6 +1503,8 @@ def scramble(in_str: str) -> Optional[str]:
          in the same original string as no check is done.  Returns
          None to indicate error conditions.
  
+    See also :mod:`pyutils.unscrambler`.
+
      >>> random.seed(22)
      >>> scramble('awesome')
      'meosaew'
@@ -1333,6 +1522,8 @@ def strip_html(in_str: str, keep_tag_content: bool = False) -> str:
          A string with all HTML tags removed (optionally with tag contents
          preserved).
  
+    See also :meth:`contains_html`.
+
      .. note::
          This method uses simple regular expressions to strip tags and is
          not a full fledged HTML parser by any means.  Consider using
@@ -1361,6 +1552,8 @@ def asciify(in_str: str) -> str:
          by translating all non-ascii chars into their closest possible
          ASCII representation (eg: ó -> o, Ë -> E, ç -> c...).
  
+    See also :meth:`to_ascii`, :meth:`generate_random_alphanumeric_string`.
+
      .. warning::
          Some chars may be lost if impossible to translate.
  
@@ -1399,6 +1592,8 @@ def slugify(in_str: str, *, separator: str = "-") -> str:
          * all chars are encoded as ascii (by using :meth:`asciify`)
          * is safe for URL
  
+    See also :meth:`is_slug` and :meth:`asciify`.
+
      >>> slugify('Top 10 Reasons To Love Dogs!!!')
      'top-10-reasons-to-love-dogs'
      >>> slugify('Mönstér Mägnët')
@@ -1437,6 +1632,8 @@ def to_bool(in_str: str) -> bool:
  
          Otherwise False is returned.
  
+    See also :mod:`pyutils.argparse_utils`.
+
      >>> to_bool('True')
      True
  
@@ -1457,7 +1654,7 @@ def to_bool(in_str: str) -> bool:
      """
      if not is_string(in_str):
          raise ValueError(in_str)
-    return in_str.lower() in ("true", "1", "yes", "y", "t", "on")
+    return in_str.lower() in set(["true", "1", "yes", "y", "t", "on"])
  
  
  def to_date(in_str: str) -> Optional[datetime.date]:
@@ -1468,21 +1665,23 @@ def to_date(in_str: str) -> Optional[datetime.date]:
      Returns:
          The datetime.date the string contained or None to indicate
          an error.  This parser is relatively clever; see
-        :class:`datetimez.dateparse_utils` docs for details.
+        :mod:`pyutils.datetimes.dateparse_utils` docs for details.
+
+    See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`extract_date`,
+    :meth:`is_valid_date`, :meth:`to_datetime`, :meth:`valid_datetime`.
  
      >>> to_date('9/11/2001')
      datetime.date(2001, 9, 11)
      >>> to_date('xyzzy')
      """
-    import pyutils.datetimez.dateparse_utils as du
+    import pyutils.datetimes.dateparse_utils as du
  
      try:
          d = du.DateParser()  # type: ignore
          d.parse(in_str)
          return d.get_date()
      except du.ParseException:  # type: ignore
-        msg = f'Unable to parse date {in_str}.'
-        logger.warning(msg)
+        pass
      return None
  
  
@@ -1495,6 +1694,9 @@ def extract_date(in_str: Any) -> Optional[datetime.datetime]:
      Returns:
          a datetime if date was found, otherwise None
  
+    See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`to_date`,
+    :meth:`is_valid_date`, :meth:`to_datetime`, :meth:`valid_datetime`.
+
      >>> extract_date("filename.txt    dec 13, 2022")
      datetime.datetime(2022, 12, 13, 0, 0)
  
@@ -1503,7 +1705,7 @@ def extract_date(in_str: Any) -> Optional[datetime.datetime]:
      """
      import itertools
  
-    import pyutils.datetimez.dateparse_utils as du
+    import pyutils.datetimes.dateparse_utils as du
  
      d = du.DateParser()  # type: ignore
      chunks = in_str.split()
@@ -1515,7 +1717,7 @@ def extract_date(in_str: Any) -> Optional[datetime.datetime]:
      ):
          try:
              expr = " ".join(ngram)
-            logger.debug(f"Trying {expr}")
+            logger.debug("Trying %s", expr)
              if d.parse(expr):
                  return d.get_datetime()
          except du.ParseException:  # type: ignore
@@ -1531,7 +1733,10 @@ def is_valid_date(in_str: str) -> bool:
      Returns:
          True if the string represents a valid date that we can recognize
          and False otherwise.  This parser is relatively clever; see
-        :class:`datetimez.dateparse_utils` docs for details.
+        :mod:`pyutils.datetimes.dateparse_utils` docs for details.
+
+    See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`to_date`,
+    :meth:`extract_date`, :meth:`to_datetime`, :meth:`valid_datetime`.
  
      >>> is_valid_date('1/2/2022')
      True
@@ -1542,15 +1747,14 @@ def is_valid_date(in_str: str) -> bool:
      >>> is_valid_date('xyzzy')
      False
      """
-    import pyutils.datetimez.dateparse_utils as dp
+    import pyutils.datetimes.dateparse_utils as dp
  
      try:
          d = dp.DateParser()  # type: ignore
          _ = d.parse(in_str)
          return True
      except dp.ParseException:  # type: ignore
-        msg = f'Unable to parse date {in_str}.'
-        logger.warning(msg)
+        pass
      return False
  
  
@@ -1562,12 +1766,15 @@ def to_datetime(in_str: str) -> Optional[datetime.datetime]:
      Returns:
          A python datetime parsed from in_str or None to indicate
          an error.  This parser is relatively clever; see
-        :class:`datetimez.dateparse_utils` docs for details.
+        :mod:`pyutils.datetimes.dateparse_utils` docs for details.
+
+    See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`to_date`,
+    :meth:`extract_date`, :meth:`valid_datetime`.
  
      >>> to_datetime('7/20/1969 02:56 GMT')
      datetime.datetime(1969, 7, 20, 2, 56, tzinfo=<StaticTzInfo 'GMT'>)
      """
-    import pyutils.datetimez.dateparse_utils as dp
+    import pyutils.datetimes.dateparse_utils as dp
  
      try:
          d = dp.DateParser()  # type: ignore
@@ -1575,8 +1782,7 @@ def to_datetime(in_str: str) -> Optional[datetime.datetime]:
          if isinstance(dt, datetime.datetime):
              return dt
      except Exception:
-        msg = f'Unable to parse datetime {in_str}.'
-        logger.warning(msg)
+        pass
      return None
  
  
@@ -1588,7 +1794,7 @@ def valid_datetime(in_str: str) -> bool:
      Returns:
          True if in_str contains a valid datetime and False otherwise.
          This parser is relatively clever; see
-        :class:`datetimez.dateparse_utils` docs for details.
+        :mod:`pyutils.datetimes.dateparse_utils` docs for details.
  
      >>> valid_datetime('next wednesday at noon')
      True
@@ -1602,8 +1808,6 @@ def valid_datetime(in_str: str) -> bool:
      _ = to_datetime(in_str)
      if _ is not None:
          return True
-    msg = f'Unable to parse datetime {in_str}.'
-    logger.warning(msg)
      return False
  
  
@@ -1639,9 +1843,7 @@ def dedent(in_str: str) -> Optional[str]:
      Returns:
          A string with tab indentation removed or None on error.
  
-    .. note::
-
-        Inspired by analogous Scala function.
+    See also :meth:`indent`.
  
      >>> dedent('\t\ttest\\n\t\ting')
      'test\\ning'
@@ -1662,6 +1864,8 @@ def indent(in_str: str, amount: int) -> str:
      Returns:
          An indented string created by prepending amount spaces.
  
+    See also :meth:`dedent`.
+
      >>> indent('This is a test', 4)
      '    This is a test'
      """
@@ -1672,16 +1876,8 @@ def indent(in_str: str, amount: int) -> str:
      return line_separator.join(lines)
  
  
-def sprintf(*args, **kwargs) -> str:
-    """
-    Args:
-        This function uses the same syntax as the builtin print
-        function.
-
-    Returns:
-        An interpolated string capturing print output, like man(3)
-        :code:sprintf.
-    """
+def _sprintf(*args, **kwargs) -> str:
+    """Internal helper."""
      ret = ""
  
      sep = kwargs.pop("sep", None)
@@ -1701,8 +1897,8 @@ def sprintf(*args, **kwargs) -> str:
          sep = " "
      if end is None:
          end = "\n"
-    for i, arg in enumerate(args):
-        if i:
+    for n, arg in enumerate(args):
+        if n:
              ret += sep
          if isinstance(arg, str):
              ret += arg
@@ -1720,6 +1916,8 @@ def strip_ansi_sequences(in_str: str) -> str:
      Returns:
          in_str with recognized ANSI escape sequences removed.
  
+    See also :mod:`pyutils.ansi`.
+
      .. warning::
          This method works by using a regular expression.
          It works for all ANSI escape sequences I've tested with but
@@ -1750,7 +1948,6 @@ class SprintfStdout(contextlib.AbstractContextManager):
      >>> print(buf(), end='')
      test
      1, 2, 3
-
      """
  
      def __init__(self) -> None:
@@ -1780,7 +1977,6 @@ def capitalize_first_letter(in_str: str) -> str:
      'Test'
      >>> capitalize_first_letter("ALREADY!")
      'ALREADY!'
-
      """
      return in_str[0].upper() + in_str[1:]
  
@@ -1793,6 +1989,9 @@ def it_they(n: int) -> str:
      Returns:
          'it' if n is one or 'they' otherwise.
  
+    See also :meth:`is_are`, :meth:`pluralize`, :meth:`make_contractions`,
+    :meth:`thify`.
+
      Suggested usage::
  
          n = num_files_saved_to_tmp()
@@ -1817,6 +2016,9 @@ def is_are(n: int) -> str:
      Returns:
          'is' if n is one or 'are' otherwise.
  
+    See also :meth:`it_they`, :meth:`pluralize`, :meth:`make_contractions`,
+    :meth:`thify`.
+
      Suggested usage::
  
          n = num_files_saved_to_tmp()
@@ -1842,6 +2044,9 @@ def pluralize(n: int) -> str:
      Returns:
          's' if n is greater than one otherwise ''.
  
+    See also :meth:`it_they`, :meth:`is_are`, :meth:`make_contractions`,
+    :meth:`thify`.
+
      Suggested usage::
  
          n = num_files_saved_to_tmp()
@@ -1873,6 +2078,8 @@ def make_contractions(txt: str) -> str:
          Output text identical to original input except for any
          recognized contractions are formed.
  
+    See also :meth:`it_they`, :meth:`is_are`, :meth:`pluralize`, :meth:`thify`.
+
      .. note::
          The order in which we create contractions is defined by the
          implementation and what I thought made more sense when writing
@@ -1955,7 +2162,7 @@ def make_contractions(txt: str) -> str:
              for second in second_list:
                  # Disallow there're/where're.  They're valid English
                  # but sound weird.
-                if (first in ('there', 'where')) and second == 'a(re)':
+                if (first in set(['there', 'where'])) and second == 'a(re)':
                      continue
  
                  pattern = fr'\b({first})\s+{second}\b'
@@ -1976,6 +2183,8 @@ def thify(n: int) -> str:
      Returns:
          The proper cardinal suffix for a number.
  
+    See also :meth:`it_they`, :meth:`is_are`, :meth:`make_contractions`.
+
      Suggested usage::
  
          attempt_count = 0
@@ -2014,36 +2223,49 @@ def ngrams(txt: str, n: int):
      Returns:
          Generates the ngrams from the input string.
  
+    See also :meth:`ngrams_presplit`, :meth:`bigrams`, :meth:`trigrams`.
+
      >>> [x for x in ngrams('This is a test', 2)]
      ['This is', 'is a', 'a test']
      """
      words = txt.split()
      for ngram in ngrams_presplit(words, n):
          ret = ''
-        for word in ngram:
-            ret += f'{word} '
+        for w in ngram:
+            ret += f'{w} '
          yield ret.strip()
  
  
  def ngrams_presplit(words: Sequence[str], n: int):
      """
      Same as :meth:`ngrams` but with the string pre-split.
+
+    See also :meth:`ngrams`, :meth:`bigrams`, :meth:`trigrams`.
      """
      return list_utils.ngrams(words, n)
  
  
  def bigrams(txt: str):
-    """Generates the bigrams (n=2) of the given string."""
+    """Generates the bigrams (n=2) of the given string.
+
+    See also :meth:`ngrams`, :meth:`trigrams`.
+
+    >>> [x for x in bigrams('this is a test')]
+    ['this is', 'is a', 'a test']
+    """
      return ngrams(txt, 2)
  
  
  def trigrams(txt: str):
-    """Generates the trigrams (n=3) of the given string."""
+    """Generates the trigrams (n=3) of the given string.
+
+    See also :meth:`ngrams`, :meth:`bigrams`.
+    """
      return ngrams(txt, 3)
  
  
  def shuffle_columns_into_list(
-    input_lines: Sequence[str], column_specs: Iterable[Iterable[int]], delim=''
+    input_lines: Sequence[str], column_specs: Iterable[Iterable[int]], delim: str = ''
  ) -> Iterable[str]:
      """Helper to shuffle / parse columnar data and return the results as a
      list.
@@ -2062,6 +2284,8 @@ def shuffle_columns_into_list(
          A list of string created by following the instructions set forth
          in column_specs.
  
+    See also :meth:`shuffle_columns_into_dict`.
+
      >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul  9 11:34 acl_test.py'.split()
      >>> shuffle_columns_into_list(
      ...     cols,
@@ -2086,7 +2310,7 @@ def shuffle_columns_into_list(
  def shuffle_columns_into_dict(
      input_lines: Sequence[str],
      column_specs: Iterable[Tuple[str, Iterable[int]]],
-    delim='',
+    delim: str = '',
  ) -> Dict[str, str]:
      """Helper to shuffle / parse columnar data and return the results
      as a dict.
@@ -2104,6 +2328,8 @@ def shuffle_columns_into_dict(
      Returns:
          A dict formed by applying the column_specs instructions.
  
+    See also :meth:`shuffle_columns_into_list`, :meth:`interpolate_using_dict`.
+
      >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul  9 11:34 acl_test.py'.split()
      >>> shuffle_columns_into_dict(
      ...     cols,
@@ -2133,11 +2359,13 @@ def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str:
          txt: the mad libs template
          values: what you and your kids chose for each category.
  
+    See also :meth:`shuffle_columns_into_list`, :meth:`shuffle_columns_into_dict`.
+
      >>> interpolate_using_dict('This is a {adjective} {noun}.',
      ...                        {'adjective': 'good', 'noun': 'example'})
      'This is a good example.'
      """
-    return sprintf(txt.format(**values), end='')
+    return _sprintf(txt.format(**values), end='')
  
  
  def to_ascii(txt: str):
@@ -2148,6 +2376,9 @@ def to_ascii(txt: str):
      Returns:
          txt encoded as an ASCII byte string.
  
+    See also :meth:`to_base64`, :meth:`to_bitstring`, :meth:`to_bytes`,
+    :meth:`generate_random_alphanumeric_string`, :meth:`asciify`.
+
      >>> to_ascii('test')
      b'test'
  
@@ -2161,15 +2392,22 @@ def to_ascii(txt: str):
      raise Exception('to_ascii works with strings and bytes')
  
  
-def to_base64(txt: str, *, encoding='utf-8', errors='surrogatepass') -> bytes:
+def to_base64(
+    txt: str, *, encoding: str = 'utf-8', errors: str = 'surrogatepass'
+) -> bytes:
      """
      Args:
          txt: the input data to encode
+        encoding: the encoding to use during conversion
+        errors: how to handle encoding errors
  
      Returns:
          txt encoded with a 64-character alphabet.  Similar to and compatible
          with uuencode/uudecode.
  
+    See also :meth:`is_base64`, :meth:`to_ascii`, :meth:`to_bitstring`,
+    :meth:`from_base64`.
+
      >>> to_base64('hello?')
      b'aGVsbG8/\\n'
      """
@@ -2186,6 +2424,8 @@ def is_base64(txt: str) -> bool:
          txt was encoded with Python's standard base64 alphabet which
          is the same as what uuencode/uudecode uses).
  
+    See also :meth:`to_base64`, :meth:`from_base64`.
+
      >>> is_base64('test')    # all letters in the b64 alphabet
      True
  
@@ -2204,15 +2444,21 @@ def is_base64(txt: str) -> bool:
      return True
  
  
-def from_base64(b64: bytes, encoding='utf-8', errors='surrogatepass') -> str:
+def from_base64(
+    b64: bytes, encoding: str = 'utf-8', errors: str = 'surrogatepass'
+) -> str:
      """
      Args:
          b64: bytestring of base64-encoded data to decode / convert.
+        encoding: the encoding to use during conversion
+        errors: how to handle encoding errors
  
      Returns:
          The decoded form of b64 as a normal python string.  Similar to
          and compatible with uuencode / uudecode.
  
+    See also :meth:`to_base64`, :meth:`is_base64`.
+
      >>> from_base64(b'aGVsbG8/\\n')
      'hello?'
      """
@@ -2239,7 +2485,7 @@ def chunk(txt: str, chunk_size: int):
          yield txt[x : x + chunk_size]
  
  
-def to_bitstring(txt: str, *, delimiter='') -> str:
+def to_bitstring(txt: str, *, delimiter: str = '') -> str:
      """
      Args:
          txt: the string to convert into a bitstring
@@ -2250,6 +2496,9 @@ def to_bitstring(txt: str, *, delimiter='') -> str:
      Returns:
          txt converted to ascii/binary and then chopped into bytes.
  
+    See also :meth:`to_base64`, :meth:`from_bitstring`, :meth:`is_bitstring`,
+    :meth:`chunk`.
+
      >>> to_bitstring('hello?')
      '011010000110010101101100011011000110111100111111'
  
@@ -2275,6 +2524,9 @@ def is_bitstring(txt: str) -> bool:
          Note that if delimiter is non empty this code will not
          recognize the bitstring.
  
+    See also :meth:`to_base64`, :meth:`from_bitstring`, :meth:`to_bitstring`,
+    :meth:`chunk`.
+
      >>> is_bitstring('011010000110010101101100011011000110111100111111')
      True
  
@@ -2284,16 +2536,22 @@ def is_bitstring(txt: str) -> bool:
      return is_binary_integer_number(f'0b{txt}')
  
  
-def from_bitstring(bits: str, encoding='utf-8', errors='surrogatepass') -> str:
+def from_bitstring(
+    bits: str, encoding: str = 'utf-8', errors: str = 'surrogatepass'
+) -> str:
      """
      Args:
          bits: the bitstring to convert back into a python string
-        encoding: the encoding to use
+        encoding: the encoding to use during conversion
+        errors: how to handle encoding errors
  
      Returns:
          The regular python string represented by bits.  Note that this
          code does not work with to_bitstring when delimiter is non-empty.
  
+    See also :meth:`to_base64`, :meth:`to_bitstring`, :meth:`is_bitstring`,
+    :meth:`chunk`.
+
      >>> from_bitstring('011010000110010101101100011011000110111100111111')
      'hello?'
      """
@@ -2311,6 +2569,8 @@ def ip_v4_sort_key(txt: str) -> Optional[Tuple[int, ...]]:
          IP addresses using a normal comparator will do something sane
          and desirable.
  
+    See also :meth:`is_ip_v4`.
+
      >>> ip_v4_sort_key('10.0.0.18')
      (10, 0, 0, 18)
  
@@ -2334,6 +2594,8 @@ def path_ancestors_before_descendants_sort_key(volume: str) -> Tuple[str, ...]:
          volumes using a normal comparator will do something sane
          and desirable.
  
+    See also :mod:`pyutils.files.file_utils`.
+
      >>> path_ancestors_before_descendants_sort_key('/usr/local/bin')
      ('usr', 'local', 'bin')
  
@@ -2354,6 +2616,8 @@ def replace_all(in_str: str, replace_set: str, replacement: str) -> str:
          replacement: the character to replace any member of replace_set
              with
  
+    See also :meth:`replace_nth`.
+
      Returns:
          The string with replacements executed.
  
@@ -2376,6 +2640,8 @@ def replace_nth(in_str: str, source: str, target: str, nth: int):
          target: the replacement text
          nth: which occurrence of source to replace?
  
+    See also :meth:`replace_all`.
+
      >>> replace_nth('this is a test', ' ', '-', 3)
      'this is a-test'
      """