Start using warnings from stdlib.

[python_utils.git] / string_utils.py
diff --git a/string_utils.py b/string_utils.py

index a6a2da3155fcf312e9a9bc21ecd94f26141b4fcb..b93dc93aaa78e9b1b901499169c3b902f3445a59 100644 (file)
--- a/string_utils.py
+++ b/string_utils.py
@@ -14,6 +14,7 @@ import string
  from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple
  import unicodedata
  from uuid import uuid4
+import warnings
  
  import list_utils
  
@@ -791,6 +792,9 @@ def extract_mac_address(in_str: Any, *, separator: str = ":") -> Optional[str]:
      >>> extract_mac_address(' MAC Address: 34:29:8F:12:0D:2F')
      '34:29:8F:12:0D:2F'
  
+    >>> extract_mac_address('? (10.0.0.30) at d8:5d:e2:34:54:86 on em0 expires in 1176 seconds [ethernet]')
+    'd8:5d:e2:34:54:86'
+
      """
      if not is_full_string(in_str):
          return None
@@ -858,16 +862,16 @@ def words_count(in_str: str) -> int:
      return len(WORDS_COUNT_RE.findall(in_str))
  
  
-def generate_uuid(as_hex: bool = False) -> str:
+def generate_uuid(omit_dashes: bool = False) -> str:
      """
      Generated an UUID string (using `uuid.uuid4()`).
  
      generate_uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
-    generate_uuid(as_hex=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
+    generate_uuid(omit_dashes=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
  
      """
      uid = uuid4()
-    if as_hex:
+    if omit_dashes:
          return uid.hex
      return str(uid)
  
@@ -1094,7 +1098,9 @@ def to_date(in_str: str) -> Optional[datetime.date]:
          d.parse(in_str)
          return d.get_date()
      except dp.ParseException:
-        logger.warning(f'Unable to parse date {in_str}.')
+        msg = f'Unable to parse date {in_str}.'
+        logger.warning(msg)
+        warnings.warn(msg)
      return None
  
  
@@ -1108,7 +1114,9 @@ def valid_date(in_str: str) -> bool:
          _ = d.parse(in_str)
          return True
      except dp.ParseException:
-        logger.warning(f'Unable to parse date {in_str}.')
+        msg = f'Unable to parse date {in_str}.'
+        logger.warning(msg)
+        warnings.warn(msg)
      return False
  
  
@@ -1123,7 +1131,9 @@ def to_datetime(in_str: str) -> Optional[datetime.datetime]:
          if type(dt) == datetime.datetime:
              return dt
      except ValueError:
-        logger.warning(f'Unable to parse datetime {in_str}.')
+        msg = f'Unable to parse datetime {in_str}.'
+        logger.warning(msg)
+        warnings.warn(msg)
      return None
  
  
@@ -1134,10 +1144,30 @@ def valid_datetime(in_str: str) -> bool:
      _ = to_datetime(in_str)
      if _ is not None:
          return True
-    logger.warning(f'Unable to parse datetime {in_str}.')
+    msg = f'Unable to parse datetime {in_str}.'
+    logger.warning(msg)
+    warnings.warn(msg)
      return False
  
  
+def squeeze(in_str: str, character_to_squeeze: str = ' ') -> str:
+    """
+    Squeeze runs of more than one character_to_squeeze into one.
+
+    >>> squeeze(' this        is       a    test    ')
+    ' this is a test '
+
+    >>> squeeze('one|!||!|two|!||!|three', character_to_squeeze='|!|')
+    'one|!|two|!|three'
+
+    """
+    return re.sub(
+        r'(' + re.escape(character_to_squeeze) + r')+',
+        character_to_squeeze,
+        in_str
+    )
+
+
  def dedent(in_str: str) -> str:
      """
      Removes tab indentation from multi line strings (inspired by analogous Scala function).
@@ -1445,8 +1475,9 @@ def chunk(txt: str, chunk_size):
  
      """
      if len(txt) % chunk_size != 0:
-        logger.warning(
-            f'String to chunk\'s length ({len(txt)} is not an even multiple of chunk_size ({chunk_size})')
+        msg = f'String to chunk\'s length ({len(txt)} is not an even multiple of chunk_size ({chunk_size})'
+        logger.warning(msg)
+        warnings.warn(msg)
      for x in range(0, len(txt), chunk_size):
          yield txt[x:x+chunk_size]
  
@@ -1500,6 +1531,51 @@ def from_bitstring(bits: str, encoding='utf-8', errors='surrogatepass') -> str:
      return n.to_bytes((n.bit_length() + 7) // 8, 'big').decode(encoding, errors) or '\0'
  
  
+def ip_v4_sort_key(txt: str) -> Tuple[int]:
+    """Turn an IPv4 address into a tuple for sorting purposes.
+
+    >>> ip_v4_sort_key('10.0.0.18')
+    (10, 0, 0, 18)
+
+    >>> ips = ['10.0.0.10', '100.0.0.1', '1.2.3.4', '10.0.0.9']
+    >>> sorted(ips, key=lambda x: ip_v4_sort_key(x))
+    ['1.2.3.4', '10.0.0.9', '10.0.0.10', '100.0.0.1']
+
+    """
+    if not is_ip_v4(txt):
+        print(f"not IP: {txt}")
+        return None
+    return tuple([int(x) for x in txt.split('.')])
+
+
+def path_ancestors_before_descendants_sort_key(volume: str) -> Tuple[str]:
+    """Chunk up a file path so that parent/ancestor paths sort before
+    children/descendant paths.
+
+    >>> path_ancestors_before_descendants_sort_key('/usr/local/bin')
+    ('usr', 'local', 'bin')
+
+    >>> paths = ['/usr/local', '/usr/local/bin', '/usr']
+    >>> sorted(paths, key=lambda x: path_ancestors_before_descendants_sort_key(x))
+    ['/usr', '/usr/local', '/usr/local/bin']
+
+    """
+    return tuple([x for x in volume.split('/') if len(x) > 0])
+
+
+def replace_all(in_str: str, replace_set: str, replacement: str) -> str:
+    """Execute several replace operations in a row.
+
+    >>> s = 'this_is a-test!'
+    >>> replace_all(s, ' _-!', '')
+    'thisisatest'
+
+    """
+    for char in replace_set:
+        in_str = in_str.replace(char, replacement)
+    return in_str
+
+
  if __name__ == '__main__':
      import doctest
      doctest.testmod()