Make the new cmd_showing_output select and display data from stderr in

[python_utils.git] / string_utils.py
diff --git a/string_utils.py b/string_utils.py

index aca4a5e3bfd9f49efa9a329b06addd9af5ffaa0a..6eda2783ea7aafa67bcc4f492825c2aa1bab1cc9 100644 (file)
--- a/string_utils.py
+++ b/string_utils.py
@@ -14,6 +14,7 @@ import string
  from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple
  import unicodedata
  from uuid import uuid4
  from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple
  import unicodedata
  from uuid import uuid4
+import warnings
  
  import list_utils
  
  
  import list_utils
  
@@ -861,16 +862,16 @@ def words_count(in_str: str) -> int:
      return len(WORDS_COUNT_RE.findall(in_str))
  
  
      return len(WORDS_COUNT_RE.findall(in_str))
  
  
-def generate_uuid(as_hex: bool = False) -> str:
+def generate_uuid(omit_dashes: bool = False) -> str:
      """
      Generated an UUID string (using `uuid.uuid4()`).
  
      generate_uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
      """
      Generated an UUID string (using `uuid.uuid4()`).
  
      generate_uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
-    generate_uuid(as_hex=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
+    generate_uuid(omit_dashes=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
  
      """
      uid = uuid4()
  
      """
      uid = uuid4()
-    if as_hex:
+    if omit_dashes:
          return uid.hex
      return str(uid)
  
          return uid.hex
      return str(uid)
  
@@ -1097,7 +1098,8 @@ def to_date(in_str: str) -> Optional[datetime.date]:
          d.parse(in_str)
          return d.get_date()
      except dp.ParseException:
          d.parse(in_str)
          return d.get_date()
      except dp.ParseException:
-        logger.warning(f'Unable to parse date {in_str}.')
+        msg = f'Unable to parse date {in_str}.'
+        logger.warning(msg)
      return None
  
  
      return None
  
  
@@ -1111,7 +1113,8 @@ def valid_date(in_str: str) -> bool:
          _ = d.parse(in_str)
          return True
      except dp.ParseException:
          _ = d.parse(in_str)
          return True
      except dp.ParseException:
-        logger.warning(f'Unable to parse date {in_str}.')
+        msg = f'Unable to parse date {in_str}.'
+        logger.warning(msg)
      return False
  
  
      return False
  
  
@@ -1126,7 +1129,8 @@ def to_datetime(in_str: str) -> Optional[datetime.datetime]:
          if type(dt) == datetime.datetime:
              return dt
      except ValueError:
          if type(dt) == datetime.datetime:
              return dt
      except ValueError:
-        logger.warning(f'Unable to parse datetime {in_str}.')
+        msg = f'Unable to parse datetime {in_str}.'
+        logger.warning(msg)
      return None
  
  
      return None
  
  
@@ -1137,10 +1141,29 @@ def valid_datetime(in_str: str) -> bool:
      _ = to_datetime(in_str)
      if _ is not None:
          return True
      _ = to_datetime(in_str)
      if _ is not None:
          return True
-    logger.warning(f'Unable to parse datetime {in_str}.')
+    msg = f'Unable to parse datetime {in_str}.'
+    logger.warning(msg)
      return False
  
  
      return False
  
  
+def squeeze(in_str: str, character_to_squeeze: str = ' ') -> str:
+    """
+    Squeeze runs of more than one character_to_squeeze into one.
+
+    Args:
+        in_str: the string to process.
+        character_to_squeeze: the character (or multi-character
+            sequence) whose back-to-back repeats are collapsed.  It is
+            always treated literally, never as a regular expression or
+            as a replacement template.
+
+    Returns:
+        in_str with every run of character_to_squeeze replaced by a
+        single occurrence.  An empty character_to_squeeze leaves in_str
+        unchanged.
+
+    >>> squeeze(' this        is       a    test    ')
+    ' this is a test '
+
+    >>> squeeze('one|!||!|two|!||!|three', character_to_squeeze='|!|')
+    'one|!|two|!|three'
+
+    """
+    # Nothing to collapse; also avoids building a regex that repeats
+    # an empty group.
+    if not character_to_squeeze:
+        return in_str
+    return re.sub(
+        r'(?:' + re.escape(character_to_squeeze) + r')+',
+        # A callable replacement is inserted verbatim.  Passing the string
+        # itself would make re.sub interpret backslashes in it as escapes
+        # (a lone backslash raises re.error; backslash + 'n' would be
+        # emitted as a newline).
+        lambda _match: character_to_squeeze,
+        in_str
+    )
+
+
  def dedent(in_str: str) -> str:
      """
      Removes tab indentation from multi line strings (inspired by analogous Scala function).
  def dedent(in_str: str) -> str:
      """
      Removes tab indentation from multi line strings (inspired by analogous Scala function).
@@ -1448,8 +1471,9 @@ def chunk(txt: str, chunk_size):
  
      """
      if len(txt) % chunk_size != 0:
  
      """
      if len(txt) % chunk_size != 0:
-        logger.warning(
-            f'String to chunk\'s length ({len(txt)} is not an even multiple of chunk_size ({chunk_size})')
+        msg = f'String to chunk\'s length ({len(txt)} is not an even multiple of chunk_size ({chunk_size})'
+        logger.warning(msg)
+        warnings.warn(msg, stacklevel=2)
      for x in range(0, len(txt), chunk_size):
          yield txt[x:x+chunk_size]
  
      for x in range(0, len(txt), chunk_size):
          yield txt[x:x+chunk_size]
  
@@ -1535,6 +1559,19 @@ def path_ancestors_before_descendants_sort_key(volume: str) -> Tuple[str]:
      return tuple([x for x in volume.split('/') if len(x) > 0])
  
  
      return tuple([x for x in volume.split('/') if len(x) > 0])
  
  
+def replace_all(in_str: str, replace_set: str, replacement: str) -> str:
+    """Swap every character found in replace_set for replacement.
+
+    The substitutions run one after another, in the order the characters
+    appear in replace_set; each pass operates on the output of the
+    previous one.
+
+    >>> s = 'this_is a-test!'
+    >>> replace_all(s, ' _-!', '')
+    'thisisatest'
+
+    """
+    result = in_str
+    for target in replace_set:
+        result = result.replace(target, replacement)
+    return result
+
+
  if __name__ == '__main__':
      import doctest
      doctest.testmod()
  if __name__ == '__main__':
      import doctest
      doctest.testmod()