X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=string_utils.py;h=6eda2783ea7aafa67bcc4f492825c2aa1bab1cc9;hb=559c60c169223c7c6833e9beedf978a8ffdd3926;hp=aca4a5e3bfd9f49efa9a329b06addd9af5ffaa0a;hpb=b29be4f1750fd20bd2eada88e751dfae85817882;p=python_utils.git diff --git a/string_utils.py b/string_utils.py index aca4a5e..6eda278 100644 --- a/string_utils.py +++ b/string_utils.py @@ -14,6 +14,7 @@ import string from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple import unicodedata from uuid import uuid4 +import warnings import list_utils @@ -861,16 +862,16 @@ def words_count(in_str: str) -> int: return len(WORDS_COUNT_RE.findall(in_str)) -def generate_uuid(as_hex: bool = False) -> str: +def generate_uuid(omit_dashes: bool = False) -> str: """ Generated an UUID string (using `uuid.uuid4()`). generate_uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b' - generate_uuid(as_hex=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b' + generate_uuid(omit_dashes=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b' """ uid = uuid4() - if as_hex: + if omit_dashes: return uid.hex return str(uid) @@ -1097,7 +1098,8 @@ def to_date(in_str: str) -> Optional[datetime.date]: d.parse(in_str) return d.get_date() except dp.ParseException: - logger.warning(f'Unable to parse date {in_str}.') + msg = f'Unable to parse date {in_str}.' + logger.warning(msg) return None @@ -1111,7 +1113,8 @@ def valid_date(in_str: str) -> bool: _ = d.parse(in_str) return True except dp.ParseException: - logger.warning(f'Unable to parse date {in_str}.') + msg = f'Unable to parse date {in_str}.' + logger.warning(msg) return False @@ -1126,7 +1129,8 @@ def to_datetime(in_str: str) -> Optional[datetime.datetime]: if type(dt) == datetime.datetime: return dt except ValueError: - logger.warning(f'Unable to parse datetime {in_str}.') + msg = f'Unable to parse datetime {in_str}.' + logger.warning(msg) return None @@ -1137,10 +1141,29 @@ def valid_datetime(in_str: str) -> bool: _ = to_datetime(in_str) if _ is not None: return True - logger.warning(f'Unable to parse datetime {in_str}.') + msg = f'Unable to parse datetime {in_str}.' + logger.warning(msg) return False +def squeeze(in_str: str, character_to_squeeze: str = ' ') -> str: + """ + Squeeze runs of more than one character_to_squeeze into one. + + >>> squeeze(' this is a test ') + ' this is a test ' + + >>> squeeze('one|!||!|two|!||!|three', character_to_squeeze='|!|') + 'one|!|two|!|three' + + """ + return re.sub( + r'(' + re.escape(character_to_squeeze) + r')+', + character_to_squeeze, + in_str + ) + + def dedent(in_str: str) -> str: """ Removes tab indentation from multi line strings (inspired by analogous Scala function). @@ -1448,8 +1471,9 @@ def chunk(txt: str, chunk_size): """ if len(txt) % chunk_size != 0: - logger.warning( - f'String to chunk\'s length ({len(txt)} is not an even multiple of chunk_size ({chunk_size})') + msg = f'String to chunk\'s length ({len(txt)} is not an even multiple of chunk_size ({chunk_size})' + logger.warning(msg) + warnings.warn(msg, stacklevel=2) for x in range(0, len(txt), chunk_size): yield txt[x:x+chunk_size] @@ -1535,6 +1559,19 @@ def path_ancestors_before_descendants_sort_key(volume: str) -> Tuple[str]: return tuple([x for x in volume.split('/') if len(x) > 0]) +def replace_all(in_str: str, replace_set: str, replacement: str) -> str: + """Execute several replace operations in a row. + + >>> s = 'this_is a-test!' + >>> replace_all(s, ' _-!', '') + 'thisisatest' + + """ + for char in replace_set: + in_str = in_str.replace(char, replacement) + return in_str + + if __name__ == '__main__': import doctest doctest.testmod()