This class is based on:
https://github.com/daveoncode/python-string-utils. See `NOTICE
-<https://wannabe.guru.org/gitweb/?p=pyutils.git;a=blob_plain;f=NOTICE;hb=HEAD>`_
+<https://wannabe.guru.org/gitweb/?p=pyutils.git;a=blob_plain;f=NOTICE;hb=HEAD>`__
in the root of this module for a detailed enumeration of what work is
Davide's and what work was added by Scott.
r"(#\S*)?" # hash
)
-URL_RE = re.compile(r"^{}$".format(URLS_RAW_STRING), re.IGNORECASE)
+URL_RE = re.compile(rf"^{URLS_RAW_STRING}$", re.IGNORECASE)
-URLS_RE = re.compile(r"({})".format(URLS_RAW_STRING), re.IGNORECASE)
+URLS_RE = re.compile(rf"({URLS_RAW_STRING})", re.IGNORECASE)
ESCAPED_AT_SIGN = re.compile(r'(?!"[^"]*)@+(?=[^"]*")|\\@')
r"[a-zA-Z\d._\+\-'`!%#$&*/=\?\^\{\}\|~\\]+@[a-z\d-]+\.?[a-z\d-]+\.[a-z]{2,4}"
)
-EMAIL_RE = re.compile(r"^{}$".format(EMAILS_RAW_STRING))
+EMAIL_RE = re.compile(rf"^{EMAILS_RAW_STRING}$")
-EMAILS_RE = re.compile(r"({})".format(EMAILS_RAW_STRING))
+EMAILS_RE = re.compile(rf"({EMAILS_RAW_STRING})")
CAMEL_CASE_TEST_RE = re.compile(r"^[a-zA-Z]*([a-z]+[A-Z]+|[A-Z]+[a-z]+)[a-zA-Z\d]*$")
MARGIN_RE = re.compile(r"^[^\S\r\n]+")
-ESCAPE_SEQUENCE_RE = re.compile(r"\e\[[^A-Za-z]*[A-Za-z]")
+ESCAPE_SEQUENCE_RE = re.compile(r"\x1B\[[^A-Za-z]*[A-Za-z]")
NUM_SUFFIXES = {
"Pb": (1024**5),
return in_str is None or len(in_str.strip()) == 0
-def is_string(obj: Any) -> bool:
+def is_string(in_str: Any) -> bool:
"""
Args:
in_str: the object to test
>>> is_string([1, 2, 3])
False
"""
- return isinstance(obj, str)
+ return isinstance(in_str, str)
def is_empty_string(in_str: Any) -> bool:
...
ValueError: Unknown word: xyzzy
"""
- if type(in_str) == int:
+ if isinstance(in_str, int):
return int(in_str)
current = result = 0
in_str = in_str.replace('-', ' ')
- for word in in_str.split():
- if word not in NUM_WORDS:
- if is_integer_number(word):
- current += int(word)
+ for w in in_str.split():
+ if w not in NUM_WORDS:
+ if is_integer_number(w):
+ current += int(w)
continue
else:
- raise ValueError("Unknown word: " + word)
- scale, increment = NUM_WORDS[word]
+ raise ValueError("Unknown word: " + w)
+ scale, increment = NUM_WORDS[w]
current = current * scale + increment
if scale > 100:
result += current
by a regular expression. While this gets common ones,
there may exist valid sequences that it doesn't match.
- >>> strip_escape_sequences('\e[12;11;22mthis is a test!')
+ >>> strip_escape_sequences('\x1B[12;11;22mthis is a test!')
'this is a test!'
"""
in_str = ESCAPE_SEQUENCE_RE.sub("", in_str)
return in_str
-def add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str:
+def add_thousands_separator(
+ in_str: str, *, separator_char: str = ',', places: int = 3
+) -> str:
"""
Args:
in_str: string or number to which to add thousands separator(s)
"""
Args:
in_str: the string to test
+ separator: the snake case separator character to use
Returns: True if the string is snake case and False otherwise. A
string is considered snake case when:
"""
Args:
in_str: the string to test
+ allow_hex: should we allow hexadecimal digits in valid UUIDs?
Returns:
True if the in_str contains a valid UUID and False otherwise.
"""
Args:
in_str: string to test
+ separator: the slug character to use
Returns:
True if in_str is a slug string and False otherwise.
return in_str[::-1]
-def camel_case_to_snake_case(in_str, *, separator="_"):
+def camel_case_to_snake_case(in_str: str, *, separator: str = "_"):
"""
Args:
in_str: the camel case string to convert
+ separator: the snake case separator character to use
Returns:
A snake case string equivalent to the camel case input or the
"""
Args:
in_str: the snake case string to convert
+ upper_case_first: should we capitalize the first letter?
+ separator: the separator character to use
Returns:
A camel case string that is equivalent to the snake case string
"""
if not is_string(in_str):
raise ValueError(in_str)
- return in_str.lower() in ("true", "1", "yes", "y", "t", "on")
+ return in_str.lower() in set(["true", "1", "yes", "y", "t", "on"])
def to_date(in_str: str) -> Optional[datetime.date]:
Returns:
The datetime.date the string contained or None to indicate
an error. This parser is relatively clever; see
- :class:`datetimez.dateparse_utils` docs for details.
+ :class:`datetimes.dateparse_utils` docs for details.
- See also: :mod:`pyutils.datetimez.dateparse_utils`, :meth:`extract_date`,
+ See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`extract_date`,
:meth:`is_valid_date`, :meth:`to_datetime`, :meth:`valid_datetime`.
>>> to_date('9/11/2001')
datetime.date(2001, 9, 11)
>>> to_date('xyzzy')
"""
- import pyutils.datetimez.dateparse_utils as du
+ import pyutils.datetimes.dateparse_utils as du
try:
d = du.DateParser() # type: ignore
Returns:
a datetime if date was found, otherwise None
- See also: :mod:`pyutils.datetimez.dateparse_utils`, :meth:`to_date`,
+ See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`to_date`,
:meth:`is_valid_date`, :meth:`to_datetime`, :meth:`valid_datetime`.
>>> extract_date("filename.txt dec 13, 2022")
"""
import itertools
- import pyutils.datetimez.dateparse_utils as du
+ import pyutils.datetimes.dateparse_utils as du
d = du.DateParser() # type: ignore
chunks = in_str.split()
):
try:
expr = " ".join(ngram)
- logger.debug(f"Trying {expr}")
+ logger.debug("Trying %s", expr)
if d.parse(expr):
return d.get_datetime()
except du.ParseException: # type: ignore
Returns:
True if the string represents a valid date that we can recognize
and False otherwise. This parser is relatively clever; see
- :class:`datetimez.dateparse_utils` docs for details.
+ :class:`datetimes.dateparse_utils` docs for details.
- See also: :mod:`pyutils.datetimez.dateparse_utils`, :meth:`to_date`,
+ See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`to_date`,
:meth:`extract_date`, :meth:`to_datetime`, :meth:`valid_datetime`.
>>> is_valid_date('1/2/2022')
>>> is_valid_date('xyzzy')
False
"""
- import pyutils.datetimez.dateparse_utils as dp
+ import pyutils.datetimes.dateparse_utils as dp
try:
d = dp.DateParser() # type: ignore
Returns:
A python datetime parsed from in_str or None to indicate
an error. This parser is relatively clever; see
- :class:`datetimez.dateparse_utils` docs for details.
+ :class:`datetimes.dateparse_utils` docs for details.
- See also: :mod:`pyutils.datetimez.dateparse_utils`, :meth:`to_date`,
+ See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`to_date`,
:meth:`extract_date`, :meth:`valid_datetime`.
>>> to_datetime('7/20/1969 02:56 GMT')
datetime.datetime(1969, 7, 20, 2, 56, tzinfo=<StaticTzInfo 'GMT'>)
"""
- import pyutils.datetimez.dateparse_utils as dp
+ import pyutils.datetimes.dateparse_utils as dp
try:
d = dp.DateParser() # type: ignore
Returns:
True if in_str contains a valid datetime and False otherwise.
This parser is relatively clever; see
- :class:`datetimez.dateparse_utils` docs for details.
+ :class:`datetimes.dateparse_utils` docs for details.
>>> valid_datetime('next wednesday at noon')
True
sep = " "
if end is None:
end = "\n"
- for i, arg in enumerate(args):
- if i:
+ for n, arg in enumerate(args):
+ if n:
ret += sep
if isinstance(arg, str):
ret += arg
for second in second_list:
# Disallow there're/where're. They're valid English
# but sound weird.
- if (first in ('there', 'where')) and second == 'a(re)':
+ if (first in set(['there', 'where'])) and second == 'a(re)':
continue
pattern = fr'\b({first})\s+{second}\b'
words = txt.split()
for ngram in ngrams_presplit(words, n):
ret = ''
- for word in ngram:
- ret += f'{word} '
+ for w in ngram:
+ ret += f'{w} '
yield ret.strip()
def shuffle_columns_into_list(
- input_lines: Sequence[str], column_specs: Iterable[Iterable[int]], delim=''
+ input_lines: Sequence[str], column_specs: Iterable[Iterable[int]], delim: str = ''
) -> Iterable[str]:
"""Helper to shuffle / parse columnar data and return the results as a
list.
def shuffle_columns_into_dict(
input_lines: Sequence[str],
column_specs: Iterable[Tuple[str, Iterable[int]]],
- delim='',
+ delim: str = '',
) -> Dict[str, str]:
"""Helper to shuffle / parse columnar data and return the results
as a dict.
raise Exception('to_ascii works with strings and bytes')
-def to_base64(txt: str, *, encoding='utf-8', errors='surrogatepass') -> bytes:
+def to_base64(
+ txt: str, *, encoding: str = 'utf-8', errors: str = 'surrogatepass'
+) -> bytes:
"""
Args:
txt: the input data to encode
+ encoding: the encoding to use during conversion
+ errors: how to handle encoding errors
Returns:
txt encoded with a 64-character alphabet. Similar to and compatible
return True
-def from_base64(b64: bytes, encoding='utf-8', errors='surrogatepass') -> str:
+def from_base64(
+ b64: bytes, encoding: str = 'utf-8', errors: str = 'surrogatepass'
+) -> str:
"""
Args:
b64: bytestring of base64-encoded data to decode / convert.
+ encoding: the encoding to use during conversion
+ errors: how to handle encoding errors
Returns:
The decoded form of b64 as a normal python string. Similar to
yield txt[x : x + chunk_size]
-def to_bitstring(txt: str, *, delimiter='') -> str:
+def to_bitstring(txt: str, *, delimiter: str = '') -> str:
"""
Args:
txt: the string to convert into a bitstring
return is_binary_integer_number(f'0b{txt}')
-def from_bitstring(bits: str, encoding='utf-8', errors='surrogatepass') -> str:
+def from_bitstring(
+ bits: str, encoding: str = 'utf-8', errors: str = 'surrogatepass'
+) -> str:
"""
Args:
bits: the bitstring to convert back into a python string
- encoding: the encoding to use
+ encoding: the encoding to use during conversion
+ errors: how to handle encoding errors
Returns:
The regular python string represented by bits. Note that this