"""The MIT License (MIT)
Copyright (c) 2016-2020 Davide Zanotti
+
Modifications Copyright (c) 2021-2022 Scott Gasch
Permission is hereby granted, free of charge, to any person obtaining a copy
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-This class is based on: https://github.com/daveoncode/python-string-utils.
-See NOTICE in the root of this module for a detailed enumeration of what
-work is Davide's and what work was added by Scott.
+This class is based on:
+https://github.com/daveoncode/python-string-utils. See `NOTICE
+<https://wannabe.guru.org/gitweb/?p=pyutils.git;a=blob_plain;f=NOTICE;hb=HEAD>`__
+in the root of this module for a detailed enumeration of what work is
+Davide's and what work was added by Scott.
+
"""
import base64
r"(#\S*)?" # hash
)
-URL_RE = re.compile(r"^{}$".format(URLS_RAW_STRING), re.IGNORECASE)
+URL_RE = re.compile(rf"^{URLS_RAW_STRING}$", re.IGNORECASE)
-URLS_RE = re.compile(r"({})".format(URLS_RAW_STRING), re.IGNORECASE)
+URLS_RE = re.compile(rf"({URLS_RAW_STRING})", re.IGNORECASE)
ESCAPED_AT_SIGN = re.compile(r'(?!"[^"]*)@+(?=[^"]*")|\\@')
r"[a-zA-Z\d._\+\-'`!%#$&*/=\?\^\{\}\|~\\]+@[a-z\d-]+\.?[a-z\d-]+\.[a-z]{2,4}"
)
-EMAIL_RE = re.compile(r"^{}$".format(EMAILS_RAW_STRING))
+EMAIL_RE = re.compile(rf"^{EMAILS_RAW_STRING}$")
-EMAILS_RE = re.compile(r"({})".format(EMAILS_RAW_STRING))
+EMAILS_RE = re.compile(rf"({EMAILS_RAW_STRING})")
CAMEL_CASE_TEST_RE = re.compile(r"^[a-zA-Z]*([a-z]+[A-Z]+|[A-Z]+[a-z]+)[a-zA-Z\d]*$")
MARGIN_RE = re.compile(r"^[^\S\r\n]+")
-ESCAPE_SEQUENCE_RE = re.compile(r"\e\[[^A-Za-z]*[A-Za-z]")
+ESCAPE_SEQUENCE_RE = re.compile(r"\x1B\[[^A-Za-z]*[A-Za-z]")
NUM_SUFFIXES = {
"Pb": (1024**5),
"K": (1024**1),
}
-units = [
+UNIT_WORDS = [
"zero",
"one",
"two",
"nineteen",
]
-tens = [
+TENS_WORDS = [
"",
"",
"twenty",
"ninety",
]
-scales = ["hundred", "thousand", "million", "billion", "trillion"]
+MAGNITUDE_SCALES = [
+ "hundred",
+ "thousand",
+ "million",
+ "billion",
+ "trillion",
+ "quadrillion",
+]
NUM_WORDS = {}
NUM_WORDS["and"] = (1, 0)
-for i, word in enumerate(units):
+for i, word in enumerate(UNIT_WORDS):
NUM_WORDS[word] = (1, i)
-for i, word in enumerate(tens):
+for i, word in enumerate(TENS_WORDS):
NUM_WORDS[word] = (1, i * 10)
-for i, word in enumerate(scales):
- NUM_WORDS[word] = (10 ** (i * 3 or 2), 0)
+for i, word in enumerate(MAGNITUDE_SCALES):
+ if i == 0:
+ NUM_WORDS[word] = (100, 0)
+ else:
+ NUM_WORDS[word] = (10 ** (i * 3), 0)
NUM_WORDS['score'] = (20, 0)
True if the input string is either None or an empty string,
False otherwise.
+ See also :meth:`is_string` and :meth:`is_empty_string`.
+
>>> is_none_or_empty("")
True
>>> is_none_or_empty(None)
return in_str is None or len(in_str.strip()) == 0
def is_string(in_str: Any) -> bool:
    """
    Test whether an object is a string.

    Args:
        in_str: the object to test

    Returns:
        True if the object is a string and False otherwise.  Instances
        of str subclasses also count since the check uses isinstance.

    See also :meth:`is_empty_string`, :meth:`is_none_or_empty`.

    >>> is_string('test')
    True
    >>> is_string(123)
    False
    >>> is_string([1, 2, 3])
    False
    """
    return isinstance(in_str, str)
def is_empty_string(in_str: Any) -> bool:
Returns:
True if the string is empty and False otherwise.
+
+ See also :meth:`is_none_or_empty`, :meth:`is_full_string`.
"""
return is_empty(in_str)
Returns:
True if the string is empty and false otherwise.
+ See also :meth:`is_none_or_empty`, :meth:`is_full_string`.
+
>>> is_empty('')
True
>>> is_empty(' \t\t ')
True if the object is a string and is not empty ('') and
is not only composed of whitespace.
+ See also :meth:`is_string`, :meth:`is_empty_string`, :meth:`is_none_or_empty`.
+
>>> is_full_string('test!')
True
>>> is_full_string('')
True if the string contains a valid numeric value and
False otherwise.
+ See also :meth:`is_integer_number`, :meth:`is_decimal_number`,
+ :meth:`is_hexidecimal_integer_number`, :meth:`is_octal_integer_number`,
+ etc...
+
>>> is_number(100.5)
Traceback (most recent call last):
...
decimal, hex, or octal, regular or scientific) integral
expression and False otherwise.
+ See also :meth:`is_number`, :meth:`is_decimal_number`,
+ :meth:`is_hexidecimal_integer_number`, :meth:`is_octal_integer_number`,
+ etc...
+
>>> is_integer_number('42')
True
>>> is_integer_number('42.0')
Returns:
True if the string is a hex integer number and False otherwise.
+ See also :meth:`is_integer_number`, :meth:`is_decimal_number`,
+ :meth:`is_octal_integer_number`, :meth:`is_binary_integer_number`, etc...
+
>>> is_hexidecimal_integer_number('0x12345')
True
>>> is_hexidecimal_integer_number('0x1A3E')
Returns:
True if the string is a valid octal integral number and False otherwise.
+ See also :meth:`is_integer_number`, :meth:`is_decimal_number`,
+ :meth:`is_hexidecimal_integer_number`, :meth:`is_binary_integer_number`,
+ etc...
+
>>> is_octal_integer_number('0o777')
True
>>> is_octal_integer_number('-0O115')
Returns:
True if the string contains a binary integral number and False otherwise.
+ See also :meth:`is_integer_number`, :meth:`is_decimal_number`,
+ :meth:`is_hexidecimal_integer_number`, :meth:`is_octal_integer_number`,
+ etc...
+
>>> is_binary_integer_number('0b10111')
True
>>> is_binary_integer_number('-0b111')
Returns:
The integral value of the string or raises on error.
+ See also :meth:`is_integer_number`, :meth:`is_decimal_number`,
+ :meth:`is_hexidecimal_integer_number`, :meth:`is_octal_integer_number`,
+ :meth:`is_binary_integer_number`, etc...
+
>>> to_int('1234')
1234
+ >>> to_int('0x1234')
+ 4660
+ >>> to_int('0b01101')
+ 13
+ >>> to_int('0o777')
+ 511
>>> to_int('test')
Traceback (most recent call last):
...
def number_string_to_integer(in_str: str) -> int:
    """Convert a string containing a written-out number into an int.

    Args:
        in_str: the string containing the long-hand written out integer number
            in English.  Hyphens are treated as spaces.  If an int is passed
            instead of a string it is returned as-is.  See examples below.

    Returns:
        The integer whose value was parsed from in_str.

    Raises:
        ValueError: a word in in_str is neither a key of NUM_WORDS nor
            accepted by :meth:`is_integer_number`.

    See also :meth:`integer_to_number_string`.

    .. warning::
        This code only handles integers; it will not work with decimals / floats.

    >>> number_string_to_integer("one hundred fifty two")
    152

    >>> number_string_to_integer("two thousand twenty two")
    2022

    >>> number_string_to_integer("xyzzy")
    Traceback (most recent call last):
    ...
    ValueError: Unknown word: xyzzy
    """
    if isinstance(in_str, int):
        return int(in_str)

    # `current` accumulates the group being read (e.g. "two hundred nineteen");
    # `result` holds the groups already closed by a scale word above one hundred.
    current = result = 0
    in_str = in_str.replace('-', ' ')
    for w in in_str.split():
        if w not in NUM_WORDS:
            if not is_integer_number(w):
                raise ValueError("Unknown word: " + w)
            # NOTE(review): presumably is_integer_number also accepts hex /
            # octal / scientific forms that int() rejects (with ValueError);
            # confirm whether those should be supported here.
            current += int(w)
            continue
        scale, increment = NUM_WORDS[w]
        current = current * scale + increment
        if scale > 100:
            # A thousand/million/... word closes the current group.  Bank it
            # and start over, otherwise the group would be counted a second
            # time by the final `result + current`.
            result += current
            current = 0
    return result + current
def integer_to_number_string(num: int) -> str:
    """
    Opposite of :meth:`number_string_to_integer`; converts a number to a written out
    longhand format in English.

    Args:
        num: the non-negative integer number to convert

    Returns:
        The long-hand written out English form of the number.  See examples below.

    Raises:
        ValueError: num is negative.

    See also :meth:`number_string_to_integer`.

    .. warning::
        This method does not handle decimals or floats, only ints.

    >>> integer_to_number_string(9)
    'nine'

    >>> integer_to_number_string(42)
    'forty two'

    >>> integer_to_number_string(123219982)
    'one hundred twenty three million two hundred nineteen thousand nine hundred eighty two'

    >>> integer_to_number_string(-1)
    Traceback (most recent call last):
    ...
    ValueError: Cannot convert negative number: -1
    """
    if num < 0:
        # A negative value would index UNIT_WORDS from the end and silently
        # return the wrong word (or raise IndexError), so reject it up front.
        raise ValueError(f"Cannot convert negative number: {num}")
    if num < 20:
        return UNIT_WORDS[num]
    if num < 100:
        tens, ones = divmod(num, 10)
        if ones == 0:
            return TENS_WORDS[tens]
        return f"{TENS_WORDS[tens]} {UNIT_WORDS[ones]}"

    # num >= 100: find the largest scale word that still fits into num.
    # NUM_WORDS maps e.g. 'thousand' -> (1000, 0); only the first tuple item
    # (the scale) matters here.  For 123,456 this picks "thousand": convert
    # the 123, append "thousand", then recursively convert the remainder.
    name, magnitude = max(
        ((word, entry[0]) for word, entry in NUM_WORDS.items() if entry[0] <= num),
        key=lambda pair: pair[1],
    )
    count, leftover = divmod(num, magnitude)
    ret = f"{integer_to_number_string(count)} {name}"
    if leftover != 0:
        ret += ' ' + integer_to_number_string(leftover)
    return ret
+
+
def is_decimal_number(in_str: str) -> bool:
"""
Args:
otherwise. A decimal may be signed or unsigned or use
a "scientific notation".
+ See also :meth:`is_integer_number`.
+
.. note::
We do not consider integers without a decimal point
to be decimals; they return False (see example).
Returns:
in_str with escape sequences removed.
+ See also: :mod:`pyutils.ansi`.
+
.. note::
What is considered to be an "escape sequence" is defined
by a regular expression. While this gets common ones,
there may exist valid sequences that it doesn't match.
- >>> strip_escape_sequences('\e[12;11;22mthis is a test!')
+ >>> strip_escape_sequences('\x1B[12;11;22mthis is a test!')
'this is a test!'
"""
in_str = ESCAPE_SEQUENCE_RE.sub("", in_str)
return in_str
-def add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str:
+def add_thousands_separator(
+ in_str: str, *, separator_char: str = ',', places: int = 3
+) -> str:
"""
Args:
in_str: string or number to which to add thousands separator(s)
def _add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str:
+ """Internal helper"""
decimal_part = ""
if '.' in in_str:
(in_str, decimal_part) = in_str.split('.')
Returns:
An integer number of bytes or None to indicate an error.
+ See also :meth:`number_to_suffix_string`.
+
>>> suffix_string_to_number('1Mb')
1048576
>>> suffix_string_to_number('13.1Gb')
A string with a suffix representing num bytes concisely or
None to indicate an error.
+ See also: :meth:`suffix_string_to_number`.
+
>>> number_to_suffix_string(14066017894)
'13.1Gb'
>>> number_to_suffix_string(1024 * 1024)
Returns:
True if in_str is a valid credit card number.
+
+ .. warning::
+ This code is not verifying the authenticity of the credit card (i.e.
+ not checking whether it's a real card that can be charged); rather
+ it's only checking that the number follows the "rules" for numbering
+ established by credit card issuers.
+
"""
if not is_full_string(in_str):
return False
* it's composed only by letters ([a-zA-Z]) and optionally numbers ([0-9])
* it contains both lowercase and uppercase letters
* it does not start with a number
+
+ See also :meth:`is_snake_case`, :meth:`is_slug`, and :meth:`camel_case_to_snake_case`.
"""
return is_full_string(in_str) and CAMEL_CASE_TEST_RE.match(in_str) is not None
"""
Args:
in_str: the string to test
+ separator: the snake case separator character to use
Returns: True if the string is snake case and False otherwise. A
string is considered snake case when:
* it contains at least one underscore (or provided separator)
* it does not start with a number
+ See also :meth:`is_camel_case`, :meth:`is_slug`, and :meth:`snake_case_to_camel_case`.
+
>>> is_snake_case('this_is_a_test')
True
>>> is_snake_case('___This_Is_A_Test_1_2_3___')
"""
Args:
in_str: the string to test
+ allow_hex: should we allow hexadecimal digits in valid uuids?
Returns:
True if the in_str contains a valid UUID and False otherwise.
+ See also :meth:`generate_uuid`.
+
>>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf')
True
>>> is_uuid('6f8aa2f9686c4ac387665712354a04cf')
Returns:
True if in_str contains a valid IPv4 address and False otherwise.
+ See also :meth:`extract_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v6`,
+ and :meth:`is_ip`.
+
>>> is_ip_v4('255.200.100.75')
True
>>> is_ip_v4('nope')
The first extracted IPv4 address from in_str or None if
none were found or an error occurred.
+ See also :meth:`is_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v6`,
+ and :meth:`is_ip`.
+
>>> extract_ip_v4(' The secret IP address: 127.0.0.1 (use it wisely) ')
'127.0.0.1'
>>> extract_ip_v4('Your mom dresses you funny.')
Returns:
True if in_str contains a valid IPv6 address and False otherwise.
+ See also :meth:`is_ip_v4`, :meth:`extract_ip_v4`, :meth:`extract_ip_v6`,
+ and :meth:`is_ip`.
+
>>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334')
True
>>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?') # invalid "?"
The first IPv6 address found in in_str or None if no address
was found or an error occurred.
+ See also :meth:`is_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v4`,
+ and :meth:`is_ip`.
+
>>> extract_ip_v6('IP: 2001:db8:85a3:0000:0000:8a2e:370:7334')
'2001:db8:85a3:0000:0000:8a2e:370:7334'
>>> extract_ip_v6("(and she's ugly too, btw)")
True if in_str contains a valid IP address (either IPv4 or
IPv6).
+ See also :meth:`is_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v6`,
+ and :meth:`extract_ip_v4`.
+
>>> is_ip('255.200.100.75')
True
>>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334')
The first IP address (IPv4 or IPv6) found in in_str or
None to indicate none found or an error condition.
+ See also :meth:`is_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v6`,
+ and :meth:`extract_ip_v4`.
+
>>> extract_ip('Attacker: 255.200.100.75')
'255.200.100.75'
>>> extract_ip('Remote host: 2001:db8:85a3:0000:0000:8a2e:370:7334')
Returns:
True if in_str is a valid MAC address and False otherwise.
+ See also :meth:`extract_mac_address`, :meth:`is_ip`, etc...
+
>>> is_mac_address("34:29:8F:12:0D:2F")
True
>>> is_mac_address('34:29:8f:12:0d:2f')
The first MAC address found in in_str or None to indicate no
match or an error.
+ See also :meth:`is_mac_address`, :meth:`is_ip`, and :meth:`extract_ip`.
+
>>> extract_mac_address(' MAC Address: 34:29:8F:12:0D:2F')
'34:29:8F:12:0D:2F'
"""
Args:
in_str: string to test
+ separator: the slug character to use
Returns:
True if in_str is a slug string and False otherwise.
+ See also :meth:`is_camel_case`, :meth:`is_snake_case`, and :meth:`slugify`.
+
>>> is_slug('my-blog-post-title')
True
>>> is_slug('My blog post title')
True if the given string contains HTML/XML tags and False
otherwise.
+ See also :meth:`strip_html`.
+
.. warning::
By design, this function matches ANY type of tag, so don't expect
to use it as an HTML validator. It's a quick sanity check at
The number of words contained in the given string.
.. note::
-
This method is "smart" in that it does consider only sequences
of one or more letter and/or numbers to be "words". Thus a
string like this: "! @ # % ... []" will return zero. Moreover
The number of words contained in the given string.
.. note::
-
This method is "smart" in that it does consider only sequences
of one or more letter and/or numbers to be "words". Thus a
string like this: "! @ # % ... []" will return zero. Moreover
A generated UUID string (using `uuid.uuid4()`) with or without
dashes per the omit_dashes arg.
+ See also :meth:`is_uuid`, :meth:`generate_random_alphanumeric_string`.
+
generate_uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
generate_uuid(omit_dashes=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
"""
A string of the specified size containing random characters
(uppercase/lowercase ascii letters and digits).
+ See also :meth:`asciify`, :meth:`generate_uuid`.
+
>>> random.seed(22)
>>> generate_random_alphanumeric_string(9)
'96ipbNClS'
return in_str[::-1]
-def camel_case_to_snake_case(in_str, *, separator="_"):
+def camel_case_to_snake_case(in_str: str, *, separator: str = "_"):
"""
Args:
in_str: the camel case string to convert
+ separator: the snake case separator character to use
Returns:
A snake case string equivalent to the camel case input or the
original string if it is not a valid camel case string or some
other error occurs.
+ See also :meth:`is_camel_case`, :meth:`is_snake_case`, and :meth:`is_slug`.
+
>>> camel_case_to_snake_case('MacAddressExtractorFactory')
'mac_address_extractor_factory'
>>> camel_case_to_snake_case('Luke Skywalker')
"""
Args:
in_str: the snake case string to convert
+ upper_case_first: should we capitalize the first letter?
+ separator: the separator character to use
Returns:
A camel case string that is equivalent to the snake case string
provided or the original string back again if it is not valid
snake case or another error occurs.
+ See also :meth:`is_camel_case`, :meth:`is_snake_case`, and :meth:`is_slug`.
+
>>> snake_case_to_camel_case('this_is_a_test')
'ThisIsATest'
>>> snake_case_to_camel_case('Han Solo')
Returns:
A list of strings of length one each.
+ See also :meth:`from_char_list`.
+
>>> to_char_list('test')
['t', 'e', 's', 't']
"""
The string resulting from gluing the characters in in_list
together.
+ See also :meth:`to_char_list`.
+
>>> from_char_list(['t', 'e', 's', 't'])
'test'
"""
in the same original string as no check is done. Returns
None to indicate error conditions.
+ See also :mod:`pyutils.unscrambler`.
+
>>> random.seed(22)
>>> scramble('awesome')
'meosaew'
A string with all HTML tags removed (optionally with tag contents
preserved).
+ See also :meth:`contains_html`.
+
.. note::
This method uses simple regular expressions to strip tags and is
not a full fledged HTML parser by any means. Consider using
by translating all non-ascii chars into their closest possible
ASCII representation (eg: ó -> o, Ë -> E, ç -> c...).
+ See also :meth:`to_ascii`, :meth:`generate_random_alphanumeric_string`.
+
.. warning::
Some chars may be lost if impossible to translate.
* all chars are encoded as ascii (by using :meth:`asciify`)
* is safe for URL
+ See also :meth:`is_slug` and :meth:`asciify`.
+
>>> slugify('Top 10 Reasons To Love Dogs!!!')
'top-10-reasons-to-love-dogs'
>>> slugify('Mönstér Mägnët')
Otherwise False is returned.
+ See also :mod:`pyutils.argparse_utils`.
+
>>> to_bool('True')
True
"""
if not is_string(in_str):
raise ValueError(in_str)
- return in_str.lower() in ("true", "1", "yes", "y", "t", "on")
+ return in_str.lower() in set(["true", "1", "yes", "y", "t", "on"])
def to_date(in_str: str) -> Optional[datetime.date]:
Returns:
The datetime.date the string contained or None to indicate
an error. This parser is relatively clever; see
- :class:`datetimez.dateparse_utils` docs for details.
+ :class:`datetimes.dateparse_utils` docs for details.
+
+ See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`extract_date`,
+ :meth:`is_valid_date`, :meth:`to_datetime`, :meth:`valid_datetime`.
>>> to_date('9/11/2001')
datetime.date(2001, 9, 11)
>>> to_date('xyzzy')
"""
- import pyutils.datetimez.dateparse_utils as du
+ import pyutils.datetimes.dateparse_utils as du
try:
d = du.DateParser() # type: ignore
d.parse(in_str)
return d.get_date()
except du.ParseException: # type: ignore
- msg = f'Unable to parse date {in_str}.'
- logger.warning(msg)
+ pass
return None
Returns:
a datetime if date was found, otherwise None
+ See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`to_date`,
+ :meth:`is_valid_date`, :meth:`to_datetime`, :meth:`valid_datetime`.
+
>>> extract_date("filename.txt dec 13, 2022")
datetime.datetime(2022, 12, 13, 0, 0)
"""
import itertools
- import pyutils.datetimez.dateparse_utils as du
+ import pyutils.datetimes.dateparse_utils as du
d = du.DateParser() # type: ignore
chunks = in_str.split()
):
try:
expr = " ".join(ngram)
- logger.debug(f"Trying {expr}")
+ logger.debug("Trying %s", expr)
if d.parse(expr):
return d.get_datetime()
except du.ParseException: # type: ignore
Returns:
True if the string represents a valid date that we can recognize
and False otherwise. This parser is relatively clever; see
- :class:`datetimez.dateparse_utils` docs for details.
+ :class:`datetimes.dateparse_utils` docs for details.
+
+ See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`to_date`,
+ :meth:`extract_date`, :meth:`to_datetime`, :meth:`valid_datetime`.
>>> is_valid_date('1/2/2022')
True
>>> is_valid_date('xyzzy')
False
"""
- import pyutils.datetimez.dateparse_utils as dp
+ import pyutils.datetimes.dateparse_utils as dp
try:
d = dp.DateParser() # type: ignore
_ = d.parse(in_str)
return True
except dp.ParseException: # type: ignore
- msg = f'Unable to parse date {in_str}.'
- logger.warning(msg)
+ pass
return False
Returns:
A python datetime parsed from in_str or None to indicate
an error. This parser is relatively clever; see
- :class:`datetimez.dateparse_utils` docs for details.
+ :class:`datetimes.dateparse_utils` docs for details.
+
+ See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`to_date`,
+ :meth:`extract_date`, :meth:`valid_datetime`.
>>> to_datetime('7/20/1969 02:56 GMT')
datetime.datetime(1969, 7, 20, 2, 56, tzinfo=<StaticTzInfo 'GMT'>)
"""
- import pyutils.datetimez.dateparse_utils as dp
+ import pyutils.datetimes.dateparse_utils as dp
try:
d = dp.DateParser() # type: ignore
if isinstance(dt, datetime.datetime):
return dt
except Exception:
- msg = f'Unable to parse datetime {in_str}.'
- logger.warning(msg)
+ pass
return None
Returns:
True if in_str contains a valid datetime and False otherwise.
This parser is relatively clever; see
- :class:`datetimez.dateparse_utils` docs for details.
+ :class:`datetimes.dateparse_utils` docs for details.
>>> valid_datetime('next wednesday at noon')
True
_ = to_datetime(in_str)
if _ is not None:
return True
- msg = f'Unable to parse datetime {in_str}.'
- logger.warning(msg)
return False
Returns:
A string with tab indentation removed or None on error.
- .. note::
-
- Inspired by analogous Scala function.
+ See also :meth:`indent`.
>>> dedent('\t\ttest\\n\t\ting')
'test\\ning'
Returns:
An indented string created by prepending amount spaces.
+ See also :meth:`dedent`.
+
>>> indent('This is a test', 4)
' This is a test'
"""
return line_separator.join(lines)
-def sprintf(*args, **kwargs) -> str:
- """
- Args:
- This function uses the same syntax as the builtin print
- function.
-
- Returns:
- An interpolated string capturing print output, like man(3)
- :code:sprintf.
- """
+def _sprintf(*args, **kwargs) -> str:
+ """Internal helper."""
ret = ""
sep = kwargs.pop("sep", None)
sep = " "
if end is None:
end = "\n"
- for i, arg in enumerate(args):
- if i:
+ for n, arg in enumerate(args):
+ if n:
ret += sep
if isinstance(arg, str):
ret += arg
Returns:
in_str with recognized ANSI escape sequences removed.
+ See also :mod:`pyutils.ansi`.
+
.. warning::
This method works by using a regular expression.
It works for all ANSI escape sequences I've tested with but
>>> print(buf(), end='')
test
1, 2, 3
-
"""
def __init__(self) -> None:
'Test'
>>> capitalize_first_letter("ALREADY!")
'ALREADY!'
-
"""
return in_str[0].upper() + in_str[1:]
Returns:
'it' if n is one or 'they' otherwise.
+ See also :meth:`is_are`, :meth:`pluralize`, :meth:`make_contractions`,
+ :meth:`thify`.
+
Suggested usage::
n = num_files_saved_to_tmp()
Returns:
'is' if n is one or 'are' otherwise.
+ See also :meth:`it_they`, :meth:`pluralize`, :meth:`make_contractions`,
+ :meth:`thify`.
+
Suggested usage::
n = num_files_saved_to_tmp()
Returns:
's' if n is greater than one, otherwise ''.
+ See also :meth:`it_they`, :meth:`is_are`, :meth:`make_contractions`,
+ :meth:`thify`.
+
Suggested usage::
n = num_files_saved_to_tmp()
Output text identical to original input except for any
recognized contractions are formed.
+ See also :meth:`it_they`, :meth:`is_are`, :meth:`pluralize`, :meth:`thify`.
+
.. note::
The order in which we create contractions is defined by the
implementation and what I thought made more sense when writing
for second in second_list:
# Disallow there're/where're. They're valid English
# but sound weird.
- if (first in ('there', 'where')) and second == 'a(re)':
+ if (first in set(['there', 'where'])) and second == 'a(re)':
continue
pattern = fr'\b({first})\s+{second}\b'
Returns:
The proper cardinal suffix for a number.
+ See also :meth:`it_they`, :meth:`is_are`, :meth:`make_contractions`.
+
Suggested usage::
attempt_count = 0
Returns:
Generates the ngrams from the input string.
+ See also :meth:`ngrams_presplit`, :meth:`bigrams`, :meth:`trigrams`.
+
>>> [x for x in ngrams('This is a test', 2)]
['This is', 'is a', 'a test']
"""
words = txt.split()
for ngram in ngrams_presplit(words, n):
ret = ''
- for word in ngram:
- ret += f'{word} '
+ for w in ngram:
+ ret += f'{w} '
yield ret.strip()
def ngrams_presplit(words: Sequence[str], n: int):
    """
    Same as :meth:`ngrams` but with the string pre-split.

    Args:
        words: the sequence of already-split words to build ngrams from
        n: the ngram size; passed through unchanged to list_utils.ngrams

    Returns:
        Whatever list_utils.ngrams produces for (words, n); :meth:`ngrams`
        iterates over it and treats each item as a group of words.

    See also :meth:`ngrams`, :meth:`bigrams`, :meth:`trigrams`.
    """
    return list_utils.ngrams(words, n)
def bigrams(txt: str):
    """Generates the bigrams (n=2) of the given string.

    Args:
        txt: the string to create bigrams from; forwarded to :meth:`ngrams`

    Returns:
        The result of calling :meth:`ngrams` on txt with n=2.

    See also :meth:`ngrams`, :meth:`trigrams`.

    >>> [x for x in bigrams('this is a test')]
    ['this is', 'is a', 'a test']
    """
    return ngrams(txt, 2)
def trigrams(txt: str):
    """Generates the trigrams (n=3) of the given string.

    Args:
        txt: the string to create trigrams from; forwarded to :meth:`ngrams`

    Returns:
        The result of calling :meth:`ngrams` on txt with n=3.

    See also :meth:`ngrams`, :meth:`bigrams`.
    """
    return ngrams(txt, 3)
def shuffle_columns_into_list(
- input_lines: Sequence[str], column_specs: Iterable[Iterable[int]], delim=''
+ input_lines: Sequence[str], column_specs: Iterable[Iterable[int]], delim: str = ''
) -> Iterable[str]:
"""Helper to shuffle / parse columnar data and return the results as a
list.
A list of string created by following the instructions set forth
in column_specs.
+ See also :meth:`shuffle_columns_into_dict`.
+
>>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul 9 11:34 acl_test.py'.split()
>>> shuffle_columns_into_list(
... cols,
def shuffle_columns_into_dict(
input_lines: Sequence[str],
column_specs: Iterable[Tuple[str, Iterable[int]]],
- delim='',
+ delim: str = '',
) -> Dict[str, str]:
"""Helper to shuffle / parse columnar data and return the results
as a dict.
Returns:
A dict formed by applying the column_specs instructions.
+ See also :meth:`shuffle_columns_into_list`, :meth:`interpolate_using_dict`.
+
>>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul 9 11:34 acl_test.py'.split()
>>> shuffle_columns_into_dict(
... cols,
txt: the mad libs template
values: what you and your kids chose for each category.
+ See also :meth:`shuffle_columns_into_list`, :meth:`shuffle_columns_into_dict`.
+
>>> interpolate_using_dict('This is a {adjective} {noun}.',
... {'adjective': 'good', 'noun': 'example'})
'This is a good example.'
"""
- return sprintf(txt.format(**values), end='')
+ return _sprintf(txt.format(**values), end='')
def to_ascii(txt: str):
Returns:
txt encoded as an ASCII byte string.
+ See also :meth:`to_base64`, :meth:`to_bitstring`, :meth:`to_bytes`,
+ :meth:`generate_random_alphanumeric_string`, :meth:`asciify`.
+
>>> to_ascii('test')
b'test'
raise Exception('to_ascii works with strings and bytes')
-def to_base64(txt: str, *, encoding='utf-8', errors='surrogatepass') -> bytes:
+def to_base64(
+ txt: str, *, encoding: str = 'utf-8', errors: str = 'surrogatepass'
+) -> bytes:
"""
Args:
txt: the input data to encode
+ encoding: the encoding to use during conversion
+ errors: how to handle encoding errors
Returns:
txt encoded with a 64-character alphabet. Similar to and compatible
with uuencode/uudecode.
+ See also :meth:`is_base64`, :meth:`to_ascii`, :meth:`to_bitstring`,
+ :meth:`from_base64`.
+
>>> to_base64('hello?')
b'aGVsbG8/\\n'
"""
txt was encoded with Python's standard base64 alphabet which
is the same as what uuencode/uudecode uses).
+ See also :meth:`to_base64`, :meth:`from_base64`.
+
>>> is_base64('test') # all letters in the b64 alphabet
True
return True
-def from_base64(b64: bytes, encoding='utf-8', errors='surrogatepass') -> str:
+def from_base64(
+ b64: bytes, encoding: str = 'utf-8', errors: str = 'surrogatepass'
+) -> str:
"""
Args:
b64: bytestring of base64 encoded data to decode / convert.
+ encoding: the encoding to use during conversion
+ errors: how to handle encoding errors
Returns:
The decoded form of b64 as a normal python string. Similar to
and compatible with uuencode / uudecode.
+ See also :meth:`to_base64`, :meth:`is_base64`.
+
>>> from_base64(b'aGVsbG8/\\n')
'hello?'
"""
yield txt[x : x + chunk_size]
-def to_bitstring(txt: str, *, delimiter='') -> str:
+def to_bitstring(txt: str, *, delimiter: str = '') -> str:
"""
Args:
txt: the string to convert into a bitstring
Returns:
txt converted to ascii/binary and then chopped into bytes.
+ See also :meth:`to_base64`, :meth:`from_bitstring`, :meth:`is_bitstring`,
+ :meth:`chunk`.
+
>>> to_bitstring('hello?')
'011010000110010101101100011011000110111100111111'
Note that if delimiter is non empty this code will not
recognize the bitstring.
+ See also :meth:`to_base64`, :meth:`from_bitstring`, :meth:`to_bitstring`,
+ :meth:`chunk`.
+
>>> is_bitstring('011010000110010101101100011011000110111100111111')
True
return is_binary_integer_number(f'0b{txt}')
-def from_bitstring(bits: str, encoding='utf-8', errors='surrogatepass') -> str:
+def from_bitstring(
+ bits: str, encoding: str = 'utf-8', errors: str = 'surrogatepass'
+) -> str:
"""
Args:
bits: the bitstring to convert back into a python string
- encoding: the encoding to use
+ encoding: the encoding to use during conversion
+ errors: how to handle encoding errors
Returns:
The regular python string represented by bits. Note that this
code does not work with to_bitstring when delimiter is non-empty.
+ See also :meth:`to_base64`, :meth:`to_bitstring`, :meth:`is_bitstring`,
+ :meth:`chunk`.
+
>>> from_bitstring('011010000110010101101100011011000110111100111111')
'hello?'
"""
IP addresses using a normal comparator will do something sane
and desirable.
+ See also :meth:`is_ip_v4`.
+
>>> ip_v4_sort_key('10.0.0.18')
(10, 0, 0, 18)
volumes using a normal comparator will do something sane
and desirable.
+ See also :mod:`pyutils.files.file_utils`.
+
>>> path_ancestors_before_descendants_sort_key('/usr/local/bin')
('usr', 'local', 'bin')
replacement: the character to replace any member of replace_set
with
+ See also :meth:`replace_nth`.
+
Returns:
The string with replacements executed.
target: the replacement text
nth: which occurrence of source to replace?
+ See also :meth:`replace_all`.
+
>>> replace_nth('this is a test', ' ', '-', 3)
'this is a-test'
"""