Easier and more self documenting patterns for loading/saving Persistent

[python_utils.git] / profanity_filter.py
diff --git a/profanity_filter.py b/profanity_filter.py

index a1f0c0b9adaa8971dfd243694cd096a2e84a077d..1a855857478089f010a16115166c3ea488922259 100755 (executable)
--- a/profanity_filter.py
+++ b/profanity_filter.py
@@ -2,7 +2,8 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""A helper to identify and optionally obscure some bad words."""
+"""A helper to identify and optionally obscure some bad words.  Not
+perfect but decent.  Uses a fuzzy block list rather than ML."""
  
  import logging
  import random
@@ -477,6 +478,9 @@ class ProfanityFilter(object):
          >>> _normalize('fucking a whore')
          'fuck a whore'
  
+        >>> _normalize('pu55y')
+        'pussy'
+
          """
          result = text.lower()
          result = result.replace("_", " ")
@@ -492,6 +496,7 @@ class ProfanityFilter(object):
  
      @staticmethod
      def tokenize(text: str):
+        """Tokenize text into word-like chunks"""
          for x in nltk.word_tokenize(text):
              for y in re.split(r'\W+', x):
                  yield y
@@ -532,12 +537,12 @@ class ProfanityFilter(object):
          return False
  
      def is_bad_word(self, word: str) -> bool:
+        """True if we think word is a bad word."""
          return word in self.bad_words or self._normalize(word) in self.bad_words
  
      def obscure_bad_words(self, text: str) -> str:
          """Obscure bad words that are detected by inserting random punctuation
          characters.
-
          """
  
          def obscure(word: str):