X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=profanity_filter.py;h=1a855857478089f010a16115166c3ea488922259;hb=a9bdfd8fc9f84b7b2c09a57cd12ba32259e84d1c;hp=a1f0c0b9adaa8971dfd243694cd096a2e84a077d;hpb=532df2c5b57c7517dfb3dddd8c1358fbadf8baf3;p=python_utils.git

diff --git a/profanity_filter.py b/profanity_filter.py
index a1f0c0b..1a85585 100755
--- a/profanity_filter.py
+++ b/profanity_filter.py
@@ -2,7 +2,8 @@
 
 # © Copyright 2021-2022, Scott Gasch
 
-"""A helper to identify and optionally obscure some bad words."""
+"""A helper to identify and optionally obscure some bad words.  Not
+perfect but decent.  Uses a fuzzy block list rather than ML."""
 
 import logging
 import random
@@ -477,6 +478,9 @@ class ProfanityFilter(object):
         >>> _normalize('fucking a whore')
         'fuck a whore'
 
+        >>> _normalize('pu55y')
+        'pussy'
+
         """
         result = text.lower()
         result = result.replace("_", " ")
@@ -492,6 +496,7 @@ class ProfanityFilter(object):
 
     @staticmethod
     def tokenize(text: str):
+        """Tokenize text into word-like chunks"""
         for x in nltk.word_tokenize(text):
             for y in re.split(r'\W+', x):
                 yield y
@@ -532,12 +537,12 @@ class ProfanityFilter(object):
         return False
 
     def is_bad_word(self, word: str) -> bool:
+        """True if we think word is a bad word."""
         return word in self.bad_words or self._normalize(word) in self.bad_words
 
     def obscure_bad_words(self, text: str) -> str:
         """Obscure bad words that are detected by inserting random punctuation
         characters.
-
         """
 
         def obscure(word: str):