X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=profanity_filter.py;h=1a855857478089f010a16115166c3ea488922259;hb=a9bdfd8fc9f84b7b2c09a57cd12ba32259e84d1c;hp=c1767bf16370bae17b63b56d426c7fc2e9e49519;hpb=1f9d550895e0112b1e0a1eb4a5d725deace8e810;p=python_utils.git

diff --git a/profanity_filter.py b/profanity_filter.py
index c1767bf..1a85585 100755
--- a/profanity_filter.py
+++ b/profanity_filter.py
@@ -1,6 +1,9 @@
 #!/usr/bin/env python3
 
-"""A helper to identify and optionally obscure some bad words."""
+# © Copyright 2021-2022, Scott Gasch
+
+"""A helper to identify and optionally obscure some bad words.  Not
+perfect but decent.  Uses a fuzzy block list rather than ML."""
 
 import logging
 import random
@@ -475,6 +478,9 @@ class ProfanityFilter(object):
         >>> _normalize('fucking a whore')
         'fuck a whore'
 
+        >>> _normalize('pu55y')
+        'pussy'
+
         """
         result = text.lower()
         result = result.replace("_", " ")
@@ -490,6 +496,7 @@ class ProfanityFilter(object):
 
     @staticmethod
     def tokenize(text: str):
+        """Tokenize text into word-like chunks"""
         for x in nltk.word_tokenize(text):
             for y in re.split(r'\W+', x):
                 yield y
@@ -530,12 +537,12 @@ class ProfanityFilter(object):
         return False
 
     def is_bad_word(self, word: str) -> bool:
+        """True if we think word is a bad word."""
         return word in self.bad_words or self._normalize(word) in self.bad_words
 
     def obscure_bad_words(self, text: str) -> str:
         """Obscure bad words that are detected by inserting random punctuation
         characters.
-
         """
 
         def obscure(word: str):