Adds unittest.

[python_utils.git] / profanity_filter.py
diff --git a/profanity_filter.py b/profanity_filter.py

index 95540fa7b36f0bd8fcf813196e2f9f2390569fce..e5c9e11b59a9f45b0aed4288a61b9fed09ca34ee 100755 (executable)
--- a/profanity_filter.py
+++ b/profanity_filter.py
@@ -12,7 +12,6 @@ from nltk.stem import PorterStemmer
  import decorator_utils
  import string_utils
  
-
  logger = logging.getLogger(__name__)
  
  
@@ -238,6 +237,9 @@ class ProfanityFilter(object):
                  'girl gone wild',
                  'girl on top',
                  'girl on',
+                'give head',
+                'giving head',
+                'gave head',
                  'goatcx',
                  'goatse',
                  'goddamn',
@@ -494,14 +496,12 @@ class ProfanityFilter(object):
          result = result.replace('3', 'e')
          for x in string.punctuation:
              result = result.replace(x, "")
-        chunks = [
-            self.stemmer.stem(word) for word in nltk.word_tokenize(result)
-        ]
+        chunks = [self.stemmer.stem(word) for word in nltk.word_tokenize(result)]
          return ' '.join(chunks)
  
      def tokenize(self, text: str):
          for x in nltk.word_tokenize(text):
-            for y in re.split('\W+', x):
+            for y in re.split(r'\W+', x):
                  yield y
  
      def contains_bad_word(self, text: str) -> bool:
@@ -563,7 +563,7 @@ class ProfanityFilter(object):
                              break
              return out
  
-        words = self.tokenize(text)
+        words = [x for x in self.tokenize(text)]
          words.append('')
          words.append('')
          words.append('')