import logging
import random
import re
import string
import sys
]
return ' '.join(chunks)
def tokenize(self, text: str):
    """Yield word tokens from *text*.

    Each NLTK token is further split on runs of non-word characters so
    that punctuation attached to a word (e.g. "damn!") does not hide it.

    Args:
        text: arbitrary input text.

    Yields:
        Non-empty word fragments, in order of appearance.
    """
    for token in nltk.word_tokenize(text):
        # Raw string for the regex; re.split produces '' fragments when a
        # token starts or ends with punctuation — skip those empties so
        # downstream word checks only see real words.
        for piece in re.split(r'\W+', token):
            if piece:
                yield piece
def contains_bad_word(self, text: str) -> bool:
    """Return True if *text* contains at least one bad word, else False.

    Scans the tokens lazily and stops at the first profane token, so the
    whole text is not tokenized once a match is found.

    Args:
        text: arbitrary input text to screen.

    Returns:
        True if any token is flagged by ``self.is_bad_word``, else False.
    """
    # Iterate the generator directly — no need to materialize a list,
    # and returning immediately fixes the original's `return out` where
    # `out` was never assigned (NameError on every call).
    for word in self.tokenize(text):
        if self.is_bad_word(word):
            logger.debug(f'"{word}" is profanity')
            return True
    return False
- words = nltk.word_tokenize(text)
+ words = self.tokenize(text)
words.append('')
words.append('')
words.append('')