X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=profanity_filter.py;h=e5c9e11b59a9f45b0aed4288a61b9fed09ca34ee;hb=413d28443c7308414e8d283b9c5b9037463274f3;hp=4723a2db0679e5f866f14bbb723c66391fa06ae6;hpb=6887fe6aaa9d844fe421d2ccdf87f6d03249da9a;p=python_utils.git diff --git a/profanity_filter.py b/profanity_filter.py index 4723a2d..e5c9e11 100755 --- a/profanity_filter.py +++ b/profanity_filter.py @@ -12,7 +12,6 @@ from nltk.stem import PorterStemmer import decorator_utils import string_utils - logger = logging.getLogger(__name__) @@ -238,6 +237,9 @@ class ProfanityFilter(object): 'girl gone wild', 'girl on top', 'girl on', + 'give head', + 'giving head', + 'gave head', 'goatcx', 'goatse', 'goddamn', @@ -499,7 +501,7 @@ class ProfanityFilter(object): def tokenize(self, text: str): for x in nltk.word_tokenize(text): - for y in re.split('\W+', x): + for y in re.split(r'\W+', x): yield y def contains_bad_word(self, text: str) -> bool: @@ -561,7 +563,7 @@ class ProfanityFilter(object): break return out - words = self.tokenize(text) + words = [x for x in self.tokenize(text)] words.append('') words.append('') words.append('')