From 12dfb5afcdc42c449364c1207c175de20393a5c1 Mon Sep 17 00:00:00 2001
From: Scott
Date: Sun, 23 Jan 2022 16:13:44 -0800
Subject: [PATCH] Make profanity filter catch foo/bar where foo and/or bar are bad words.

Add ProfanityFilter.tokenize(), which runs nltk.word_tokenize() and then
splits every resulting token on runs of non-word characters, so that
"foo/bar" is checked as "foo" and "bar".

tokenize() is a generator.  Both callers materialize it with list(); the
second call site appends entries to the token list right after building
it, which a bare generator does not support.

---
 profanity_filter.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/profanity_filter.py b/profanity_filter.py
index db014e1..3109f16 100755
--- a/profanity_filter.py
+++ b/profanity_filter.py
@@ -2,6 +2,7 @@
 
 import logging
 import random
+import re
 import string
 import sys
 
@@ -496,6 +497,11 @@ class ProfanityFilter(object):
         ]
         return ' '.join(chunks)
 
+    def tokenize(self, text: str):
+        for x in nltk.word_tokenize(text):
+            for y in re.split(r'\W+', x):
+                yield y
+
     def contains_bad_word(self, text: str) -> bool:
         """Returns True if text contains a bad word (or more than one)
         and False if no bad words were detected.
@@ -510,7 +516,7 @@ class ProfanityFilter(object):
         False
 
         """
-        words = nltk.word_tokenize(text)
+        words = list(self.tokenize(text))
         for word in words:
             if self.is_bad_word(word):
                 logger.debug(f'"{word}" is profanity')
@@ -557,7 +563,7 @@ class ProfanityFilter(object):
                     break
             return out
 
-        words = nltk.word_tokenize(text)
+        words = list(self.tokenize(text))
         words.append('')
         words.append('')
         words.append('')
-- 
2.46.0