X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=profanity_filter.py;h=31577e0fbf1a4a76486dab066fff764aeb5bbc47;hb=351e77c767c9084aa486eedbdc9902c635b06261;hp=e1b474323fb67823a0c1607e69df08ec41b5398d;hpb=b843703134a166013518c707fa5a77373f1bf0bf;p=python_utils.git diff --git a/profanity_filter.py b/profanity_filter.py index e1b4743..31577e0 100755 --- a/profanity_filter.py +++ b/profanity_filter.py @@ -8,12 +8,14 @@ import sys import nltk from nltk.stem import PorterStemmer +import decorator_utils import string_utils logger = logging.getLogger(__name__) +@decorator_utils.singleton class ProfanityFilter(object): def __init__(self): self.bad_words = set([ @@ -82,6 +84,7 @@ class ProfanityFilter(object): 'blonde action', 'blow j', 'blow job', + 'blowjob', 'blow my', 'blow me', 'blow ourselv', @@ -484,12 +487,14 @@ class ProfanityFilter(object): if len(words) > 1: for bigram in string_utils.ngrams_presplit(words, 2): + bigram = ' '.join(bigram) if self.is_bad_word(bigram): logger.debug('"{bigram}" is profanity') return True if len(words) > 2: for trigram in string_utils.ngrams_presplit(words, 3): + trigram = ' '.join(trigram) if self.is_bad_word(trigram): logger.debug('"{trigram}" is profanity') return True