projects
/
python_utils.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
A bunch of changes...
[python_utils.git]
/
profanity_filter.py
diff --git a/profanity_filter.py b/profanity_filter.py
index e1b474323fb67823a0c1607e69df08ec41b5398d..5621cef94489f6b5446a9e786777a8cb93e68be4 100755
(executable)
--- a/profanity_filter.py
+++ b/profanity_filter.py
@@ -8,12 +8,14 @@ import sys
import nltk
from nltk.stem import PorterStemmer
import nltk
from nltk.stem import PorterStemmer
+import decorator_utils
import string_utils
logger = logging.getLogger(__name__)
import string_utils
logger = logging.getLogger(__name__)
+@decorator_utils.singleton
class ProfanityFilter(object):
def __init__(self):
self.bad_words = set([
class ProfanityFilter(object):
def __init__(self):
self.bad_words = set([
@@ -82,6 +84,7 @@ class ProfanityFilter(object):
'blonde action',
'blow j',
'blow job',
'blonde action',
'blow j',
'blow job',
+ 'blowjob',
'blow my',
'blow me',
'blow ourselv',
'blow my',
'blow me',
'blow ourselv',
@@ -484,14 +487,16 @@ class ProfanityFilter(object):
if len(words) > 1:
for bigram in string_utils.ngrams_presplit(words, 2):
if len(words) > 1:
for bigram in string_utils.ngrams_presplit(words, 2):
+ bigram = ' '.join(bigram)
if self.is_bad_word(bigram):
if self.is_bad_word(bigram):
- logger.debug('"{bigram}" is profanity')
+ logger.debug(f'"{bigram}" is profanity')
return True
if len(words) > 2:
for trigram in string_utils.ngrams_presplit(words, 3):
return True
if len(words) > 2:
for trigram in string_utils.ngrams_presplit(words, 3):
+ trigram = ' '.join(trigram)
if self.is_bad_word(trigram):
if self.is_bad_word(trigram):
- logger.debug('"{trigram}" is profanity')
+ logger.debug(f'"{trigram}" is profanity')
return True
return False
return True
return False