projects
/
python_utils.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
How to do the thing.
[python_utils.git]
/
profanity_filter.py
diff --git a/profanity_filter.py b/profanity_filter.py
index 4723a2db0679e5f866f14bbb723c66391fa06ae6..e5c9e11b59a9f45b0aed4288a61b9fed09ca34ee 100755
(executable)
--- a/profanity_filter.py
+++ b/profanity_filter.py
@@ -12,7 +12,6 @@
from nltk.stem import PorterStemmer
import decorator_utils
import string_utils
import decorator_utils
import string_utils
-
logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)
@@ -238,6 +237,9 @@
class ProfanityFilter(object):
'girl gone wild',
'girl on top',
'girl on',
'girl gone wild',
'girl on top',
'girl on',
+ 'give head',
+ 'giving head',
+ 'gave head',
'goatcx',
'goatse',
'goddamn',
'goatcx',
'goatse',
'goddamn',
@@ -499,7 +501,7 @@
class ProfanityFilter(object):
def tokenize(self, text: str):
for x in nltk.word_tokenize(text):
def tokenize(self, text: str):
for x in nltk.word_tokenize(text):
- for y in re.split('\W+', x):
+ for y in re.split(r'\W+', x):
yield y
def contains_bad_word(self, text: str) -> bool:
yield y
def contains_bad_word(self, text: str) -> bool:
@@ -561,7 +563,7 @@
class ProfanityFilter(object):
break
return out
break
return out
- words = self.tokenize(text)
+ words = [x for x in self.tokenize(text)]
words.append('')
words.append('')
words.append('')
words.append('')
words.append('')
words.append('')