projects
/
python_utils.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
f33ea8b
)
Adds doctests.
author
Scott
<
[email protected]
>
Thu, 13 Jan 2022 04:58:40 +0000
(20:58 -0800)
committer
Scott
<
[email protected]
>
Thu, 13 Jan 2022 04:58:40 +0000
(20:58 -0800)
profanity_filter.py
patch
|
blob
|
history
diff --git
a/profanity_filter.py
b/profanity_filter.py
index fe5422179ba9a50c678188e088689184f139a14d..db014e1704742c7cab01bc6e7ca1f6ca7f874de5 100755
(executable)
--- a/
profanity_filter.py
+++ b/
profanity_filter.py
@@
-470,6
+470,18
@@
class ProfanityFilter(object):
self.stemmer = PorterStemmer()
def _normalize(self, text: str) -> str:
self.stemmer = PorterStemmer()
def _normalize(self, text: str) -> str:
+ """Normalize text.
+
+ >>> _normalize('Tittie5')
+ 'titties'
+
+ >>> _normalize('Suck a Dick!')
+ 'suck a dick'
+
+ >>> _normalize('fucking a whore')
+ 'fuck a whore'
+
+ """
result = text.lower()
result = result.replace("_", " ")
result = result.replace('0', 'o')
result = text.lower()
result = result.replace("_", " ")
result = result.replace('0', 'o')
@@
-485,6
+497,19
@@
class ProfanityFilter(object):
return ' '.join(chunks)
def contains_bad_word(self, text: str) -> bool:
return ' '.join(chunks)
def contains_bad_word(self, text: str) -> bool:
+ """Returns True if text contains a bad word (or more than one)
+ and False if no bad words were detected.
+
+ >>> contains_bad_word('fuck you')
+ True
+
+ >>> contains_bad_word('FucK u')
+ True
+
+ >>> contains_bad_word('FuK U')
+ False
+
+ """
words = nltk.word_tokenize(text)
for word in words:
if self.is_bad_word(word):
words = nltk.word_tokenize(text)
for word in words:
if self.is_bad_word(word):
@@
-513,7
+538,10
@@
class ProfanityFilter(object):
)
def obscure_bad_words(self, text: str) -> str:
)
def obscure_bad_words(self, text: str) -> str:
+ """Obscure bad words that are detected by inserting random punctuation
+ characters.
+ """
def obscure(word: str):
out = ''
last = ''
def obscure(word: str):
out = ''
last = ''
@@
-556,6
+584,8
@@
class ProfanityFilter(object):
def main() -> None:
def main() -> None:
+ import doctest
+ doctest.testmod()
pf = ProfanityFilter()
phrase = ' '.join(sys.argv[1:])
print(pf.contains_bad_word(phrase))
pf = ProfanityFilter()
phrase = ' '.join(sys.argv[1:])
print(pf.contains_bad_word(phrase))