        result = result.replace('3', 'e')
        for x in string.punctuation:
            result = result.replace(x, "")
-        chunks = [
-            self.stemmer.stem(word) for word in nltk.word_tokenize(result)
-        ]
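+        # stem each token so inflected forms collapse to a shared root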
+        chunks = [self.stemmer.stem(word) for word in nltk.word_tokenize(result)]
        return ' '.join(chunks)

    def tokenize(self, text: str):
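+        # yield word-only chunks: split each NLTK token on runs of non-word characters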
        for x in nltk.word_tokenize(text):
-            for y in re.split('\W+', x):
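+            # raw string keeps '\W' from being parsed as an invalid escape sequence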
+            for y in re.split(r'\W+', x):
                yield y

    def contains_bad_word(self, text: str) -> bool: