#!/usr/bin/env python3
# Post-image of the gitweb blobdiff for profanity_filter.py
# (kiosk.git, index 1c862eb..6329a55).

import string
import re
from typing import Iterator, List, Tuple


class profanity_filter:
    """Detect and mask profane words and phrases in free text.

    Text is split on whitespace into tokens.  Every run of one, two or
    three consecutive tokens (joined by single spaces) is compared against
    ``arrBad`` in three forms: verbatim, cleaned (lowercased, underscores
    turned into spaces, punctuation removed) and cleaned with a trailing
    "s"/"es" removed.  A run matching in any form is reported with a
    ``print`` diagnostic and treated as profanity.
    """

    # Source of masking characters; repeated as needed for long tokens.
    _BAD_WORD_MASK = "!@#$%!@#$%^~!@%^~@#$%!@#$%^~!"

    # Deletes every ASCII punctuation character in one str.translate pass.
    _PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)

    # Trailing "s" / "es" (crude plural stripping used by normalize()).
    _PLURAL_SUFFIX = re.compile(r"e?s$")

    # Longest phrase, in tokens, that is looked up in arrBad.
    _MAX_PHRASE_WORDS = 3

    def __init__(self) -> None:
        """Populate ``arrBad`` with the blocked words and phrases."""
        self.arrBad = [
            "acrotomophilia",
            "anal",
            "anally",
            "anilingus",
            "anus",
            "arsehole",
            "ass",
            "asses",
            "asshole",
            "assmunch",
            "auto erotic",
            "autoerotic",
            "babeland",
            "baby batter",
            "ball gag",
            "ball gravy",
            "ball kicking",
            "ball licking",
            "ball sack",
            "ball zack",
            "ball sucking",
            "bangbros",
            "bareback",
            "barely legal",
            "barenaked",
            "bastardo",
            "bastinado",
            "bbw",
            "bdsm",
            "beaver cleaver",
            "beaver lips",
            "bestiality",
            "bi curious",
            "big black",
            "big breasts",
            "big knockers",
            "big tits",
            "bimbos",
            "birdlock",
            "bitch",
            "bitches",
            "black cock",
            "blonde action",
            "blonde on blonde",
            "blow j",
            "blow your l",
            "blow ourselves",
            "blow m",
            "blue waffle",
            "blumpkin",
            "bollocks",
            "bondage",
            "boner",
            "boob",
            "boobs",
            "booty call",
            "breasts",
            "brown showers",
            "brunette action",
            "bukkake",
            "bulldyke",
            "bullshit",
            "bullet vibe",
            "bung hole",
            "bunghole",
            "busty",
            "butt",
            "buttcheeks",
            "butthole",
            "camel toe",
            "camgirl",
            "camslut",
            "camwhore",
            "carpet muncher",
            "carpetmuncher",
            "chocolate rosebuds",
            "circlejerk",
            "cleveland steamer",
            "clit",
            "clitoris",
            "clover clamps",
            "clusterfuck",
            "cock",
            "cocks",
            "coprolagnia",
            "coprophilia",
            "cornhole",
            "creampie",
            "cream pie",
            "cum",
            "cumming",
            "cunnilingus",
            "cunt",
            "damn",
            "darkie",
            "date rape",
            "daterape",
            "deep throat",
            "deepthroat",
            "dick",
            "dildo",
            "dirty pillows",
            "dirty sanchez",
            "dog style",
            "doggie style",
            "doggiestyle",
            "doggy style",
            "doggystyle",
            "dolcett",
            "domination",
            "dominatrix",
            "dommes",
            "donkey punch",
            "double dick",
            "double dong",
            "double penetration",
            "dp action",
            "dtf",
            "eat my ass",
            "ecchi",
            "ejaculation",
            "erection",
            "erotic",
            "erotism",
            "escort",
            "ethical slut",
            "eunuch",
            "faggot",
            "posts each week",
            "fecal",
            "felch",
            "fellatio",
            "feltch",
            "female squirting",
            "femdom",
            "figging",
            "fingering",
            "fisting",
            "foot fetish",
            "footjob",
            "frotting",
            "fuck",
            "fucking",
            "fuckin",
            "fuckin'",
            "fucked",
            "fuckers",
            "fuck buttons",
            "fuckhead",
            "fudge packer",
            "fudgepacker",
            "futanari",
            "g-spot",
            "gspot",
            "gang bang",
            "gay sex",
            "genitals",
            "giant cock",
            "girl on",
            "girl on top",
            "girls gone wild",
            "goatcx",
            "goatse",
            "goddamn",
            "gokkun",
            "golden shower",
            "goo girl",
            "goodpoop",
            "goregasm",
            "grope",
            "group sex",
            "guro",
            "hand job",
            "handjob",
            "hard core",
            "hardcore",
            "hentai",
            "homoerotic",
            "honkey",
            "hooker",
            "horny",
            "hot chick",
            "how to kill",
            "how to murder",
            "huge fat",
            "humping",
            "incest",
            "intercourse",
            "jack off",
            "jail bait",
            "jailbait",
            "jerk off",
            "jerking off",
            "jigaboo",
            "jiggaboo",
            "jiggerboo",
            "jizz",
            "juggs",
            "kike",
            "kinbaku",
            "kinkster",
            "kinky",
            "knobbing",
            "leather restraint",
            "lemon party",
            "lolita",
            "lovemaking",
            "lpt request",
            "make me come",
            "male squirting",
            "masturbate",
            "masturbated",
            "masturbating",
            "menage a trois",
            "milf",
            "milfs",
            "missionary position",
            "motherfucker",
            "mound of venus",
            "mr hands",
            "muff diver",
            "muffdiving",
            "nambla",
            "nawashi",
            "negro",
            "neonazi",
            "nig nog",
            "nigga",
            "nigger",
            "nimphomania",
            "nipple",
            "not safe for",
            "nsfw",
            "nsfw images",
            "nude",
            "nudity",
            "nutsack",
            "nut sack",
            "nympho",
            "nymphomania",
            "octopussy",
            "omorashi",
            "one night stand",
            "orgasm",
            "orgy",
            "paedophile",
            "panties",
            "panty",
            "pedobear",
            "pedophile",
            "pegging",
            "pee",
            "penis",
            "phone sex",
            "piss pig",
            "pissing",
            "pisspig",
            "playboy",
            "pleasure chest",
            "pole smoker",
            "ponyplay",
            "poof",
            "poop chute",
            "poopchute",
            "porn",
            "pornhub",
            "porno",
            "pornography",
            "prince albert",
            "pthc",
            "pube",
            "pubes",
            "pussy",
            "pussies",
            "queaf",
            "queer",
            "raghead",
            "raging boner",
            "rape",
            "raping",
            "rapist",
            "rectum",
            "reverse cowgirl",
            "rimjob",
            "rimming",
            "rosy palm",
            "rusty trombone",
            "s&m",
            "sadism",
            "scat",
            "schlong",
            "scissoring",
            "semen",
            "sex",
            "sexo",
            "sexy",
            "shaved beaver",
            "shaved pussy",
            "shemale",
            "shibari",
            "shit",
            "shota",
            "shrimping",
            "slanteye",
            "slut",
            "smut",
            "snatch",
            "snowballing",
            "sodomize",
            "sodomy",
            "spic",
            "spooge",
            "spread legs",
            "strap on",
            "strapon",
            "strappado",
            "strip club",
            "style doggy",
            "suck",
            "sucks",
            "suicide girls",
            "sultry women",
            "swastika",
            "swinger",
            "tainted love",
            "taste my",
            "tea bagging",
            "threesome",
            "throating",
            "tied up",
            "tight white",
            "tit",
            "tits",
            "titties",
            "titty",
            "tongue in a",
            "topless",
            "tosser",
            "towelhead",
            "tranny",
            "tribadism",
            "tub girl",
            "tubgirl",
            "tushy",
            "twat",
            "twink",
            "twinkie",
            "undressing",
            "upskirt",
            "urethra play",
            "urophilia",
            "vagina",
            "venus mound",
            "vibrator",
            "violet blue",
            "violet wand",
            "vorarephilia",
            "voyeur",
            "vulva",
            "wank",
            "wet dream",
            "wetback",
            "white power",
            "whore",
            "women rapping",
            "wrapping men",
            "wrinkled starfish",
            "xx",
            "xxx",
            "yaoi",
            "yellow showers",
            "yiffy",
            "zoophilia",
        ]

    def _clean(self, text: str) -> str:
        """Lowercase *text*, turn "_" into a space and drop punctuation."""
        # "_" must become a space before the translate step, which would
        # otherwise delete it along with the rest of string.punctuation.
        return text.lower().replace("_", " ").translate(self._PUNCTUATION_TABLE)

    def normalize(self, text: str) -> str:
        """Return *text* cleaned and with one trailing "s"/"es" removed.

        Example: ``normalize("dickes")`` returns ``"dick"``.  The suffix is
        stripped unconditionally, so ``normalize("ass")`` returns ``"as"``;
        callers that need the unstemmed form must not rely on this method
        alone.
        """
        return self._PLURAL_SUFFIX.sub("", self._clean(text))

    def _bad_ngrams(self, words: List[str]) -> Iterator[Tuple[int, int]]:
        """Yield ``(start, size)`` for each blocked run of tokens in *words*.

        All single tokens are examined first, then all pairs, then all
        triples.  A diagnostic line is printed for every hit just before it
        is yielded, so a consumer that stops early only triggers the
        messages for the hits it actually consumed.
        """
        # Snapshot of the current list; rebuilt per call so that changes a
        # caller makes to arrBad are always honoured.
        blocked = frozenset(self.arrBad)
        for size in range(1, self._MAX_PHRASE_WORDS + 1):
            label = "WORD" if size == 1 else "PHRASE"
            for start in range(len(words) - size + 1):
                phrase = " ".join(words[start : start + size])
                cleaned = self._clean(phrase)
                # The cleaned-but-unstemmed form catches listed entries that
                # end in "s" when written with capitals or punctuation
                # (e.g. "Penis."), which the stemmed form alone would miss.
                if (
                    phrase in blocked
                    or cleaned in blocked
                    or self._PLURAL_SUFFIX.sub("", cleaned) in blocked
                ):
                    print(f'***** PROFANITY {label}="{phrase}"')
                    yield start, size

    def _mask(self, length: int) -> str:
        """Return *length* masking characters."""
        pattern = self._BAD_WORD_MASK
        # Repeat the pattern so tokens longer than it keep their length.
        return (pattern * (length // len(pattern) + 1))[:length]

    def filter_bad_words(self, text: str) -> str:
        """Return *text* with every blocked token replaced by mask characters.

        Only the tokens that form a blocked word or phrase are masked; other
        tokens that merely contain the same letters are left untouched.
        Each masked token keeps its original length and all whitespace in
        *text* is preserved.
        """
        tokens = list(re.finditer(r"\S+", text))
        flagged = set()
        for start, size in self._bad_ngrams([tok.group() for tok in tokens]):
            flagged.update(range(start, start + size))
        if not flagged:
            return text

        pieces = []
        cursor = 0
        for index, tok in enumerate(tokens):
            pieces.append(text[cursor : tok.start()])  # whitespace before token
            if index in flagged:
                pieces.append(self._mask(tok.end() - tok.start()))
            else:
                pieces.append(tok.group())
            cursor = tok.end()
        pieces.append(text[cursor:])  # trailing whitespace, if any
        return "".join(pieces)

    def contains_bad_words(self, text: str) -> bool:
        """Return True if *text* contains any blocked word or phrase.

        Scanning stops at the first hit, so at most one diagnostic line is
        printed.
        """
        return next(self._bad_ngrams(text.split()), None) is not None


# x = profanity_filter()
# print(x.filter_bad_words("Fuck this auto erotic shit, it's not safe for work."))
# print(x.contains_bad_words("cream pie their daughter."))
# print(x.contains_bad_words("If you tell someone your penis is 6 inches it's pretty believable. If you say it's half a foot no one will believe you."))
# print(x.normalize("dickes"));