Adds profanity filter, fixes bugs.

author Scott Gasch <[email protected]>

Thu, 5 Aug 2021 21:56:34 +0000 (14:56 -0700)

committer Scott Gasch <[email protected]>

Thu, 5 Aug 2021 21:56:34 +0000 (14:56 -0700)
author Scott Gasch <[email protected]>
Thu, 5 Aug 2021 21:56:34 +0000 (14:56 -0700)
committer Scott Gasch <[email protected]>
Thu, 5 Aug 2021 21:56:34 +0000 (14:56 -0700)
diff --git a/dateparse/dateparse_utils.py b/dateparse/dateparse_utils.py

index 4decb81570bb0bbf02f066ec31993b20ea0ce6af..cd8bc3543513d147b030c5303424964aaa49b5b9 100755 (executable)
--- a/dateparse/dateparse_utils.py
+++ b/dateparse/dateparse_utils.py
@@ -385,7 +385,7 @@ class DateParser(dateparse_utilsListener):
              tz = pytz.timezone(txt)
              if tz is not None:
                  return tz
-        except:
+        except Exception:
              pass
  
          # Try dateutil
@@ -393,7 +393,7 @@ class DateParser(dateparse_utilsListener):
              tz = dateutil.tz.gettz(txt)
              if tz is not None:
                  return tz
-        except:
+        except Exception:
              pass
  
          # Try constructing an offset in seconds
@@ -406,7 +406,7 @@ class DateParser(dateparse_utilsListener):
                  offset = sign * (hour * 60 * 60) + sign * (minute * 60)
                  tzoffset = dateutil.tz.tzoffset(txt, offset)
                  return tzoffset
-        except:
+        except Exception:
              pass
          return None
  
@@ -574,7 +574,7 @@ class DateParser(dateparse_utilsListener):
              unit = self._figure_out_date_unit(
                  ctx.deltaUnit().getText().lower()
              )
-        except:
+        except Exception:
              raise ParseException(f'Invalid Delta +/-: {ctx.getText()}')
          else:
              self.context['delta_int'] = n
@@ -585,7 +585,7 @@ class DateParser(dateparse_utilsListener):
      ) -> None:
          try:
              unit = self._figure_out_date_unit(ctx.getText().lower())
-        except:
+        except Exception:
              raise ParseException(f'Bad delta unit: {ctx.getText()}')
          else:
              self.context['delta_unit'] = unit
@@ -595,7 +595,7 @@ class DateParser(dateparse_utilsListener):
      ) -> None:
          try:
              txt = ctx.getText().lower()
-        except:
+        except Exception:
              raise ParseException(f'Bad next/last: {ctx.getText()}')
          if (
                  'month' in self.context or
@@ -630,7 +630,7 @@ class DateParser(dateparse_utilsListener):
                  ctx.deltaTimeUnit().getText().lower()
              )
              self.context['time_delta_unit'] = unit
-        except:
+        except Exception:
              raise ParseException(f'Bad delta unit: {ctx.getText()}')
          if 'time_delta_before_after' not in self.context:
              raise ParseException(
@@ -654,7 +654,7 @@ class DateParser(dateparse_utilsListener):
                  ] = TimeUnit.MINUTES
              else:
                  raise ParseException(f'Bad time fraction {ctx.getText()}')
-        except:
+        except Exception:
              raise ParseException(f'Bad time fraction {ctx.getText()}')
  
      def exitDeltaBeforeAfter(
@@ -662,7 +662,7 @@ class DateParser(dateparse_utilsListener):
      ) -> None:
          try:
              txt = ctx.getText().lower()
-        except:
+        except Exception:
              raise ParseException(f'Bad delta before|after: {ctx.getText()}')
          else:
              self.context['delta_before_after'] = txt
@@ -672,7 +672,7 @@ class DateParser(dateparse_utilsListener):
      ) -> None:
          try:
              txt = ctx.getText().lower()
-        except:
+        except Exception:
              raise ParseException(f'Bad delta before|after: {ctx.getText()}')
          else:
              self.context['time_delta_before_after'] = txt
@@ -732,7 +732,7 @@ class DateParser(dateparse_utilsListener):
                  self.context['month'] = month
                  self.context['day'] = 1
              self.main_type = DateParser.PARSE_TYPE_BASE_AND_OFFSET_EXPR
-        except:
+        except Exception:
              raise ParseException(
                  f'Invalid nthWeekday expression: {ctx.getText()}'
              )
@@ -746,7 +746,7 @@ class DateParser(dateparse_utilsListener):
      def exitNth(self, ctx: dateparse_utilsParser.NthContext) -> None:
          try:
              i = self._get_int(ctx.getText())
-        except:
+        except Exception:
              raise ParseException(f'Bad nth expression: {ctx.getText()}')
          else:
              self.context['nth'] = i
@@ -764,7 +764,7 @@ class DateParser(dateparse_utilsListener):
                  raise ParseException(
                      f'Bad first|last expression: {ctx.getText()}'
                  )
-        except:
+        except Exception:
              raise ParseException(f'Bad first|last expression: {ctx.getText()}')
          else:
              self.context['nth'] = txt
@@ -773,7 +773,7 @@ class DateParser(dateparse_utilsListener):
          try:
              dow = ctx.getText().lower()[:3]
              dow = self.day_name_to_number.get(dow, None)
-        except:
+        except Exception:
              raise ParseException('Bad day of week')
          else:
              self.context['dow'] = dow
@@ -797,7 +797,7 @@ class DateParser(dateparse_utilsListener):
                  raise ParseException(
                      f'Bad dayOfMonth expression: {ctx.getText()}'
                  )
-        except:
+        except Exception:
              raise ParseException(f'Bad dayOfMonth expression: {ctx.getText()}')
          self.context['day'] = day
  
@@ -814,7 +814,7 @@ class DateParser(dateparse_utilsListener):
                  raise ParseException(
                      f'Bad monthName expression: {ctx.getText()}'
                  )
-        except:
+        except Exception:
              raise ParseException(f'Bad monthName expression: {ctx.getText()}')
          else:
              self.context['month'] = month
@@ -828,7 +828,7 @@ class DateParser(dateparse_utilsListener):
                  raise ParseException(
                      f'Bad monthNumber expression: {ctx.getText()}'
                  )
-        except:
+        except Exception:
              raise ParseException(
                  f'Bad monthNumber expression: {ctx.getText()}'
              )
@@ -840,7 +840,7 @@ class DateParser(dateparse_utilsListener):
              year = self._get_int(ctx.getText())
              if year < 1:
                  raise ParseException(f'Bad year expression: {ctx.getText()}')
-        except:
+        except Exception:
              raise ParseException(f'Bad year expression: {ctx.getText()}')
          else:
              self.context['year'] = year
@@ -851,7 +851,7 @@ class DateParser(dateparse_utilsListener):
          try:
              special = ctx.specialDate().getText().lower()
              self.context['special'] = special
-        except:
+        except Exception:
              raise ParseException(
                  f'Bad specialDate expression: {ctx.specialDate().getText()}'
              )
@@ -864,7 +864,7 @@ class DateParser(dateparse_utilsListener):
                      self.context['special_next_last'] = 'next'
                  elif mod.LAST() is not None:
                      self.context['special_next_last'] = 'last'
-        except:
+        except Exception:
              raise ParseException(
                  f'Bad specialDateNextLast expression: {ctx.getText()}'
              )
@@ -877,7 +877,7 @@ class DateParser(dateparse_utilsListener):
              count = self._get_int(ctx.unsignedInt().getText())
              unit = ctx.deltaUnit().getText().lower()
              ago_from_now = ctx.AGO_FROM_NOW().getText()
-        except:
+        except Exception:
              raise ParseException(
                  f'Bad NFoosFromTodayAgoExpr: {ctx.getText()}'
              )
@@ -911,7 +911,7 @@ class DateParser(dateparse_utilsListener):
                      f'Bad This/Next/Last modifier: {mod}'
                  )
              unit = ctx.deltaUnit().getText().lower()
-        except:
+        except Exception:
              raise ParseException(
                  f'Bad DeltaRelativeToTodayExpr: {ctx.getText()}'
              )
@@ -929,7 +929,7 @@ class DateParser(dateparse_utilsListener):
      ) -> None:
          try:
              txt = ctx.specialTime().getText().lower()
-        except:
+        except Exception:
              raise ParseException(
                  f'Bad special time expression: {ctx.getText()}'
              )
@@ -950,7 +950,7 @@ class DateParser(dateparse_utilsListener):
          try:
              tz = ctx.tzExpr().getText()
              self.context['tz'] = self._parse_tz(tz)
-        except:
+        except Exception:
              pass
  
      def exitTwelveHourTimeExpr(
@@ -961,14 +961,14 @@ class DateParser(dateparse_utilsListener):
              while not hour[-1].isdigit():
                  hour = hour[:-1]
              hour = self._get_int(hour)
-        except:
+        except Exception:
              raise ParseException(f'Bad hour: {ctx.hour().getText()}')
          if hour <= 0 or hour > 12:
              raise ParseException(f'Bad hour (out of range): {hour}')
  
          try:
              minute = self._get_int(ctx.minute().getText())
-        except:
+        except Exception:
              minute = 0
          if minute < 0 or minute > 59:
              raise ParseException(f'Bad minute (out of range): {minute}')
@@ -976,7 +976,7 @@ class DateParser(dateparse_utilsListener):
  
          try:
              seconds = self._get_int(ctx.second().getText())
-        except:
+        except Exception:
              seconds = 0
          if seconds < 0 or seconds > 59:
              raise ParseException(f'Bad second (out of range): {seconds}')
@@ -984,7 +984,7 @@ class DateParser(dateparse_utilsListener):
  
          try:
              micros = self._get_int(ctx.micros().getText())
-        except:
+        except Exception:
              micros = 0
          if micros < 0 or micros > 1000000:
              raise ParseException(f'Bad micros (out of range): {micros}')
@@ -992,7 +992,7 @@ class DateParser(dateparse_utilsListener):
  
          try:
              ampm = ctx.ampm().getText()
-        except:
+        except Exception:
              raise ParseException(f'Bad ampm: {ctx.ampm().getText()}')
          if hour == 12:
              hour = 0
@@ -1003,7 +1003,7 @@ class DateParser(dateparse_utilsListener):
          try:
              tz = ctx.tzExpr().getText()
              self.context['tz'] = self._parse_tz(tz)
-        except:
+        except Exception:
              pass
  
      def exitTwentyFourHourTimeExpr(
@@ -1014,7 +1014,7 @@ class DateParser(dateparse_utilsListener):
              while not hour[-1].isdigit():
                  hour = hour[:-1]
              hour = self._get_int(hour)
-        except:
+        except Exception:
              raise ParseException(f'Bad hour: {ctx.hour().getText()}')
          if hour < 0 or hour > 23:
              raise ParseException(f'Bad hour (out of range): {hour}')
@@ -1022,7 +1022,7 @@ class DateParser(dateparse_utilsListener):
  
          try:
              minute = self._get_int(ctx.minute().getText())
-        except:
+        except Exception:
              minute = 0
          if minute < 0 or minute > 59:
              raise ParseException(f'Bad minute (out of range): {ctx.getText()}')
@@ -1030,7 +1030,7 @@ class DateParser(dateparse_utilsListener):
  
          try:
              seconds = self._get_int(ctx.second().getText())
-        except:
+        except Exception:
              seconds = 0
          if seconds < 0 or seconds > 59:
              raise ParseException(f'Bad second (out of range): {ctx.getText()}')
@@ -1038,7 +1038,7 @@ class DateParser(dateparse_utilsListener):
  
          try:
              micros = self._get_int(ctx.micros().getText())
-        except:
+        except Exception:
              micros = 0
          if micros < 0 or micros >= 1000000:
              raise ParseException(f'Bad micros (out of range): {ctx.getText()}')
@@ -1047,7 +1047,7 @@ class DateParser(dateparse_utilsListener):
          try:
              tz = ctx.tzExpr().getText()
              self.context['tz'] = self._parse_tz(tz)
-        except:
+        except Exception:
              pass
  
  
diff --git a/dict_utils.py b/dict_utils.py

index 292b933886d7b6b5cc80ec98f358ad1f29ae9abf..74e8fdab22749917f21ae88c150b838120bb0820 100644 (file)
--- a/dict_utils.py
+++ b/dict_utils.py
@@ -3,6 +3,7 @@
  from itertools import islice
  from typing import Any, Callable, Dict, Iterator, Tuple
  
+
  def init_or_inc(
      d: Dict[Any, Any],
      key: Any,
diff --git a/lockfile.py b/lockfile.py

index 770beaa9f97e3525f55b938e09a972e3b67e0e5a..34279ba8392c0e538ede3bfbc09b7af882657c16 100644 (file)
--- a/lockfile.py
+++ b/lockfile.py
@@ -123,9 +123,10 @@ class LockFile(object):
              cmd = self.override_command
          else:
              cmd = ' '.join(sys.argv)
+        print(cmd)
          contents = LockFileContents(
              pid = os.getpid(),
-            cmd,
+            commandline = cmd,
              expiration_timestamp = self.expiration_timestamp,
          )
          return json.dumps(contents.__dict__)
diff --git a/logging_utils.py b/logging_utils.py

index 0c7d19362d7ed59cf9009053465763e47e6e4709..a0131b15373482fc00edd12297209622d0a70128 100644 (file)
--- a/logging_utils.py
+++ b/logging_utils.py
@@ -269,7 +269,7 @@ class OutputMultiplexer(object):
                  open(filename, 'wb', buffering=0) for filename in filenames
              ]
          else:
-            if self.destination_bitv & OutputMultiplexer.FILENAMES:
+            if destination_bitv & OutputMultiplexer.FILENAMES:
                  raise ValueError(
                      "Filenames argument is required if bitv & FILENAMES"
                  )
@@ -278,7 +278,7 @@ class OutputMultiplexer(object):
          if handles is not None:
              self.h = [handle for handle in handles]
          else:
-            if self.destination_bitv & OutputMultiplexer.FILEHANDLES:
+            if destination_bitv & OutputMultiplexer.Destination.FILEHANDLES:
                  raise ValueError(
                      "Handle argument is required if bitv & FILEHANDLES"
                  )
diff --git a/presence.py b/presence.py

old mode 100644 (file)

new mode 100755 (executable)

index 682855d..c697124
--- a/presence.py
+++ b/presence.py
@@ -8,6 +8,7 @@ import re
  from typing import Dict, List
  
  import argparse_utils
+import bootstrap
  import config
  
  logger = logging.getLogger(__name__)
@@ -111,6 +112,7 @@ class PresenceDetection(object):
              if "cabin_" in line:
                  continue
              if location == Location.CABIN:
+                logger.debug('Cabin count: {cabin_count}')
                  cabin_count += 1
              try:
                  (mac, count, ip_name, mfg, ts) = line.split(",")
@@ -128,6 +130,7 @@ class PresenceDetection(object):
                  name = match.group(2)
                  self.names_by_mac[mac] = name
          if cabin_count > 0:
+            logger.debug('Weird MAC at the cabin')
              self.weird_mac_at_cabin = True
  
      def is_anyone_in_location_now(self, location: Location) -> bool:
@@ -152,15 +155,18 @@ class PresenceDetection(object):
          tiebreaks: Dict[Location, datetime.datetime] = {}
          credit = 10000
          for mac in self.devices_by_person[name]:
+            logger.debug(f'Looking for {name}... check for mac {mac}')
              if mac not in self.names_by_mac:
                  continue
              for location in self.location_ts_by_mac:
                  if mac in self.location_ts_by_mac[location]:
                      ts = (self.location_ts_by_mac[location])[mac]
+                    logger.debug(f'I saw {mac} at {location} at {ts}')
                      tiebreaks[location] = ts
              location = dict_utils.key_with_min_value(tiebreaks)
              v = votes.get(location, 0)
              votes[location] = v + credit
+            logger.debug('{name}: {location} gets {credit} votes.')
              credit = int(
                  credit * 0.667
              )  # Note: list most important devices first
@@ -170,3 +176,17 @@ class PresenceDetection(object):
              item = dict_utils.item_with_max_value(votes)
              return item[0]
          return Location.UNKNOWN
+
+
+@bootstrap.initialize
+def main() -> None:
+    """Print where_is_person_now() for every Person, a blank line, then
+    is_anyone_in_location_now() for every Location."""
+    detector = PresenceDetection()
+    for who in Person:
+        whereabouts = detector.where_is_person_now(who)
+        print(f'{who} => {whereabouts}')
+    print()
+    for place in Location:
+        occupied = detector.is_anyone_in_location_now(place)
+        print(f'{place} => {occupied}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/profanity_filter.py b/profanity_filter.py

new file mode 100755 (executable)

index 0000000..e1b4743
--- /dev/null
+++ b/profanity_filter.py
@@ -0,0 +1,556 @@
+#!/usr/bin/env python3
+
+import logging
+import random
+import string
+import sys
+
+import nltk
+from nltk.stem import PorterStemmer
+
+import string_utils
+
+
+logger = logging.getLogger(__name__)
+
+
+class ProfanityFilter(object):
+    def __init__(self):
+        self.bad_words = set([
+            'acrotomophilia',
+            'anal',
+            'analingus',
+            'anally',
+            'anilingus',
+            'anus',
+            'arsehol',
+            'arsehole',
+            'ass',
+            'asses',
+            'asshol',
+            'asshole',
+            'assmunch',
+            'auto erot',
+            'auto erotic',
+            'autoerotic',
+            'babeland',
+            'babi batter',
+            'baby batter',
+            'ball gag',
+            'ball gravi',
+            'ball gravy',
+            'ball kick',
+            'ball kicking',
+            'ball lick',
+            'ball licking',
+            'ball sack',
+            'ball suck',
+            'ball sucking',
+            'ball zack',
+            'bangbro',
+            'bangbros',
+            'bare legal',
+            'bareback',
+            'barely legal',
+            'barenak',
+            'barenaked',
+            'bastardo',
+            'bastinado',
+            'bbc',
+            'bbw',
+            'bdsm',
+            'beaver cleaver',
+            'beaver lip',
+            'beaver lips',
+            'bestial',
+            'bestiality',
+            'bi curiou',
+            'bi curious',
+            'big black',
+            'big breasts',
+            'big knocker',
+            'big knockers',
+            'big tit',
+            'big tits',
+            'bimbo',
+            'birdlock',
+            'bitch',
+            'bitches',
+            'black cock',
+            'blond action',
+            'blond on blond',
+            'blonde action',
+            'blow j',
+            'blow job',
+            'blow my',
+            'blow me',
+            'blow ourselv',
+            'blow ourselves',
+            'blow your load',
+            'blue waffl',
+            'blue waffle',
+            'blumpkin',
+            'bollock',
+            'bollocks',
+            'bondag',
+            'bondage',
+            'boner',
+            'boob',
+            'boobs',
+            'booti call',
+            'booty call',
+            'breast',
+            'breasts',
+            'brown shower',
+            'brown showers',
+            'brunett action',
+            'brunette action',
+            'bukkak',
+            'bukkake',
+            'bulldyk',
+            'bulldyke',
+            'bullet vibe',
+            'bullshit',
+            'bung hole',
+            'bunghol',
+            'bunghole',
+            'busti',
+            'busty',
+            'butt',
+            'buttcheek',
+            'buttcheeks',
+            'butthol',
+            'butthole',
+            'camel toe',
+            'camgirl',
+            'camslut',
+            'camwhore',
+            'carpet muncher',
+            'carpetmuncher',
+            'chocol rosebud',
+            'chocolate rosebuds',
+            'circlejerk',
+            'chink',
+            'cleveland steamer',
+            'clit',
+            'clitori',
+            'clitoris',
+            'clover clamp',
+            'clover clamps',
+            'clusterfuck',
+            'cock',
+            'cocks',
+            'coprolagnia',
+            'coprophilia',
+            'cornhol',
+            'cornhole',
+            'cream pie',
+            'creampi',
+            'creampie',
+            'cum',
+            'cumming',
+            'cunnilingu',
+            'cunnilingus',
+            'cunt',
+            'damn',
+            'darki',
+            'darkie',
+            'date rape',
+            'daterap',
+            'daterape',
+            'deep throat',
+            'deepthroat',
+            'dick',
+            'dildo',
+            'dirti pillow',
+            'dirti sanchez',
+            'dirty pillow',
+            'dirty sanchez',
+            'dog style',
+            'doggi style',
+            'doggie style',
+            'doggiestyl',
+            'doggiestyle',
+            'doggystyle',
+            'dolcett',
+            'domination',
+            'dominatrix',
+            'domm',
+            'dommes',
+            'donkey punch',
+            'doubl dick',
+            'doubl dong',
+            'doubl penetr',
+            'double dick',
+            'double dong',
+            'double penetration',
+            'dp action',
+            'dtf',
+            'eat my ass',
+            'ecchi',
+            'ejacul',
+            'erection',
+            'erotic',
+            'erotism',
+            'escort',
+            'ethical slut',
+            'eunuch',
+            'faggot',
+            'fecal',
+            'felch',
+            'fellatio',
+            'feltch',
+            'female squirting',
+            'femdom',
+            'figging',
+            'fingered',
+            'fingering',
+            'fingers',
+            'fisted',
+            'fisting',
+            'fists',
+            'foot fetish',
+            'footjob',
+            'frotting',
+            'fuck button',
+            'fuck',
+            'fucked',
+            'fucker',
+            'fuckhead',
+            'fuckin',
+            'fucking',
+            'fudge packer',
+            'fudgepack',
+            'fudgepacker',
+            'futanari',
+            'g spot',
+            'g-spot',
+            'gang bang',
+            'gay sex',
+            'gee spot',
+            'genital',
+            'giant cock',
+            'girl gone wild',
+            'girl on top',
+            'girl on',
+            'goatcx',
+            'goatse',
+            'goddamn',
+            'gokkun',
+            'golden shower',
+            'goo girl',
+            'goodpoop',
+            'goregasm',
+            'grope',
+            'group sex',
+            'gspot',
+            'guro',
+            'hand job',
+            'handjob',
+            'hard core',
+            'hardcore',
+            'hentai',
+            'homoerotic',
+            'honkey',
+            'hooker',
+            'horni',
+            'horny',
+            'hot chick',
+            'how to kill',
+            'how to murder',
+            'huge fat',
+            'humped',
+            'humping',
+            'humps',
+            'incest',
+            'intercourse',
+            'jack off',
+            'jail bait',
+            'jailbait',
+            'jerk off',
+            'jigaboo',
+            'jiggaboo',
+            'jiggerboo',
+            'jizz',
+            'jugg',
+            'kike',
+            'kinbaku',
+            'kinkster',
+            'kinky',
+            'knobbing',
+            'leather restraint',
+            'lemon party',
+            'lolita',
+            'lovemaking',
+            'make me come',
+            'male squirting',
+            'masturb',
+            'menage a trois',
+            'milf',
+            'missionary position',
+            'motherfuck',
+            'mound of venus',
+            'mr hand',
+            'muff diver',
+            'muffdiv',
+            'muffdiving',
+            'nambla',
+            'nawashi',
+            'negro',
+            'neonazi',
+            'nig nog',
+            'nigga',
+            'nigger',
+            'nimphomania',
+            'nipple',
+            'nip',
+            'not safe for',
+            'nsfl',
+            'nsfw',
+            'nude',
+            'nudes',
+            'nudity',
+            'nut sack',
+            'nutsack',
+            'nympho',
+            'nymphomania',
+            'octopussy',
+            'omorashi',
+            'one night stand',
+            'orgasm',
+            'orgy',
+            'paedophil',
+            'paedophile',
+            'panties',
+            'panty',
+            'pedobear',
+            'pedophil',
+            'pedophile',
+            'pee',
+            'pegging',
+            'peni',
+            'penis',
+            'phone sex',
+            'pigfucker',
+            'piss pig',
+            'piss',
+            'pissing',
+            'pisspig',
+            'playboy',
+            'pleasure chest',
+            'pole smoker',
+            'ponyplay',
+            'poof',
+            'poop chute',
+            'poopchute',
+            'porn',
+            'pornhub',
+            'porno',
+            'pornographi',
+            'pornography',
+            'prince albert',
+            'pthc',
+            'pube',
+            'pussi',
+            'pussies',
+            'pussy',
+            'queaf',
+            'queer',
+            'raghead',
+            'raging boner',
+            'rape',
+            'raping',
+            'rapist',
+            'rectum',
+            'reverse cowgirl',
+            'rimjob',
+            'rimming',
+            'rosy palm',
+            'rusty trombone',
+            's & m',
+            's&m',
+            's+m',
+            'sadism',
+            'scat',
+            'schlong',
+            'scissoring',
+            'semen',
+            'sex',
+            'sexi',
+            'sexo',
+            'sexy',
+            'shaved beaver',
+            'shaved pussy',
+            'shemale',
+            'shibari',
+            'shit',
+            'shota',
+            'shrimping',
+            'slanteye',
+            'slut',
+            'smut',
+            'snatch',
+            'snm',
+            'snowballing',
+            'sodomi',
+            'sodomize',
+            'sodomy',
+            'spic',
+            'spooge',
+            'spread legs',
+            'squirting',
+            'strap on',
+            'strapon',
+            'strappado',
+            'strip club',
+            'style doggy',
+            'suck',
+            'suicide girls',
+            'sultry women',
+            'swastika',
+            'swinger',
+            'taint',
+            'tainted love',
+            'taste my',
+            'tea bagging',
+            'threesome',
+            'throating',
+            'tied up',
+            'tight white',
+            'tit',
+            'tits',
+            'titti',
+            'titties',
+            'titty',
+            'tongue in',
+            'topless',
+            'tosser',
+            'towelhead',
+            'tranny',
+            'tribadism',
+            'tub girl',
+            'tubgirl',
+            'tushy',
+            'twat',
+            'twink',
+            'twinki',
+            'twinkie',
+            'undress',
+            'upskirt',
+            'urethra play',
+            'urophilia',
+            'vag',
+            'vagina',
+            'venus mound',
+            'vibrator',
+            'violet blue',
+            'violet wand',
+            'vorarephilia',
+            'voyeur',
+            'vulva',
+            'wank',
+            'wet dream',
+            'wetback',
+            'white power',
+            'whore',
+            'women rapping',
+            'wrapping men',
+            'wrinkled starfish',
+            'xx',
+            'xxx',
+            'yaoi',
+            'yellow shower',
+            'yiffy',
+            'zoophilia',
+        ])
+        self.stemmer = PorterStemmer()
+
+    def _normalize(self, text: str) -> str:
+        """Return text lowercased, stripped of punctuation, and stemmed.
+
+        Underscores become spaces, every other character in
+        string.punctuation is deleted, and each token produced by
+        nltk.word_tokenize is passed through self.stemmer.  The stems are
+        rejoined with single spaces.
+        """
+        # One translation pass: '_' -> ' ', all other punctuation removed.
+        table = str.maketrans('_', ' ', string.punctuation.replace('_', ''))
+        cleaned = text.lower().translate(table)
+        stems = map(self.stemmer.stem, nltk.word_tokenize(cleaned))
+        return ' '.join(stems)
+
+    def contains_bad_word(self, text: str) -> bool:
+        """Return True if text contains a blocklisted word or phrase.
+
+        The text is tokenized with nltk.word_tokenize.  Every single
+        token is checked with is_bad_word first, then every two-token
+        window, then every three-token window.  The first hit is logged
+        at debug level and ends the search.
+        """
+        words = nltk.word_tokenize(text)
+        for word in words:
+            if self.is_bad_word(word):
+                logger.debug(f'"{word}" is profanity')
+                return True
+
+        if len(words) > 1:
+            for bigram in string_utils.ngrams_presplit(words, 2):
+                if self.is_bad_word(bigram):
+                    # Needs the f prefix, otherwise the literal text
+                    # "{bigram}" is logged instead of the phrase.
+                    logger.debug(f'"{bigram}" is profanity')
+                    return True
+
+        if len(words) > 2:
+            for trigram in string_utils.ngrams_presplit(words, 3):
+                if self.is_bad_word(trigram):
+                    logger.debug(f'"{trigram}" is profanity')
+                    return True
+        return False
+
+    def is_bad_word(self, word: str) -> bool:
+        """Return True if word is in self.bad_words, either exactly as
+        given or after _normalize() has been applied to it."""
+        if word in self.bad_words:
+            return True
+        return self._normalize(word) in self.bad_words
+
+    def obscure_bad_words(self, text: str) -> str:
+        """Return text with blocklisted words replaced by random symbols.
+
+        The text is tokenized with nltk.word_tokenize and scanned left to
+        right.  At each position the three-token window is tried first,
+        then the two-token window, then the single token.  A match is
+        replaced character-for-symbol (whitespace kept) and the scan
+        skips past it.  Tokens are rejoined with single spaces, so the
+        input's original spacing is not preserved.
+        """
+        symbols = ['#', '%', '!', '@', '&', '*']
+
+        def obscure(word: str):
+            # Swap each non-space character for a random symbol, redrawing
+            # until it differs from the previously emitted symbol.
+            masked = []
+            previous = ''
+            for letter in word:
+                if letter.isspace():
+                    masked.append(letter)
+                    continue
+                symbol = random.choice(symbols)
+                while symbol == previous:
+                    symbol = random.choice(symbols)
+                previous = symbol
+                masked.append(symbol)
+            return ''.join(masked)
+
+        tokens = nltk.word_tokenize(text)
+        count = len(tokens)
+        # Pad with empty tokens so the two- and three-token windows can
+        # always be built without indexing past the end.
+        tokens.extend(['', '', ''])
+        pieces = []
+
+        cursor = 0
+        while cursor < count:
+            word = tokens[cursor]
+            bigram = f'{word} {tokens[cursor + 1]}'
+            trigram = f'{bigram} {tokens[cursor + 2]}'
+            if self.is_bad_word(trigram):
+                pieces.append(obscure(trigram))
+                cursor += 3
+            elif self.is_bad_word(bigram):
+                pieces.append(obscure(bigram))
+                cursor += 2
+            elif self.is_bad_word(word):
+                pieces.append(obscure(word))
+                cursor += 1
+            else:
+                pieces.append(word)
+                cursor += 1
+        return ' '.join(pieces).strip()
+
+
+def main() -> None:
+    """Check the command-line arguments for profanity and print the result.
+
+    Joins sys.argv[1:] with spaces, prints what contains_bad_word returns
+    for that phrase, prints the obscured phrase, then exits with status 0.
+    """
+    checker = ProfanityFilter()
+    args = sys.argv[1:]
+    phrase = ' '.join(args)
+    for report in (checker.contains_bad_word, checker.obscure_bad_words):
+        print(report(phrase))
+    sys.exit(0)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/string_utils.py b/string_utils.py

index 6fc257de52c48f34e207e79e8b2227e914ad2b8c..45cf5aab7ac7f5202346745de733c792c984214d 100644 (file)
--- a/string_utils.py
+++ b/string_utils.py
@@ -9,7 +9,7 @@ import logging
  import random
  import re
  import string
-from typing import Any, Callable, List, Optional
+from typing import Any, Callable, Iterable, List, Optional
  import unicodedata
  from uuid import uuid4
  
@@ -963,3 +963,21 @@ def thify(n: int) -> str:
          return "rd"
      else:
          return "th"
+
+
+def ngrams(txt: str, n: int):
+    """Yield each run of n consecutive whitespace-separated words in txt.
+
+    Each n-gram is produced as one space-joined string.
+    """
+    words = txt.split()
+    return ngrams_presplit(words, n)
+
+
+def ngrams_presplit(words: Iterable[str], n: int):
+    """Yield each run of n consecutive items of words, joined by spaces.
+
+    Yields nothing when words has fewer than n items.
+    """
+    # Materialize first: the annotation accepts any iterable, but the
+    # slicing below needs a real sequence (an iterator would raise).
+    words = list(words)
+    for ngram in zip(*[words[i:] for i in range(n)]):
+        yield ' '.join(ngram)
+
+
+def bigrams(txt: str):
+    """Yield each pair of adjacent words in txt as 'w1 w2'."""
+    return ngrams(txt, 2)
+
+
+def trigrams(txt: str):
+    """Yield each triple of adjacent words in txt as 'w1 w2 w3'."""
+    return ngrams(txt, 3)
diff --git a/tests/profanity_filter_test.py b/tests/profanity_filter_test.py

new file mode 100755 (executable)

index 0000000..5648ad3
--- /dev/null
+++ b/tests/profanity_filter_test.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+
+import unittest
+
+import profanity_filter as pf
+import unittest_utils
+
+
+class TestProfanityFilter(unittest.TestCase):
+    """Smoke tests for profanity_filter.ProfanityFilter."""
+
+    def test_basic_functionality(self):
+        """Known profanity is flagged; an innocuous sentence is not."""
+        checker = pf.ProfanityFilter()
+        self.assertTrue(checker.is_bad_word('shit'))
+        for phrase in (
+                'this is another fucking test',
+                'this is another fuckin test',
+        ):
+            self.assertTrue(checker.contains_bad_word(phrase))
+        clean = 'Mary had a little lamb whose fleese was white as snow.'
+        self.assertFalse(checker.contains_bad_word(clean))
+
+
+if __name__ == '__main__':
+    unittest.main()
author	Scott Gasch <[email protected]>
	Thu, 5 Aug 2021 21:56:34 +0000 (14:56 -0700)
committer	Scott Gasch <[email protected]>
	Thu, 5 Aug 2021 21:56:34 +0000 (14:56 -0700)
dateparse/dateparse_utils.py		patch \| blob \| history
dict_utils.py		patch \| blob \| history
lockfile.py		patch \| blob \| history
logging_utils.py		patch \| blob \| history
presence.py	[changed mode: 0644->0755]	patch \| blob \| history
profanity_filter.py	[new file with mode: 0755]	patch \| blob
string_utils.py		patch \| blob \| history
tests/profanity_filter_test.py	[new file with mode: 0755]	patch \| blob