Somewhat large overhaul to move the kiosk towards using normal python logging and config.
[kiosk.git] / kiosk.py
index bd7137335ea25e5dbae0d8f39a9226c53c9bb19a..8f6154273a00045c2a297d5142a58bd407a72774 100755 (executable)
--- a/kiosk.py
+++ b/kiosk.py
@@ -2,37 +2,40 @@
 
 import collections
 from datetime import datetime
-import difflib
+from difflib import SequenceMatcher
 import gc
-import linecache
 import logging
 import os
 import re
-import sys
 from threading import Thread
 import time
-import traceback
 import tracemalloc
-from typing import Optional, List
-from queue import Queue, Empty
+from typing import Dict, List, Optional
+from queue import Queue
 
 import astral  # type: ignore
 from astral.sun import sun  # type: ignore
 import numpy as np
+import pvporcupine
 import pytz
 
+import bootstrap
+import config
+import datetime_utils
+
 import constants
 import file_writer
-import renderer
-import renderer
 import renderer_catalog
 import chooser
 import listen
-import logging
-import pvporcupine
 import trigger_catalog
 import utils
 
+
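+# Describe this program to the shared config module, which handles command-line argument parsing.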
+cfg = config.add_commandline_args(
+    f'Kiosk Server ({__file__})',
+    'A python server that runs a kiosk.'
+)
 logger = logging.getLogger(__file__)
 
 
@@ -42,6 +45,7 @@ def thread_janitor() -> None:
     gc_target = 0.0
     gc.enable()
 
+    # Main janitor loop; dump the largest pigs and force regular gcs.
     while True:
         now = time.time()
         if now > tracemalloc_target:
@@ -54,34 +58,37 @@ def thread_janitor() -> None:
             key_type = 'lineno'
             limit = 10
             top_stats = snapshot.statistics(key_type)
-            print("janitor: Top %s lines" % limit)
+            logger.info(f'janitor: Top {limit} lines')
             for index, stat in enumerate(top_stats[:limit], 1):
                 frame = stat.traceback[0]
+
                 # replace "/path/to/module/file.py" with "module/file.py"
                 filename = os.sep.join(frame.filename.split(os.sep)[-2:])
-                print("janitor: #%s: %s:%s: %.1f KiB"
-                      % (index, filename, frame.lineno, stat.size / 1024))
-                line = linecache.getline(frame.filename, frame.lineno).strip()
-                if line:
-                    print('janitor:    %s' % line)
+                logger.info(
+                    f'janitor: #{index}: {filename}:{frame.lineno}: {stat.size / 1024:.1f} KiB'
+                )
 
             other = top_stats[limit:]
             if other:
                 size = sum(stat.size for stat in other)
-                print("janitor: %s other: %.1f KiB" % (len(other), size / 1024))
+                logger.info(
+                    f'janitor: {len(other)} others: {size/1024:.1f} KiB'
+                )
             total = sum(stat.size for stat in top_stats)
-            print("janitor: Total allocated size: %.1f KiB" % (total / 1024))
+            logger.info(
+                f'janitor: Total allocated size: {total / 1024:.1f} KiB'
+            )
         if now > gc_target:
-            print("janitor: Running gc operation")
-            gc_target = now + 60.0
+            logger.info('janitor: kicking off a manual gc operation now.')
             gc.collect()
-        time.sleep(10.0)
+            gc_target = now + 120.0
+        time.sleep(30.0)
 
 
 def guess_page(command: str, page_chooser: chooser.chooser) -> str:
-    best_page = None
-    best_score = None
-    for page in page_chooser.get_page_list():
+
+    def normalize_page(page: str) -> str:
+        logger.debug(f'normalize_page input: {page}')
         page = page.replace('(', ' ')
         page = page.replace('_', ' ')
         page = page.replace(')', ' ')
@@ -101,7 +108,14 @@ def guess_page(command: str, page_chooser: chooser.chooser) -> str:
         page = page.replace('telma', 'telma cabin')
         page = page.replace('WSJBusiness', 'business news')
         page = re.sub(r'[0-9]+', '', page)
-        score = SequenceMatcher(None, command, page).ratio()
+        logger.debug(f'normalize_page output: {page}')
+        return page
+
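+    # Fuzzy-match the spoken command against each normalized page name and keep the closest match.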
+    best_page = None
+    best_score = None
+    for page in page_chooser.get_page_list():
+        npage = normalize_page(page)
+        score = SequenceMatcher(None, command, npage).ratio()
         if best_score is None or score > best_score:
             best_page = page
+            best_score = score  # remember the best score seen so far
     assert best_page is not None
@@ -109,6 +123,7 @@ def guess_page(command: str, page_chooser: chooser.chooser) -> str:
 
 
 def process_command(command: str, page_history: List[str], page_chooser) -> str:
+    logger.debug(f'Parsing verbal command: {command}')
     page = None
     if 'hold' in command:
         page = page_history[0]
@@ -117,8 +132,15 @@ def process_command(command: str, page_history: List[str], page_chooser) -> str:
     elif 'skip' in command:
         while True:
             (page, _) = page_chooser.choose_next_page()
-            if page != page_history[0]:
+            if page == page_history[0]:
+                logger.debug(f'chooser: {page} is the same as last time!  Try again.')
+            else:
                 break
+    elif 'internal' in command:
+        # For now, every "internal" stats request maps to the internal render statistics page.
+        page = constants.render_stats_pagename
     elif 'weather' in command:
         if 'telma' in command or 'cabin' in command:
             page = 'weather-telma_3_10800.html'
@@ -174,29 +196,32 @@ def process_command(command: str, page_history: List[str], page_chooser) -> str:
     else:
         page = guess_page(command, page_chooser)
     assert page is not None
+    logger.debug(f'Chose page {page}')
     return page
 
 
 def thread_change_current(command_queue: Queue) -> None:
-    page_history = [ "", "" ]
+    page_history = ["", ""]
     swap_page_target = 0.0
 
     def filter_news_during_dinnertime(page: str) -> bool:
         now = datetime.now(tz=pytz.timezone("US/Pacific"))
-        is_dinnertime = now.hour >= 17 and now.hour <= 20
+        is_dinnertime = now.hour >= 18 and now.hour <= 20
         return not is_dinnertime or not (
             "cnn" in page
             or "news" in page
             or "mynorthwest" in page
             or "seattle" in page
-            or "stranger" in page
-            or "twitter" in page
             or "wsj" in page
         )
+
     page_chooser = chooser.weighted_random_chooser_with_triggers(
         trigger_catalog.get_triggers(), [filter_news_during_dinnertime]
     )
+    current_file = os.path.join(constants.pages_dir, "current.shtml")
+    emergency_file = os.path.join(constants.pages_dir, "reload_immediately.html")
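+    # current.shtml is what the kiosk displays; reload_immediately.html, while present, tells polling clients to refresh now.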
 
+    # Main chooser loop
     while True:
         now = time.time()
 
@@ -206,63 +231,71 @@ def thread_change_current(command_queue: Queue) -> None:
             command = command_queue.get(block=False)
         except Exception:
             command = None
-            pass
+
         if command is not None:
+            logger.info(f'chooser: We got a verbal command ("{command}"), parsing it...')
             triggered = True
             page = process_command(command, page_history, page_chooser)
-
-        # Else pick a page randomly.
         else:
             while True:
                 (page, triggered) = page_chooser.choose_next_page()
-                if triggered or page != page_history[0]:
+                if triggered:
+                    logger.info('chooser: A trigger is active...')
                     break
+                elif page == page_history[0]:
+                    logger.debug(f'chooser: {page} is the same as last time! Try again.')
+                else:
+                    break
 
         if triggered:
-            print("chooser[%s] - WE ARE TRIGGERED." % utils.timestamp())
             if page != page_history[0] or (swap_page_target - now) < 10.0:
-                print(
-                    "chooser[%s] - EMERGENCY PAGE %s LOAD NEEDED"
-                    % (utils.timestamp(), page)
-                )
-#                try:
-                current = os.path.join(constants.pages_dir, "current.shtml")
-                with open(current, "w") as f:
-                    emit_wrapped(f, page, override_refresh_sec = 40, command = command)
-                    print(f'Wrote {current}')
-
-                page_history.insert(0, page)
-                page_history = page_history[0:10]
-                swap_page_target = now + 40
-#                except:
-#                    print("chooser[%s] - page does not exist?!" % (utils.timestamp()))
-#                    continue
+                logger.info(f'chooser: An emergency page reload to {page} is needed at this time.')
+                swap_page_target = now + constants.emergency_refresh_period_sec
+
+                # Set current.shtml to the right page.
+                try:
+                    with open(current_file, "w") as f:
+                        emit_wrapped(
+                            f,
+                            page,
+                            override_refresh_sec = constants.emergency_refresh_period_sec,
+                            command = command
+                        )
+                    logger.debug(f'chooser: Wrote {current_file}.')
+                except Exception as e:
+                    logger.exception(e)
+                    logger.error(f'chooser: Unexpected exception; assuming {page} doesn\'t exist?!')
+                    continue
 
                 # Also notify XMLHTTP clients that they need to refresh now.
-                emergency_file = os.path.join(constants.pages_dir, "reload_immediately.html")
                 with open(emergency_file, "w") as f:
                     f.write(f'Reload, suckers... you HAVE to see {page}!')
-                    print(f'Causing immediate page reload with {emergency_file}...')
+                logger.debug(f'chooser: Wrote {emergency_file}...')
 
                 # Fix this hack... maybe read the webserver logs and see if it
                 # actually was picked up?
-                time.sleep(3.0)
+                time.sleep(0.95)
                 os.remove(emergency_file)
-                print(f'...and removed {emergency_file}')
+                logger.debug(f'chooser: ...and removed {emergency_file}.')
 
         elif now >= swap_page_target:
             assert page != page_history[0]
-            print("chooser[%s] - nominal choice of %s" % (utils.timestamp(), page))
-#            try:
-            with open(os.path.join(constants.pages_dir, "current.shtml"), "w") as f:
-                emit_wrapped(f, page)
-            page_history.insert(0, page)
-            page_history = page_history[0:10]
+            logger.info(
+                f'chooser: Nominal choice of {page} as the next to show.'
+            )
             swap_page_target = now + constants.refresh_period_sec
-#            except:
-#                print("chooser[%s] - page does not exist?!" % (utils.timestamp()))
-#                continue
-        time.sleep(1)
+            try:
+                with open(current_file, "w") as f:
+                    emit_wrapped(f, page)
+                logger.debug(f'chooser: Wrote {current_file}.')
+            except Exception as e:
+                logger.exception(e)
+                logger.error(f'chooser: Unexpected exception; assuming {page} doesn\'t exist?!')
+                continue
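+        # Remember recent page choices; 'hold'/'back' voice commands and the repeat check consult this history.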
+        page_history.insert(0, page)
+        page_history = page_history[0:10]
+        time.sleep(0.5)
 
 
 def emit_wrapped(f,
@@ -270,8 +303,9 @@ def emit_wrapped(f,
                  *,
                  override_refresh_sec: int = None,
                  command: str = None) -> None:
+
     def pick_background_color() -> str:
-        now = datetime.now(tz=pytz.timezone("US/Pacific"))
+        now = datetime_utils.now_pacific()
         city = astral.LocationInfo(
             "Bellevue", "USA", "US/Pacific", 47.610, -122.201
         )
@@ -290,7 +324,7 @@ def emit_wrapped(f,
         if override_refresh_sec is not None:
             return float(override_refresh_sec * 1000.0)
         now = datetime.now(tz=pytz.timezone("US/Pacific"))
-        if now.hour < 7:
+        if now.hour < 6:
             return float(constants.refresh_period_night_sec * 1000.0)
         else:
             return float(constants.refresh_period_sec * 1000.0)
@@ -349,7 +383,8 @@ def emit_wrapped(f,
       }
     }
   }
-""")
+"""
+    )
     f.write(
 """
   // Operate the clock at the top of the page.
@@ -368,7 +403,8 @@ def emit_wrapped(f,
     document.getElementById("date").innerHTML = today.toDateString();
     var t = setTimeout(function(){runClock()}, 1000);
   }
-""" % bgcolor)
+""" % bgcolor
+    )
     f.write(
 """
   // Helper method for running the clock.
@@ -401,8 +437,8 @@ def emit_wrapped(f,
   addLoadEvent(zoomScreen);
   addLoadEvent(runClock);
   addLoadEvent(lateLoadImages);
-""")
-
+"""
+    )
     f.write(
 """
   // Runs the countdown line at the bottom and is responsible for
@@ -425,7 +461,7 @@ def emit_wrapped(f,
           }
         } else {
           // Reload unconditionally after 22 sec.
-          window.location.reload();
+          window.location.reload(true);
         }
 
         // Brief sleep before doing it all over again.
@@ -434,29 +470,25 @@ def emit_wrapped(f,
         });
       }, 50)
   })();
-""" % get_refresh_period())
+""" % get_refresh_period()
+    )
     f.write(
 """
-  // Periodically checks for emergency reload events.
-  (function poll() {
-    setTimeout(
+  function check_reload() {
+    var xhr = new XMLHttpRequest();
+    xhr.open('GET',
+             '%s/reload_immediately.html');
+    xhr.onload =
       function() {
-        var xhr = new XMLHttpRequest();
-        xhr.open('GET',
-                 '%s/reload_immediately.html');
-        xhr.onload =
-          function() {
-            if (xhr.status === 200) {
-              window.location.reload();
-            } else {
-              sleep(500).then(() => {
-                poll();
-              });
-            }
-          };
-        xhr.send();
-      }, 500);
-  })();
+        if (xhr.status === 200) {
+          window.location.reload(true);
+        }
+      };
+    xhr.send();
+  }
+
+  // Periodically checks for emergency reload events.
+  setInterval(check_reload, 500);
   </SCRIPT>
 </HEAD>
 """ % constants.root_url)
@@ -479,7 +511,8 @@ def emit_wrapped(f,
         <TD COLSPAN=3>
             <DIV ID="content" STYLE="zoom: 1; visibility: hidden;">
               <!-- BEGIN main page contents. -->
-""")
+"""
+    )
     f.write(f'<!--#include virtual="{filename}"-->')
     f.write(
 """
@@ -488,7 +521,8 @@ def emit_wrapped(f,
             <BR>
             <DIV STYLE="position: absolute; top:1030px; width:99%">
             <P ALIGN="right">
-""")
+"""
+    )
     f.write(f'<FONT SIZE=2 COLOR=#bbbbbb>{pageid} @ {age} ago.</FONT>')
     f.write(
 """
@@ -505,7 +539,63 @@ def emit_wrapped(f,
         </TD>
     </TR>
     </TABLE>
-</BODY>""")
+</BODY>"""
+    )
+
+
+def renderer_update_internal_stats_page(
+        last_render: Dict[str, datetime],
+        render_counts: collections.Counter,
+        render_times: Dict[str, np.array],
+) -> None:
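+    """Write an HTML table of per-renderer last run time, invocation count, and render-latency percentiles."""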
+    logger.debug('renderer: Updating internal render statistics page.')
+    with file_writer.file_writer(constants.render_stats_pagename) as f:
+        f.write(
+f'''
+<CENTER>
+<TABLE BORDER=0 WIDTH=95%>
+    <TR>
+    <TH><B>Renderer Name</B></TH>
+    <TH><B>Last Run</B></TH>
+    <TH><B>Num Invocations</B></TH>
+    <TH><B>Render Latency</B></TH>
+    </TR>
+''')
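+        # Emit one table row per registered renderer, alternating row background colors for readability.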
+        for n, r in enumerate(renderer_catalog.get_renderers()):
+            if n % 2 == 0:
+                style = 'style="margin: 0; padding: 0; background: #c6b0b0;"'
+            else:
+                style = 'style="margin: 0; padding: 0; background: #eeeeee;"'
+            name = r.get_name()
+            last = last_render.get(name, None)
+            if last is None:
+                last = 'never'
+            else:
+                last = last.strftime('%Y/%m/%d %I:%M:%S%P')
+            count = render_counts.get(name, 0)
+            latency = render_times.get(name, np.array([]))
+            p25 = p50 = p75 = p90 = p99 = 'N/A'
+            try:
+                p25 = f'{np.percentile(latency, 25):5.2f}'
+                p50 = f'{np.percentile(latency, 50):5.2f}'
+                p75 = f'{np.percentile(latency, 75):5.2f}'
+                p90 = f'{np.percentile(latency, 90):5.2f}'
+                p99 = f'{np.percentile(latency, 99):5.2f}'
+            except IndexError:
+                # No latency samples yet for this renderer; leave the percentiles as 'N/A'.
+                pass
+            f.write(
+f'''
+    <TR>
+    <TD {style}>{name}&nbsp;</TD>
+    <TD {style}>&nbsp;{last}&nbsp;</TD>
+    <TD {style}><CENTER>&nbsp;{count}&nbsp;</CENTER></TD>
+    <TD {style}>&nbsp;p25={p25}, p50={p50}, p75={p75}, p90={p90}, p99={p99}</TD>
+    </TR>
+'''
+            )
+        f.write('</TABLE>')
 
 
 def thread_invoke_renderers() -> None:
@@ -513,25 +603,28 @@ def thread_invoke_renderers() -> None:
     render_counts: collections.Counter = collections.Counter()
     last_render: Dict[str, datetime] = {}
 
+    # Main renderer loop
     while True:
-        print(f'renderer[{utils.timestamp()}]: invoking all overdue renderers in catalog...')
+        logger.info(
+            'renderer: invoking all overdue renderers in catalog...'
+        )
         for r in renderer_catalog.get_renderers():
             name = r.get_name()
             now = time.time()
             try:
                 r.render()
             except Exception as e:
-                traceback.print_exc(file=sys.stdout)
                 logger.exception(e)
-                print(
-                    f"renderer[{utils.timestamp()}] Unknown exception ({e}) in {name}, swallowing it."
+                logger.error(
+                    f'renderer: Unexpected and unhandled exception ({e}) in {name}, swallowing it.'
                 )
+                continue
 
             # Increment the count of render operations per renderer.
             render_counts[name] += 1
 
             # Keep track of the last time we invoked each renderer.
-            last_render[name] = datetime.now(tz=pytz.timezone("US/Pacific"))
+            last_render[name] = datetime_utils.now_pacific()
 
             # Record how long each render operation takes and warn if very long.
             delta = time.time() - now
@@ -539,69 +632,22 @@ def thread_invoke_renderers() -> None:
             times = np.insert(times, 0, delta)
             render_times[name] = times
             if delta > 1.0:
-                hdr = f'renderer[{utils.timestamp()}]:'
-                print(
+                hdr = 'renderer:'
+                logger.warning(
 f'''
 {hdr} Warning: {name}'s rendering took {delta:5.2f}s.
 {hdr} FYI: {name}'s render times: p25={np.percentile(times, 25):5.2f}, p50={np.percentile(times, 50):5.2f}, p75={np.percentile(times, 75):5.2f}, p90={np.percentile(times, 90):5.2f}, p99={np.percentile(times, 99):5.2f}
-''')
-
-        # Render a page about internal stats of renderers.
-        print(f'renderer[{utils.timestamp()}]: Updating internal statistics page.')
-        with file_writer.file_writer(constants.internal_stats_pagename) as f:
-            f.write(
-f'''
-<CENTER>
-<TABLE BORDER=0 WIDTH=95%>
-    <TR>
-    <TH><B>Renderer Name</B></TH>
-    <TH><B>Last Run</B></TH>
-    <TH><B>Num Invocations</B></TH>
-    <TH><B>Render Latency</B></TH>
-    </TR>
-''')
-            for n, r in enumerate(renderer_catalog.get_renderers()):
-                if n % 2 == 0:
-                    style = 'style="margin: 0; padding: 0; background: #c6b0b0;"'
-                else:
-                    style = 'style="margin: 0; padding: 0; background: #eeeeee;"'
-                name = r.get_name()
-                last = last_render.get(name, None)
-                if last is None:
-                    last = 'never'
-                else:
-                    last = last.strftime('%Y/%m/%d %I:%M:%S%P')
-                count = render_counts.get(name, 0)
-                latency = render_times.get(name, np.array([]))
-                p25 = p50 = p75 = p90 = p99 = 'N/A'
-                try:
-                    p25 = np.percentile(latency, 25)
-                    p50 = np.percentile(latency, 50)
-                    p75 = np.percentile(latency, 75)
-                    p90 = np.percentile(latency, 90)
-                    p99 = np.percentile(latency, 99)
-                except IndexError:
-                    pass
-                f.write(
-f'''
-    <TR>
-    <TD {style}>{name}&nbsp;</TD>
-    <TD {style}>&nbsp;{last}&nbsp;</TD>
-    <TD {style}><CENTER>&nbsp;{count}&nbsp;</CENTER></TD>
-    <TD {style}>&nbsp;p25={p25:5.2f}, p50={p50:5.2f}, p75={p75:5.2f}, p90={p90:5.2f}, p99={p99:5.2f}</TD>
-    </TR>
-''')
-            f.write('</TABLE>')
+'''
+                )
 
-        print(
-            f"renderer[{utils.timestamp()}]: " +
-            f"thread having a little break for {constants.render_period_sec}s..."
-        )
+        # Update a page about internal stats of renderers.
+        renderer_update_internal_stats_page(last_render, render_counts, render_times)
+        logger.debug('renderer: having a little nap...')
         time.sleep(constants.render_period_sec)
 
 
-if __name__ == "__main__":
-    logging.basicConfig()
+def main() -> None:
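+    """Watchdog: start the hotword, page-changer, renderer, and janitor threads and restart any that die."""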
     command_queue: Queue = Queue()
     changer_thread: Optional[Thread] = None
     renderer_thread: Optional[Thread] = None
@@ -609,6 +655,12 @@ if __name__ == "__main__":
     hotword_thread: Optional[Thread] = None
     while True:
         if hotword_thread is None or not hotword_thread.is_alive():
+            if hotword_thread is None:
+                logger.info('watchdog: Starting up the hotword detector thread...')
+            else:
+                logger.warning(
+                    'watchdog: The hotword detector thread seems to have died; restarting it and hoping for the best.'
+                )
             keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in ["bumblebee"]]
             sensitivities = [0.7] * len(keyword_paths)
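+            # Porcupine sensitivity is in [0, 1]; higher values catch the keyword more readily but increase false alarms.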
             listener = listen.HotwordListener(
@@ -618,22 +670,41 @@ if __name__ == "__main__":
             )
             hotword_thread = Thread(target=listener.listen_forever, args=())
             hotword_thread.start()
+
         if changer_thread is None or not changer_thread.is_alive():
-            print(
-                f"MAIN[{utils.timestamp()}] - (Re?)initializing chooser thread... (wtf?!)"
-            )
+            if changer_thread is None:
+                logger.info('watchdog: Starting up the current page changer thread...')
+            else:
+                logger.warning(
+                    'watchdog: The current page changer thread seems to have died; restarting it and hoping for the best.'
+                )
             changer_thread = Thread(target=thread_change_current, args=(command_queue,))
             changer_thread.start()
+
         if renderer_thread is None or not renderer_thread.is_alive():
-            print(
-                f"MAIN[{utils.timestamp()}] - (Re?)initializing render thread... (wtf?!)"
-            )
+            if renderer_thread is None:
+                logger.info('watchdog: Starting up the page renderer thread...')
+            else:
+                logger.warning(
+                    'watchdog: The page renderer thread seems to have died; restarting it and hoping for the best.'
+                )
             renderer_thread = Thread(target=thread_invoke_renderers, args=())
             renderer_thread.start()
+
         if janitor_thread is None or not janitor_thread.is_alive():
-            print(
-                f"MAIN[{utils.timestamp()}] - (Re?)initializing janitor thread... (wtf?!)"
-            )
+            if janitor_thread is None:
+                logger.info('watchdog: Starting up the memory janitor thread...')
+            else:
+                logger.warning(
+                    'watchdog: The memory janitor thread seems to have died; restarting it and hoping for the best.'
+                )
             janitor_thread = Thread(target=thread_janitor, args=())
             janitor_thread.start()
-        time.sleep(60)
+
+        # Have a little break and then check to make sure all threads are still alive.
+        logger.debug('watchdog: having a little nap.')
+        time.sleep(constants.check_threads_period_sec)
+
+
+if __name__ == "__main__":
+    main()