Somewhat large overhaul to move the kiosk towards using normal Python
[kiosk.git] / generic_news_rss_renderer.py
index 149f8acb3aa9f163d195d42deec8e82b442da34f..61be6ff01c487122cf3215a63eda3f31463223c2 100644 (file)
@@ -4,10 +4,8 @@ from abc import abstractmethod
 import datetime
 from dateutil.parser import parse
 import http.client
-import random
+import logging
 import re
-import sys
-import traceback
 from typing import Dict, List, Optional, Union
 import xml.etree.ElementTree as ET
 
@@ -18,7 +16,10 @@ import page_builder
 import profanity_filter
 
 
-class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
+logger = logging.getLogger(__file__)
+
+
+class generic_news_rss_renderer(renderer.abstaining_renderer):
     def __init__(
         self,
         name_to_timeout_dict: Dict[str, int],
@@ -26,8 +27,7 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
         feed_uris: List[str],
         page_title: str,
     ):
-        super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict, False)
-        self.debug = True
+        super().__init__(name_to_timeout_dict)
         self.feed_site = feed_site
         self.feed_uris = feed_uris
         self.page_title = page_title
@@ -35,10 +35,6 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
         self.details = grab_bag.grab_bag()
         self.filter = profanity_filter.ProfanityFilter()
 
-    @abstractmethod
-    def debug_prefix(self) -> str:
-        pass
-
     @abstractmethod
     def get_headlines_page_prefix(self) -> str:
         pass
@@ -136,7 +132,7 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
             headlines.set_title("%s" % self.page_title)
             subset = self.news.subset(4)
             if subset is None:
-                self.debug_print("Not enough messages to choose from.")
+                logger.warning('Not enough messages to select from in shuffle_news?!')
                 return False
             for msg in subset:
                 headlines.add_item(msg)
@@ -187,10 +183,11 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
     }
     </STYLE>"""
             )
-            details.set_title(f"{self.page_title}")
+            details.set_title(self.page_title)
             subset = self.details.subset(1)
             if subset is None:
-                self.debug_print("Not enough details to choose from.")
+                logger.warning('Not enough details to choose from in do_details')
+                logger.debug("Not enough details to choose from.")
                 return False
             for msg in subset:
                 blurb = msg
@@ -209,47 +206,55 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
                                   http.client.HTTPSConnection]] = None
 
         for uri in self.feed_uris:
+            url = None
             if self.should_use_https():
-                self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri))
+                url = f'https://{self.feed_site}{uri}'
+                logger.info(f'Fetching: {url}')
                 self.conn = http.client.HTTPSConnection(self.feed_site, timeout=10)
             else:
-                self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri))
+                url = f'http://{self.feed_site}{uri}'
+                logger.info(f'Fetching: {url}')
                 self.conn = http.client.HTTPConnection(self.feed_site, timeout=10)
-            assert(self.conn is not None)
+            assert self.conn is not None
+            assert url is not None
             self.conn.request(
                 "GET",
                 uri,
                 None,
                 {
                     "Accept": "*/*",
-#                    "Cache-control": "max-age=50",
-#                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
+                    "Cache-control": "max-age=50",
                 },
             )
             try:
                 response = self.conn.getresponse()
             except Exception as e:
-                traceback.print_exc(file=sys.stdout)
-                print(
-                    f"Exception in generic RSS renderer HTTP connection fetching {self.feed_site}{uri}"
+                logger.exception(e)
+                logger.error(
+                    f"Exception in generic RSS renderer HTTP connection fetching {url}; giving up."
                 )
                 return False
 
             if response.status != 200:
-                print(
-                    f"{self.page_title}: RSS fetch_news error, response: {response.status}"
+                logger.error(
+                    f'Unexpected status {response.status} while fetching {url}; giving up.'
                 )
-                self.debug_print(str(response.read()))
                 return False
 
-            rss = ET.fromstring(response.read())
+            raw = response.read()
+            logger.info(f'Status 200: got {len(raw)} bytes back from {url}')
+            rss = ET.fromstring(raw)
             channel = rss[0]
             title_filter = set()
-            for item in channel.getchildren():
+            for item in list(channel):
                 title = self.find_title(item)
                 description = item.findtext("description")
                 if title is not None:
                     title = self.munge_title(title, item)
+                else:
+                    logger.info('Skipping RSS feed item with no title.')
+                    continue
+                logger.debug(f'Considering RSS item {title}...')
                 if description is not None:
                     description = self.munge_description(description, item)
                 else:
@@ -260,22 +265,22 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
                 link = item.findtext("link")
                 if link is not None:
                     link = self.munge_link(link)
-
-                if title is None or not self.item_is_interesting_for_headlines(
-                    title, description, item
+                if not self.item_is_interesting_for_headlines(
+                        title, description, item
                 ):
-                    self.debug_print(f'Item "{title}" is not interesting')
+                    logger.info(f'Skipping {title} because it\'s not interesting.')
                     continue
 
                 if self.should_profanity_filter() and (
                     self.filter.contains_bad_word(title)
                     or self.filter.contains_bad_word(description)
                 ):
-                    self.debug_print(f'Found bad words in item "{title}"')
+                    logger.info(f'Skipping {title} because it contains profanity.')
                     continue
 
                 if title in title_filter:
-                    self.debug_print(f'Already saw title {title}, skipping.')
+                    logger.info(f'Skipping {title} because we already saw an item with the same title.')
+                    continue
                 title_filter.add(title)
 
                 blurb = """<DIV style="padding:8px;
@@ -292,18 +297,24 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
 
                 pubdate = self.find_pubdate(item)
                 if pubdate is not None:
+                    logger.debug(f'Raw pubdate={pubdate}')
                     pubdate = self.munge_pubdate(pubdate)
                     ts = parse(pubdate)
+                    logger.debug(f'Translated pubdate into: {ts}')
                     blurb += f'  <FONT COLOR=#cccccc>{ts.strftime("%b&nbsp;%d")}</FONT>'
 
                 if self.item_is_interesting_for_article(title, description, item):
+                    logger.info(f'Item {title} is also interesting as an article details page; creating...')
                     longblurb = blurb
                     longblurb += "<BR>"
                     longblurb += description
                     longblurb += "</DIV>"
                     longblurb = longblurb.replace("font-size:34pt", "font-size:44pt")
                     self.details.add(longblurb)
+                else:
+                    logger.info(f'Item {title} isn\'t interesting for article details page; skipped.')
                 blurb += "</DIV>"
                 self.news.add(blurb)
                 count += 1
+                logger.debug(f'Added {count} items so far...')
         return count > 0