X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=generic_news_rss_renderer.py;fp=generic_news_rss_renderer.py;h=61be6ff01c487122cf3215a63eda3f31463223c2;hb=6b8d4eeb7153617221f822a243a117f0bcab07bf;hp=149f8acb3aa9f163d195d42deec8e82b442da34f;hpb=7eae23537dcc61565a24d5c957d4325b7337b63a;p=kiosk.git diff --git a/generic_news_rss_renderer.py b/generic_news_rss_renderer.py index 149f8ac..61be6ff 100644 --- a/generic_news_rss_renderer.py +++ b/generic_news_rss_renderer.py @@ -4,10 +4,8 @@ from abc import abstractmethod import datetime from dateutil.parser import parse import http.client -import random +import logging import re -import sys -import traceback from typing import Dict, List, Optional, Union import xml.etree.ElementTree as ET @@ -18,7 +16,10 @@ import page_builder import profanity_filter -class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): +logger = logging.getLogger(__file__) + + +class generic_news_rss_renderer(renderer.abstaining_renderer): def __init__( self, name_to_timeout_dict: Dict[str, int], @@ -26,8 +27,7 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): feed_uris: List[str], page_title: str, ): - super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict, False) - self.debug = True + super().__init__(name_to_timeout_dict) self.feed_site = feed_site self.feed_uris = feed_uris self.page_title = page_title @@ -35,10 +35,6 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): self.details = grab_bag.grab_bag() self.filter = profanity_filter.ProfanityFilter() - @abstractmethod - def debug_prefix(self) -> str: - pass - @abstractmethod def get_headlines_page_prefix(self) -> str: pass @@ -136,7 +132,7 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): headlines.set_title("%s" % self.page_title) subset = self.news.subset(4) if subset is None: - self.debug_print("Not enough messages to choose from.") + logger.warning('Not enough messages to select from in shuffle_news?!') return False for msg in subset: headlines.add_item(msg) @@ -187,10 +183,11 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): } """ ) - details.set_title(f"{self.page_title}") + details.set_title(self.page_title) subset = self.details.subset(1) if subset is None: - self.debug_print("Not enough details to choose from.") + logger.warning('Not enough details to choose from in do_details') + logger.debug("Not enough details to choose from.") return False for msg in subset: blurb = msg @@ -209,47 +206,55 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): http.client.HTTPSConnection]] = None for uri in self.feed_uris: + url = None if self.should_use_https(): - self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri)) + url = f'https://{self.feed_site}{uri}' + logger.info(f'Fetching: {url}') self.conn = http.client.HTTPSConnection(self.feed_site, timeout=10) else: - self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri)) + url = f'http://{self.feed_site}{uri}' + logger.info(f'Fetching: {url}') self.conn = http.client.HTTPConnection(self.feed_site, timeout=10) - assert(self.conn is not None) + assert self.conn is not None + assert url is not None self.conn.request( "GET", uri, None, { "Accept": "*/*", -# "Cache-control": "max-age=50", -# "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", + "Cache-control": "max-age=50", }, ) try: response = self.conn.getresponse() except Exception as e: - traceback.print_exc(file=sys.stdout) - print( - f"Exception in generic RSS renderer HTTP connection fetching {self.feed_site}{uri}" + logger.exception(e) + logger.error( + f"Exception in generic RSS renderer HTTP connection fetching {url}; giving up." ) return False if response.status != 200: - print( - f"{self.page_title}: RSS fetch_news error, response: {response.status}" + logger.error( + f'Unexpected status {response.status} while fetching {url}; giving up.' ) - self.debug_print(str(response.read())) return False - rss = ET.fromstring(response.read()) + raw = response.read() + logger.info(f'Status 200: got {len(raw)} bytes back from {url}') + rss = ET.fromstring(raw) channel = rss[0] title_filter = set() - for item in channel.getchildren(): + for item in list(channel): title = self.find_title(item) description = item.findtext("description") if title is not None: title = self.munge_title(title, item) + else: + logger.info('Skipping RSS feed item with no title.') + continue + logger.debug(f'Considering RSS item {title}...') if description is not None: description = self.munge_description(description, item) else: @@ -260,22 +265,22 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): link = item.findtext("link") if link is not None: link = self.munge_link(link) - - if title is None or not self.item_is_interesting_for_headlines( - title, description, item + if not self.item_is_interesting_for_headlines( + title, description, item ): - self.debug_print(f'Item "{title}" is not interesting') + logger.info(f'Skipping {title} because it\'s not interesting.') continue if self.should_profanity_filter() and ( self.filter.contains_bad_word(title) or self.filter.contains_bad_word(description) ): - self.debug_print(f'Found bad words in item "{title}"') + logger.info(f'Skipping {title} because it contains profanity.') continue if title in title_filter: - self.debug_print(f'Already saw title {title}, skipping.') + logger.info(f'Skipping {title} because we already saw an item with the same title.') + continue title_filter.add(title) blurb = """
' if self.item_is_interesting_for_article(title, description, item): + logger.info(f'Item {title} is also interesting as an article details page; creating...') longblurb = blurb longblurb += "
" longblurb += description longblurb += "
" longblurb = longblurb.replace("font-size:34pt", "font-size:44pt") self.details.add(longblurb) + else: + logger.info(f'Item {title} isn\'t interesting for article details page; skipped.') blurb += "" self.news.add(blurb) count += 1 + logger.debug(f'Added {count} items so far...') return count > 0