X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=generic_news_rss_renderer.py;h=149f8acb3aa9f163d195d42deec8e82b442da34f;hb=7eae23537dcc61565a24d5c957d4325b7337b63a;hp=34c48210c9ce4b3069710e27db9d6ecf783a0113;hpb=ba913d3c5ec6fd5e229398ebfe9e073aaae7d73c;p=kiosk.git diff --git a/generic_news_rss_renderer.py b/generic_news_rss_renderer.py index 34c4821..149f8ac 100644 --- a/generic_news_rss_renderer.py +++ b/generic_news_rss_renderer.py @@ -6,6 +6,8 @@ from dateutil.parser import parse import http.client import random import re +import sys +import traceback from typing import Dict, List, Optional, Union import xml.etree.ElementTree as ET @@ -31,7 +33,7 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): self.page_title = page_title self.news = grab_bag.grab_bag() self.details = grab_bag.grab_bag() - self.filter = profanity_filter.profanity_filter() + self.filter = profanity_filter.ProfanityFilter() @abstractmethod def debug_prefix(self) -> str: @@ -61,13 +63,17 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): def find_title(self, item: ET.Element) -> Optional[str]: return item.findtext("title") - def munge_title(self, title: str) -> str: + def munge_title(self, title: str, item: ET.Element) -> str: return title def find_description(self, item: ET.Element) -> Optional[str]: return item.findtext("description") - def munge_description(self, description: str) -> str: + def munge_description( + self, + description: str, + item: ET.Element + ) -> str: description = re.sub("<[^>]+>", "", description) return description @@ -94,6 +100,12 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): ) -> bool: return True + def do_headlines(self) -> bool: + return True + + def do_details(self) -> bool: + return True + def is_item_older_than_n_days(self, item: ET.Element, n: int) -> bool: pubdate = self.find_pubdate(item) if pubdate is None: @@ -118,73 +130,75 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): raise Exception def shuffle_news(self) -> bool: - headlines = page_builder.page_builder() - headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS) - headlines.set_title("%s" % self.page_title) - subset = self.news.subset(4) - if subset is None: - self.debug_print("Not enough messages to choose from.") - return False - for msg in subset: - headlines.add_item(msg) - headlines.set_custom_html( - """ -""" - ) - _ = f"{self.get_headlines_page_prefix()}_{self.get_headlines_page_priority()}_25900.html" - with file_writer.file_writer(_) as f: - headlines.render_html(f) - - details = page_builder.page_builder() - details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM) - details.set_custom_html( - """ -""" - ) - details.set_title(f"{self.page_title}") - subset = self.details.subset(1) - if subset is None: - self.debug_print("Not enough details to choose from.") - return False - for msg in subset: - blurb = msg - blurb += "" - details.add_item(blurb) - _ = f"{self.get_details_page_prefix()}_{self.get_details_page_priority()}_86400.html" - with file_writer.file_writer(_) as g: - details.render_html(g) + if self.do_headlines(): + headlines = page_builder.page_builder() + headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS) + headlines.set_title("%s" % self.page_title) + subset = self.news.subset(4) + if subset is None: + self.debug_print("Not enough messages to choose from.") + return False + for msg in subset: + headlines.add_item(msg) + headlines.set_custom_html( + """ + """ + ) + _ = f"{self.get_headlines_page_prefix()}_{self.get_headlines_page_priority()}_25900.html" + with file_writer.file_writer(_) as f: + headlines.render_html(f) + + if self.do_details(): + details = page_builder.page_builder() + details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM) + details.set_custom_html( + """ + """ + ) + details.set_title(f"{self.page_title}") + subset = self.details.subset(1) + if subset is None: + self.debug_print("Not enough details to choose from.") + return False + for msg in subset: + blurb = msg + blurb += "" + details.add_item(blurb) + _ = f"{self.get_details_page_prefix()}_{self.get_details_page_priority()}_86400.html" + with file_writer.file_writer(_) as g: + details.render_html(g) return True def fetch_news(self) -> bool: @@ -197,10 +211,10 @@ a:active { for uri in self.feed_uris: if self.should_use_https(): self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri)) - self.conn = http.client.HTTPSConnection(self.feed_site, timeout=20) + self.conn = http.client.HTTPSConnection(self.feed_site, timeout=10) else: self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri)) - self.conn = http.client.HTTPConnection(self.feed_site, timeout=20) + self.conn = http.client.HTTPConnection(self.feed_site, timeout=10) assert(self.conn is not None) self.conn.request( "GET", @@ -208,14 +222,17 @@ a:active { None, { "Accept": "*/*", - "Cache-control": "max-age=59", - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", +# "Cache-control": "max-age=50", +# "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", }, ) try: response = self.conn.getresponse() - except: - print("Exception in generic RSS renderer HTTP connection") + except Exception as e: + traceback.print_exc(file=sys.stdout) + print( + f"Exception in generic RSS renderer HTTP connection fetching {self.feed_site}{uri}" + ) return False if response.status != 200: @@ -227,13 +244,14 @@ a:active { rss = ET.fromstring(response.read()) channel = rss[0] + title_filter = set() for item in channel.getchildren(): title = self.find_title(item) - if title is not None: - title = self.munge_title(title) description = item.findtext("description") + if title is not None: + title = self.munge_title(title, item) if description is not None: - description = self.munge_description(description) + description = self.munge_description(description, item) else: description = "" image = self.find_image(item) @@ -250,15 +268,19 @@ a:active { continue if self.should_profanity_filter() and ( - self.filter.contains_bad_words(title) - or self.filter.contains_bad_words(description) + self.filter.contains_bad_word(title) + or self.filter.contains_bad_word(description) ): self.debug_print(f'Found bad words in item "{title}"') continue + if title in title_filter: + self.debug_print(f'Already saw title {title}, skipping.') + title_filter.add(title) + blurb = """
""" + font-size:34pt; + -webkit-column-break-inside:avoid;">""" if image is not None: blurb += f'{ts.strftime("%b %d")}' - if description is not None and self.item_is_interesting_for_article( - title, description, item - ): + if self.item_is_interesting_for_article(title, description, item): longblurb = blurb longblurb += "
" longblurb += description