X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=generic_news_rss_renderer.py;h=d952c4b9122a0cf8b00221e8064a1641dc0f27f0;hb=fa85ebf815dd7973250a5137e0152c2cb10a8b5e;hp=61be6ff01c487122cf3215a63eda3f31463223c2;hpb=6b8d4eeb7153617221f822a243a117f0bcab07bf;p=kiosk.git diff --git a/generic_news_rss_renderer.py b/generic_news_rss_renderer.py index 61be6ff..d952c4b 100644 --- a/generic_news_rss_renderer.py +++ b/generic_news_rss_renderer.py @@ -9,14 +9,15 @@ import re from typing import Dict, List, Optional, Union import xml.etree.ElementTree as ET +from scottutilz import profanity_filter + import file_writer import grab_bag import renderer import page_builder -import profanity_filter -logger = logging.getLogger(__file__) +logger = logging.getLogger(__name__) class generic_news_rss_renderer(renderer.abstaining_renderer): @@ -65,11 +66,7 @@ class generic_news_rss_renderer(renderer.abstaining_renderer): def find_description(self, item: ET.Element) -> Optional[str]: return item.findtext("description") - def munge_description( - self, - description: str, - item: ET.Element - ) -> str: + def munge_description(self, description: str, item: ET.Element) -> str: description = re.sub("<[^>]+>", "", description) return description @@ -132,7 +129,7 @@ class generic_news_rss_renderer(renderer.abstaining_renderer): headlines.set_title("%s" % self.page_title) subset = self.news.subset(4) if subset is None: - logger.warning('Not enough messages to select from in shuffle_news?!') + logger.warning("Not enough messages to select from in shuffle_news?!") return False for msg in subset: headlines.add_item(msg) @@ -186,7 +183,7 @@ class generic_news_rss_renderer(renderer.abstaining_renderer): details.set_title(self.page_title) subset = self.details.subset(1) if subset is None: - logger.warning('Not enough details to choose from in do_details') + logger.warning("Not enough details to choose from in do_details") logger.debug("Not enough details to choose from.") return False for msg in subset: @@ -202,18 +199,19 @@ class generic_news_rss_renderer(renderer.abstaining_renderer): count = 0 self.news.clear() self.details.clear() - self.conn: Optional[Union[http.client.HTTPConnection, - http.client.HTTPSConnection]] = None + self.conn: Optional[ + Union[http.client.HTTPConnection, http.client.HTTPSConnection] + ] = None for uri in self.feed_uris: url = None if self.should_use_https(): - url = f'https://{self.feed_site}{uri}' - logger.info(f'Fetching: {url}') + url = f"https://{self.feed_site}{uri}" + logger.info(f"Fetching: {url}") self.conn = http.client.HTTPSConnection(self.feed_site, timeout=10) else: - url = f'http://{self.feed_site}{uri}' - logger.info(f'Fetching: {url}') + url = f"http://{self.feed_site}{uri}" + logger.info(f"Fetching: {url}") self.conn = http.client.HTTPConnection(self.feed_site, timeout=10) assert self.conn is not None assert url is not None @@ -228,21 +226,20 @@ class generic_news_rss_renderer(renderer.abstaining_renderer): ) try: response = self.conn.getresponse() - except Exception as e: - logger.exception(e) - logger.error( + except Exception: + logger.exception( f"Exception in generic RSS renderer HTTP connection fetching {url}; giving up." ) return False if response.status != 200: logger.error( - f'Unexpected status {response.status} while fetching {url}; giving up.' + f"Unexpected status {response.status} while fetching {url}; giving up." ) return False raw = response.read() - logger.info(f'Status 200: got {len(raw)} bytes back from {url}') + logger.info(f"Status 200: got {len(raw)} bytes back from {url}") rss = ET.fromstring(raw) channel = rss[0] title_filter = set() @@ -252,9 +249,9 @@ class generic_news_rss_renderer(renderer.abstaining_renderer): if title is not None: title = self.munge_title(title, item) else: - logger.info('Skipping RSS feed item with no title.') + logger.info("Skipping RSS feed item with no title.") continue - logger.debug(f'Considering RSS item {title}...') + logger.debug(f"Considering RSS item {title}...") if description is not None: description = self.munge_description(description, item) else: @@ -265,21 +262,21 @@ class generic_news_rss_renderer(renderer.abstaining_renderer): link = item.findtext("link") if link is not None: link = self.munge_link(link) - if not self.item_is_interesting_for_headlines( - title, description, item - ): - logger.info(f'Skipping {title} because it\'s not interesting.') + if not self.item_is_interesting_for_headlines(title, description, item): + logger.info(f"Skipping {title} because it's not interesting.") continue if self.should_profanity_filter() and ( self.filter.contains_bad_word(title) or self.filter.contains_bad_word(description) ): - logger.info(f'Skipping {title} because it contains profanity.') + logger.info(f"Skipping {title} because it contains profanity.") continue if title in title_filter: - logger.info(f'Skipping {title} because we already saw an item with the same title.') + logger.info( + f"Skipping {title} because we already saw an item with the same title." + ) continue title_filter.add(title) @@ -297,14 +294,16 @@ class generic_news_rss_renderer(renderer.abstaining_renderer): pubdate = self.find_pubdate(item) if pubdate is not None: - logger.debug(f'Raw pubdate={pubdate}') + logger.debug(f"Raw pubdate={pubdate}") pubdate = self.munge_pubdate(pubdate) ts = parse(pubdate) - logger.debug(f'Translated pubdate into: {ts}') + logger.debug(f"Translated pubdate into: {ts}") blurb += f' {ts.strftime("%b %d")}' if self.item_is_interesting_for_article(title, description, item): - logger.info(f'Item {title} is also interesting as an article details page; creating...') + logger.info( + f"Item {title} is also interesting as an article details page; creating..." + ) longblurb = blurb longblurb += "
" longblurb += description @@ -312,9 +311,11 @@ class generic_news_rss_renderer(renderer.abstaining_renderer): longblurb = longblurb.replace("font-size:34pt", "font-size:44pt") self.details.add(longblurb) else: - logger.info(f'Item {title} isn\'t interesting for article details page; skipped.') + logger.info( + f"Item {title} isn't interesting for article details page; skipped." + ) blurb += "" self.news.add(blurb) count += 1 - logger.debug(f'Added {count} items so far...') + logger.debug(f"Added {count} items so far...") return count > 0