X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=seattletimes_rss_renderer.py;h=653c74ac4c7e984f153cf189ca82b07623a1c4e0;hb=5ea88ab72e175e2d4f57ae8645ca6f825549a7a9;hp=c8d12ce17d6bcadef5a79c645f0b2cdae1121df9;hpb=75b27cc68871343681f01e3f5b04cae84b1b7b2a;p=kiosk.git diff --git a/seattletimes_rss_renderer.py b/seattletimes_rss_renderer.py index c8d12ce..653c74a 100644 --- a/seattletimes_rss_renderer.py +++ b/seattletimes_rss_renderer.py @@ -1,89 +1,93 @@ -import datetime +#!/usr/bin/env python3 + +import logging +from typing import Dict, List +import xml + import generic_news_rss_renderer as gnrss -class seattletimes_rss_renderer(gnrss.generic_news_rss_renderer): - interesting_categories = frozenset([ - 'Nation', - 'World', - 'Life', - 'Technology' - 'Local News', - 'Food', - 'Drink', - 'Today File', - 'Seahawks', - 'Oddities', - 'Packfic NW', - 'Home', - 'Garden', - 'Travel', - 'Outdoors', - ]) - def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title): - super(seattletimes_rss_renderer, self).__init__( - name_to_timeout_dict, - feed_site, - feed_uris, - page_title) - self.oldest = datetime.datetime.now() - datetime.timedelta(14) - self.debug_print("oldest story we'll keep: %s" % self.oldest) +logger = logging.getLogger(__name__) - def debug_prefix(self): - return "seattletimes" - def get_headlines_page_prefix(self): +class seattletimes_rss_renderer(gnrss.generic_news_rss_renderer): + interesting_categories = frozenset( + [ + "Nation", + "World", + "Life", + "Technology", + "Local News", + "Food", + "Drink", + "Today File", + "Seahawks", + "Oddities", + "Packfic NW", + "Home", + "Garden", + "Travel", + "Outdoors", + ] + ) + + def __init__( + self, + name_to_timeout_dict: Dict[str, int], + feed_site: str, + feed_uris: List[str], + page_title: str, + ): + super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title) + + def get_headlines_page_prefix(self) -> str: return "seattletimes-nonnews" - def get_details_page_prefix(self): + def get_details_page_prefix(self) -> str: return "seattletimes-details-nonnews" - def should_use_https(self): + def should_use_https(self) -> bool: return True - def item_is_interesting_for_headlines(self, title, description, item): + def item_is_interesting_for_headlines( + self, title: str, description: str, item: xml.etree.ElementTree.Element + ) -> bool: if item.tag != "item": - self.debug_print("Item.tag isn't item?!") + logger.debug(f'{title}: item.tag ({item}) isn\'t "item"?!') + return False + if self.is_item_older_than_n_days(item, 14): + logger.info(f"{title}: too old to be interesting.") return False details = {} - for detail in item.getchildren(): - self.debug_print("detail %s => %s (%s)" % (detail.tag, - detail.attrib, - detail.text)) - if detail.text != None: + for detail in list(item): + logger.debug(f"detail {detail.tag} => {detail.attrib} ({detail.text})") + if detail.text is not None: details[detail.tag] = detail.text if "category" not in details: - self.debug_print("No category in details?!") - self.debug_print(details) + logger.debug(f"{title}: no category in details?") return False - interesting = False for x in seattletimes_rss_renderer.interesting_categories: if x in details["category"]: - self.debug_print("%s looks like a good category." % x) + logger.debug(f"{x} looks like a good category.") interesting = True - if not interesting: - return False - - if 'enclosure' in details: - if 'pubDate' in details: - x = details['pubDate'] - x = x.rsplit(' ', 1)[0] - # Fri, 13 Nov 2015 10:07:00 - dt = datetime.datetime.strptime(x, '%a, %d %b %Y %H:%M:%S') - if dt < self.oldest: - self.debug_print("%s is too old." % ( - details["pubDate"])) - return False - return True + break + return interesting - def item_is_interesting_for_article(self, title, description, item): + def item_is_interesting_for_article( + self, title: str, description: str, item: xml.etree.ElementTree.Element + ) -> bool: + if self.is_item_older_than_n_days(item, 14): + logger.info(f"{title}: is too old to be interesting.") + return False return len(description) >= 65 -#x = seattletimes_rss_renderer({"Test", 123}, + +# Test +# x = seattletimes_rss_renderer({"Test", 123}, # "www.seattletimes.com", -# [ "/life/feed/" ], +# [ "/outdoors/feed/", '/live/feed/' ], # "nonnews") -#x.periodic_render("Fetch News") -#x.periodic_render("Shuffle News") +# x.periodic_render("Fetch News") +# x.periodic_render("Shuffle News")