#!/usr/bin/env python3

"""CNN flavour of the generic news RSS renderer.

Provenance: reconstructed from the post-change side of the gitweb blobdiff
for ``cnn_rss_renderer.py`` (index a93b491..a4c8945, repository ``kiosk``).
"""

import re
from typing import Dict, List, Optional
import xml.etree.ElementTree

import generic_news_rss_renderer

# Sentence stripped from descriptions.  The trailing "." is an unescaped
# regex wildcard; it is kept verbatim so matching behaviour does not change.
_BOILERPLATE_RE = re.compile("[Rr]ead full story for latest details.")

# Anything that looks like an HTML/XML tag inside a description.
_HTML_TAG_RE = re.compile("<[^>]+>")

# Titles mentioning a "cnn<letters>.com" domain (any case of "cnn").
_CNN_DOMAIN_RE = re.compile(r"[Cc][Nn][Nn][A-Za-z]*\.com")

# Tags accepted as a Media RSS thumbnail: the namespace-qualified form that
# ElementTree produces when the feed declares the Media RSS namespace, and
# the literal prefixed form that the original lookup spelled out.
_MEDIA_THUMBNAIL_TAGS = (
    "{http://search.yahoo.com/mrss/}thumbnail",
    "media:thumbnail",
)


class cnn_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
    """Renderer for CNN RSS feeds.

    Overrides the page-prefix, description-cleanup, image-lookup and
    item-filtering hooks of ``generic_news_rss_renderer``.
    """

    def __init__(
        self,
        name_to_timeout_dict: Dict[str, int],
        feed_site: str,
        feed_uris: List[str],
        page_title: str,
    ):
        """Forward all arguments, unchanged, to the base-class constructor.

        Args:
            name_to_timeout_dict: passed through to the base class
                (presumably operation name -> timeout; confirm against the
                base class).
            feed_site: passed through (the commented example at the bottom
                of this file uses a bare host name).
            feed_uris: passed through (the commented example uses paths
                such as "/rss/cnn_tech.rss").
            page_title: passed through; this class reads it back as
                ``self.page_title`` when building page prefixes.
        """
        super().__init__(
            name_to_timeout_dict, feed_site, feed_uris, page_title
        )

    def get_headlines_page_prefix(self) -> str:
        """Return ``"cnn-"`` followed by the page title."""
        return f"cnn-{self.page_title}"

    def get_details_page_prefix(self) -> str:
        """Return ``"cnn-details-"`` followed by the page title."""
        return f"cnn-details-{self.page_title}"

    def munge_description(
        self, description: str, item: xml.etree.ElementTree.Element
    ) -> str:
        """Strip the "read full story" sentence and any markup tags.

        Args:
            description: raw description text.
            item: the feed item; not used by this implementation.

        Returns:
            The description with both patterns removed.
        """
        description = _BOILERPLATE_RE.sub("", description)
        description = _HTML_TAG_RE.sub("", description)
        return description

    def find_image(self, item: xml.etree.ElementTree.Element) -> Optional[str]:
        """Return the ``url`` attribute of the item's media thumbnail.

        Only direct children of ``item`` are examined.  The first child
        whose tag is a Media RSS thumbnail decides the result.

        Returns:
            The thumbnail URL, or None when there is no thumbnail child or
            it carries no ``url`` attribute.
        """
        # An element (not its text) is needed here: the URL lives in an
        # attribute, and text lookups return a str, which has no .get().
        for child in item:
            if child.tag in _MEDIA_THUMBNAIL_TAGS:
                return child.get("url")
        return None

    def should_use_https(self) -> bool:
        """Return False unconditionally."""
        return False

    def item_is_interesting_for_headlines(
        self, title: str, description: str, item: xml.etree.ElementTree.Element
    ) -> bool:
        """Decide whether an item may appear on the headlines page.

        An item is rejected when ``is_item_older_than_n_days(item, 14)`` is
        true (helper not defined in this file; presumably inherited) or
        when its title mentions a "cnn*.com" domain.
        """
        if self.is_item_older_than_n_days(item, 14):
            return False
        return _CNN_DOMAIN_RE.search(title) is None

    def item_is_interesting_for_article(
        self, title: str, description: str, item: xml.etree.ElementTree.Element
    ) -> bool:
        """Decide whether an item may appear on the article/details page.

        Stricter than the headline filter: the age cut-off is 7 days, and
        the description must be at least 65 characters long in addition to
        the title not mentioning a "cnn*.com" domain.
        """
        if self.is_item_older_than_n_days(item, 7):
            return False
        return (
            _CNN_DOMAIN_RE.search(title) is None
            and len(description) >= 65
        )


# Test
# x = cnn_rss_renderer(
#    {"Fetch News" : 1,
#     "Shuffle News" : 1},
#    "rss.cnn.com",
#    ... (lines between the two diff hunks are not shown in the source) ...
#      "/rss/cnn_tech.rss",
#    ],
#    "Test" )
# if x.fetch_news() == 0:
#    print("Error fetching news, no items fetched.")
# x.shuffle_news()