#!/usr/bin/env python3
"""CNN-specific specialization of the generic news RSS renderer."""

import generic_news_rss_renderer
import re
from typing import Dict, List, Optional
import xml
import xml.etree.ElementTree

# Namespace the "media:" prefix is bound to in Media RSS documents.
# ElementTree stores parsed tags as "{uri}local", so the prefix must be
# resolved through this map for a lookup to match.
_MEDIA_RSS_NAMESPACES = {"media": "http://search.yahoo.com/mrss/"}

# Matches self-referential site names such as "CNN.com" or "CNNMoney.com".
_CNN_DOMAIN_RE = re.compile(r"[Cc][Nn][Nn][A-Za-z]*\.com")


class cnn_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
    """Renderer that applies CNN-specific filtering and cleanup to RSS items."""

    def __init__(
        self,
        name_to_timeout_dict: Dict[str, int],
        feed_site: str,
        feed_uris: List[str],
        page_title: str,
    ):
        """Forward all arguments unchanged to the generic renderer."""
        super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title)

    def get_headlines_page_prefix(self) -> str:
        """Return the prefix ``cnn-<page_title>`` for the headlines page."""
        return f"cnn-{self.page_title}"

    def get_details_page_prefix(self) -> str:
        """Return the prefix ``cnn-details-<page_title>`` for the details page."""
        return f"cnn-details-{self.page_title}"

    def munge_description(
        self, description: str, item: xml.etree.ElementTree.Element
    ) -> str:
        """Return *description* with boilerplate and markup removed.

        Drops the "Read full story for latest details" teaser and then
        strips anything that looks like an HTML/XML tag.  *item* is unused.
        """
        description = re.sub("[Rr]ead full story for latest details.", "", description)
        description = re.sub("<[^>]+>", "", description)
        return description

    def find_image(self, item: xml.etree.ElementTree.Element) -> Optional[str]:
        """Return the ``url`` attribute of the item's ``media:thumbnail`` child.

        Returns None when the item has no such child or the child carries no
        ``url`` attribute.
        """
        # find() yields the element itself; findtext() would yield its text
        # content as a str, which has no attributes to query.
        thumbnail = item.find("media:thumbnail", _MEDIA_RSS_NAMESPACES)
        if thumbnail is None:
            return None
        return thumbnail.get("url")

    def should_use_https(self) -> bool:
        """Return False.

        NOTE(review): presumably consumed by the base class to choose the
        scheme used when fetching feeds -- confirm against the base class.
        """
        return False

    def item_is_interesting_for_headlines(
        self, title: str, description: str, item: xml.etree.ElementTree.Element
    ) -> bool:
        """Return True if *item* should appear on the headlines page.

        An item is rejected when the inherited age check reports it as older
        than 14 days, or when its title mentions a CNN site name (for example
        "CNN.com").  *description* is unused.
        """
        if self.is_item_older_than_n_days(item, 14, default=True):
            return False
        return _CNN_DOMAIN_RE.search(title) is None

    def item_is_interesting_for_article(
        self, title: str, description: str, item: xml.etree.ElementTree.Element
    ) -> bool:
        """Return True if *item* should appear on the details page.

        An item is rejected when the inherited age check reports it as older
        than 7 days, when its title mentions a CNN site name, or when its
        description is shorter than 65 characters.
        """
        if self.is_item_older_than_n_days(item, 7):
            return False
        return _CNN_DOMAIN_RE.search(title) is None and len(description) >= 65


# Test
# x = cnn_rss_renderer(
#     {
#         "Fetch News" : 1,
#         "Shuffle News" : 1
#     },
#     "rss.cnn.com",
#     [
#         "/rss/money_technology.rss",
#         "/rss/cnn_tech.rss",
#         "/rss/cnn_topstories.rss",
#         "/rss/cnn_world.rss",
#         "/rss/cnn_us.rss",
#     ],
#     "Test"
# )
# if x.fetch_news() == 0:
#     print("Error fetching news, no items fetched.")
# x.shuffle_news()