import generic_news_rss_renderer
import re

# Matches a CNN site name written as "<cnn...>.com" (any letter case for
# "cnn", optional letters before ".com"), e.g. "CNN.com" or "CNNMoney.com".
_CNN_SITE_RE = re.compile(r'[Cc][Nn][Nn][A-Za-z]*\.com')

# Boilerplate sentence removed from item descriptions.
_READ_FULL_STORY_RE = re.compile('[Rr]ead full story for latest details.')

# Anything that looks like an HTML/XML tag.
_MARKUP_TAG_RE = re.compile('<[^>]+>')

# Prefix map used to resolve "media:" element names (Media RSS namespace).
_MEDIA_NAMESPACES = {'media': 'http://search.yahoo.com/mrss/'}


class cnn_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
    """News renderer specialized for CNN RSS feeds.

    Overrides the naming, description clean-up, thumbnail lookup and item
    filtering hooks of ``generic_news_rss_renderer``.
    """

    def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
        """Forward all arguments to the base renderer and enable debugging.

        Args:
            name_to_timeout_dict: passed through unchanged to the base class.
            feed_site: passed through unchanged to the base class.
            feed_uris: passed through unchanged to the base class.
            page_title: passed through unchanged to the base class; the
                naming methods below read it back as ``self.page_title``
                (presumably stored there by the base class).
        """
        super(cnn_rss_renderer, self).__init__(
            name_to_timeout_dict,
            feed_site,
            feed_uris,
            page_title)
        self.debug = 1

    def debug_prefix(self):
        """Return the label "cnn(<page_title>)"."""
        return "cnn(%s)" % (self.page_title)

    def get_headlines_page_prefix(self):
        """Return the headlines page prefix "cnn-<page_title>"."""
        return "cnn-%s" % (self.page_title)

    def get_details_page_prefix(self):
        """Return the details page prefix "cnn-details-<page_title>"."""
        return "cnn-details-%s" % (self.page_title)

    def munge_description(self, description):
        """Return *description* with feed boilerplate and markup removed.

        Drops the "Read full story for latest details." sentence first and
        then strips every ``<...>`` tag.
        """
        description = _READ_FULL_STORY_RE.sub('', description)
        description = _MARKUP_TAG_RE.sub('', description)
        return description

    def find_image(self, item):
        """Return the ``url`` attribute of the item's media:thumbnail, or None.

        Assumes *item* follows the ``xml.etree.ElementTree.Element`` API
        (``find`` accepting a namespace map) -- TODO confirm against the
        base class.
        """
        # The element itself is needed here, not its text: the previous
        # findtext() call produced a str, which has no .get() method.  The
        # prefix map lets "media:" resolve to the Media RSS namespace.
        thumbnail = item.find('media:thumbnail', _MEDIA_NAMESPACES)
        if thumbnail is None:
            return None
        return thumbnail.get('url')

    def should_use_https(self):
        """Return False; this renderer does not ask for HTTPS."""
        return False

    def item_is_interesting_for_headlines(self, title, description, item):
        """Return True when *item* qualifies for the headlines page.

        An item is rejected when ``is_item_older_than_n_days(item, 7)`` is
        true or when its title mentions a CNN site name (see
        ``_CNN_SITE_RE``).
        """
        if self.is_item_older_than_n_days(item, 7):
            return False
        return _CNN_SITE_RE.search(title) is None

    def item_is_interesting_for_article(self, title, description, item):
        """Return True when *item* qualifies for a details/article page.

        Applies the same age and title checks as the headlines filter and
        additionally requires a description of at least 65 characters.
        """
        if self.is_item_older_than_n_days(item, 7):
            return False
        return (_CNN_SITE_RE.search(title) is None and
                len(description) >= 65)


# Test
#x = cnn_rss_renderer(
#    {"Fetch News" : 1,
#     "Shuffle News" : 1},
#    "rss.cnn.com",
#    [ "/rss/cnn_topstories.rss",
#      "/rss/money_latest.rss",
#      "/rss/cnn_tech.rss",
#    ],
#    "Test" )
#if x.fetch_news() == 0:
#    print("Error fetching news, no items fetched.")
#x.shuffle_news()