import re

from bs4 import BeautifulSoup

import generic_news_rss_renderer


class google_news_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
    """News renderer specialised for Google News RSS feeds.

    Overrides the hooks of ``generic_news_rss_renderer`` that choose page
    prefixes, extract and clean up item descriptions, and decide which feed
    items are recent enough to show.
    """

    # Longest serialized description kept before it is cut and marked "[...]".
    MAX_DESCRIPTION_CHARS = 400

    # Items older than this many days are filtered out of both page kinds.
    MAX_ITEM_AGE_DAYS = 2

    def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
        """Forward all arguments to the base renderer and enable debugging.

        Args:
            name_to_timeout_dict: Passed through unchanged to the base class.
            feed_site: Passed through unchanged to the base class.
            feed_uris: Passed through unchanged to the base class.
            page_title: Passed through unchanged to the base class.
        """
        super(google_news_rss_renderer, self).__init__(
            name_to_timeout_dict, feed_site, feed_uris, page_title
        )
        self.debug = 1

    def debug_prefix(self):
        """Return the tag identifying this renderer in debug output."""
        return "google-news"

    def get_headlines_page_prefix(self):
        """Return the prefix used for the headlines page."""
        return "google-news"

    def get_details_page_prefix(self):
        """Return the prefix used for the details page."""
        return "google-news-details"

    def find_description(self, item):
        """Return the item's description with its source appended.

        Args:
            item: A feed item exposing ``findtext(tag)``; presumably an
                ``xml.etree.ElementTree.Element`` -- confirm against the
                base class.

        Returns:
            ``"<description> (<source>)"`` when both children are present,
            the bare description when there is no ``<source>``, just
            ``"(<source>)"`` when only the source is present, and ``None``
            when the item has neither.
        """
        descr = item.findtext('description')
        source = item.findtext('source')
        if source is None:
            return descr
        if descr is None:
            # A missing <description> used to raise TypeError (None + str);
            # fall back to the attribution alone instead of crashing.
            return "(%s)" % source
        return descr + " (%s)" % source

    def munge_description(self, description):
        """Strip link targets from a description and cap its length.

        Args:
            description: HTML fragment for one feed item; ``None`` or an
                empty string yields ``""``.

        Returns:
            The fragment re-serialized with every ``href`` attribute removed
            from its ``<a>`` tags. Output longer than
            ``MAX_DESCRIPTION_CHARS`` is cut to that length and suffixed
            with ``" [...]"``.
        """
        if not description:
            return ""

        # Name the parser explicitly: without it bs4 picks whichever parser
        # is installed, so the output (e.g. lxml's <html><body> wrapper)
        # would vary between machines and bs4 would emit a warning.
        soup = BeautifulSoup(description, "html.parser")
        for anchor in soup.find_all('a'):
            del anchor['href']

        descr = str(soup)
        if len(descr) > self.MAX_DESCRIPTION_CHARS:
            # NOTE(review): the cut is applied to serialized markup, so it
            # can land inside a tag or entity; kept as-is for compatibility.
            descr = descr[:self.MAX_DESCRIPTION_CHARS] + " [...]"
        return descr

    def find_image(self, item):
        """Return ``None``; this renderer never supplies an image."""
        return None

    def should_use_https(self):
        """Return ``True``; this renderer always asks for HTTPS."""
        return True

    def item_is_interesting_for_headlines(self, title, description, item):
        """Return whether *item* is recent enough for the headlines page.

        ``title`` and ``description`` are accepted for interface
        compatibility but not consulted; only the item's age matters.
        """
        return not self.is_item_older_than_n_days(item, self.MAX_ITEM_AGE_DAYS)

    def item_is_interesting_for_article(self, title, description, item):
        """Return whether *item* is recent enough for the details page.

        ``title`` and ``description`` are accepted for interface
        compatibility but not consulted; only the item's age matters.
        """
        return not self.is_item_older_than_n_days(item, self.MAX_ITEM_AGE_DAYS)


# Manual smoke test (left disabled):
#
# x = google_news_rss_renderer(
#     {"Fetch News" : 1,
#      "Shuffle News" : 1},
#     "news.google.com",
#     [ "/rss?hl=en-US&gl=US&ceid=US:en" ],
#     "Test" )
# if x.fetch_news() == 0:
#     print("Error fetching news, no items fetched.")
# x.shuffle_news()