import datetime

import generic_news_rss_renderer as gnrss


class seattletimes_rss_renderer(gnrss.generic_news_rss_renderer):
    """Seattle Times specialization of the generic news RSS renderer.

    Overrides the filtering hooks so that only feed items whose category
    text mentions one of ``interesting_categories`` are used for headlines,
    and only items with a reasonably long description are used for articles.
    """

    # Substrings searched for inside an item's <category> text.  Each entry
    # must be its own string: a missing comma between two adjacent literals
    # silently concatenates them into a single entry that never matches.
    interesting_categories = frozenset([
        'Nation',
        'World',
        'Life',
        'Technology',
        'Local News',
        'Food',
        'Drink',
        'Today File',
        'Seahawks',
        'Oddities',
        'Pacific NW',
        'Home',
        'Garden',
        'Travel',
        'Outdoors',
    ])

    def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
        """Forward all arguments to the base renderer and compute the age cutoff.

        ``self.oldest`` is a naive local timestamp 14 days before construction
        time; it is compared against item publication dates in
        ``item_is_interesting_for_headlines``.
        """
        super(seattletimes_rss_renderer, self).__init__(
            name_to_timeout_dict,
            feed_site,
            feed_uris,
            page_title)
        self.oldest = datetime.datetime.now() - datetime.timedelta(14)
        self.debug_print("oldest story we'll keep: %s" % self.oldest)

    def debug_prefix(self):
        """Return the tag identifying this renderer in debug output."""
        return "seattletimes"

    def get_headlines_page_prefix(self):
        """Return the prefix string for the headlines page."""
        return "seattletimes-nonnews"

    def get_details_page_prefix(self):
        """Return the prefix string for the details page."""
        return "seattletimes-details-nonnews"

    def should_use_https(self):
        """Return True: this feed is to be fetched over HTTPS."""
        return True

    def item_is_interesting_for_headlines(self, title, description, item):
        """Decide whether a feed item should be shown as a headline.

        Args:
            title: the item's title (unused here).
            description: the item's description (unused here).
            item: an ElementTree-style element expected to have tag "item".

        Returns:
            False when the element is not an "item", has no category text,
            matches none of ``interesting_categories``, or is older than
            ``self.oldest`` (see the note on the age check below); True
            otherwise.

        Raises:
            ValueError: if the age check runs and pubDate (minus its final
                space-separated token) does not match
                '%a, %d %b %Y %H:%M:%S'.
        """
        if item.tag != "item":
            self.debug_print("Item.tag isn't item?!")
            return False

        # Map child tag -> text for every child that carries text.  When a
        # tag repeats, the last child with text wins.  Iterating the element
        # directly replaces Element.getchildren(), which was removed in
        # Python 3.9.
        details = {}
        for detail in item:
            self.debug_print("detail %s => %s (%s)" % (detail.tag,
                                                       detail.attrib,
                                                       detail.text))
            if detail.text is not None:
                details[detail.tag] = detail.text

        if "category" not in details:
            self.debug_print("No category in details?!")
            self.debug_print(details)
            return False

        # Substring match against the category text.  The loop deliberately
        # does not stop at the first hit so every match is reported in the
        # debug output.
        interesting = False
        for x in seattletimes_rss_renderer.interesting_categories:
            if x in details["category"]:
                self.debug_print("%s looks like a good category." % x)
                interesting = True
        if not interesting:
            return False

        # NOTE(review): the age check only runs when an <enclosure> child has
        # text, because only children with text are recorded above.  Confirm
        # whether the check was meant to apply to every item.
        if 'enclosure' in details:
            if 'pubDate' in details:
                x = details['pubDate']
                # Drop the last space-separated token (presumably the
                # timezone offset) so the remainder parses as a naive time.
                x = x.rsplit(' ', 1)[0]
                # Fri, 13 Nov 2015 10:07:00
                dt = datetime.datetime.strptime(x, '%a, %d %b %Y %H:%M:%S')
                if dt < self.oldest:
                    self.debug_print("%s is too old." % (
                        details["pubDate"]))
                    return False
        return True

    def item_is_interesting_for_article(self, title, description, item):
        """Return True when the description is at least 65 characters long."""
        return len(description) >= 65

#x = seattletimes_rss_renderer({"Test": 123},
#                              "www.seattletimes.com",
#                              [ "/life/feed/" ],
#                              "nonnews")
#x.periodic_render("Fetch News")
#x.periodic_render("Shuffle News")