Cleanup and improve the RSS stuff.
[kiosk.git] / seattletimes_rss_renderer.py
1 import datetime
2 import generic_news_rss_renderer as gnrss
3
class seattletimes_rss_renderer(gnrss.generic_news_rss_renderer):
    """Seattle Times flavor of the generic news RSS renderer.

    Headlines are restricted to feed items whose category text mentions one
    of ``interesting_categories`` and that are no older than 14 days; article
    pages additionally require a description of at least 65 characters.
    """

    # Category names that make an item eligible for the headlines page.
    # Matching is a substring test against the item's <category> text, so
    # e.g. 'Life' also matches any category that merely contains "Life".
    interesting_categories = frozenset([
        'Nation',
        'World',
        'Life',
        # The comma after 'Technology' was missing, which silently fused it
        # with the next entry into the single string 'TechnologyLocal News'.
        'Technology',
        'Local News',
        'Food',
        'Drink',
        'Today File',
        'Seahawks',
        'Oddities',
        'Pacific NW',  # was misspelt 'Packfic NW'
        'Home',
        'Garden',
        'Travel',
        'Outdoors',
    ])

    def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
        """Forward all constructor arguments unchanged to the base renderer."""
        super(seattletimes_rss_renderer, self).__init__(
            name_to_timeout_dict,
            feed_site,
            feed_uris,
            page_title)

    def debug_prefix(self):
        """Return the tag used to identify this renderer in debug output."""
        return "seattletimes"

    def get_headlines_page_prefix(self):
        """Return the name prefix for the generated headlines page."""
        return "seattletimes-nonnews"

    def get_details_page_prefix(self):
        """Return the name prefix for the generated details page."""
        return "seattletimes-details-nonnews"

    def should_use_https(self):
        """Return True: this feed is always fetched over HTTPS."""
        return True

    def item_is_interesting_for_headlines(self, title, description, item):
        """Decide whether a feed item belongs on the headlines page.

        Args:
            title: item title (unused by this filter).
            description: item description (unused by this filter).
            item: XML element for the feed entry; must have tag "item".

        Returns:
            True when the item is an "item" element, is not older than 14
            days, carries a category containing one of
            ``interesting_categories`` and passes the pubDate check below;
            False otherwise.
        """
        if item.tag != "item":
            self.debug_print("Item.tag isn't item?!")
            return False
        if self.is_item_older_than_n_days(item, 14):
            return False

        # Map child tag -> text for every child that has text.  Iterate the
        # element directly: Element.getchildren() was removed in Python 3.9.
        # NOTE(review): with repeated tags (e.g. several <category> children)
        # only the last one survives in this dict -- confirm that is intended.
        details = {}
        for detail in item:
            self.debug_print("detail %s => %s (%s)" % (detail.tag,
                                                       detail.attrib,
                                                       detail.text))
            if detail.text is not None:
                details[detail.tag] = detail.text
        if "category" not in details:
            self.debug_print("No category in details?!")
            self.debug_print(details)
            return False

        # Log every matching category (not just the first) to keep the
        # debug output as informative as before.
        category_text = details["category"]
        interesting = False
        for wanted in seattletimes_rss_renderer.interesting_categories:
            if wanted in category_text:
                self.debug_print("%s looks like a good category." % wanted)
                interesting = True
        if not interesting:
            return False

        # NOTE(review): 'enclosure' is only recorded above when the element
        # has text, so this age check may rarely run -- confirm intent.
        if 'enclosure' in details and 'pubDate' in details:
            # Drop the trailing timezone token, leaving e.g.
            # "Fri, 13 Nov 2015 10:07:00".
            stamp = details['pubDate'].rsplit(' ', 1)[0]
            published = datetime.datetime.strptime(
                stamp, '%a, %d %b %Y %H:%M:%S')
            # self.oldest is not set in this class; presumably the base
            # class provides it -- TODO confirm.
            if published < self.oldest:
                self.debug_print("%s is too old." % (
                    details["pubDate"]))
                return False
        return True

    def item_is_interesting_for_article(self, title, description, item):
        """Decide whether a feed item deserves its own article page.

        Returns:
            False for items older than 14 days; otherwise True only when
            the description is at least 65 characters long.
        """
        if self.is_item_older_than_n_days(item, 14):
            return False
        return len(description) >= 65
85
#x = seattletimes_rss_renderer({"Test": 123},
#                              "www.seattletimes.com",
#                              [ "/life/feed/" ],
#                              "nonnews")
#x.periodic_render("Fetch News")
#x.periodic_render("Shuffle News")