ab59888cdedeae2d23575e48215f365d2d326692
[kiosk.git] / generic_news_rss_renderer.py
1 import file_writer
2 import grab_bag
3 import renderer
4 import http.client
5 import page_builder
6 import profanity_filter
7 import random
8 import re
9 import xml.etree.ElementTree as ET
10
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
    """Base renderer that turns RSS feeds into headline and detail pages.

    Subclasses supply the site-specific pieces by overriding the hook
    methods below (page prefixes, HTTP vs. HTTPS, how to locate and clean
    up fields of a feed item, and which items are worth showing).

    Two periodic operations are supported (see periodic_render):
      * "Fetch News"   -- download every feed and rebuild the item pools.
      * "Shuffle News" -- pick random items from the pools and write pages.
    """

    def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
        """Remember the feed location and create empty item pools.

        Args:
            name_to_timeout_dict: passed straight to the base renderer.
            feed_site: host name the feeds are fetched from.
            feed_uris: iterable of request paths on feed_site, one per feed.
            page_title: title used on the generated pages and in messages.
        """
        super().__init__(name_to_timeout_dict, False)
        self.debug = 1
        self.feed_site = feed_site
        self.feed_uris = feed_uris
        self.page_title = page_title
        # Pool of short headline blurbs (UTF-8 encoded HTML fragments).
        self.news = grab_bag.grab_bag()
        # Pool of long blurbs that also include the item description.
        self.details = grab_bag.grab_bag()
        self.filter = profanity_filter.profanity_filter()

    # ------------------------------------------------------------------
    # Hooks for subclasses.  The defaults that just `pass` return None.
    # ------------------------------------------------------------------

    def debug_prefix(self):
        """Hook: subclasses override; the default returns None."""
        pass

    def get_headlines_page_prefix(self):
        """Hook: filename prefix of the four-item headlines page."""
        pass

    def get_details_page_prefix(self):
        """Hook: filename prefix of the single-item details page."""
        pass

    def should_use_https(self):
        """Hook: return a true value to fetch over HTTPS.

        The default returns None, which is falsy, so plain HTTP is used.
        """
        pass

    def should_profanity_filter(self):
        """Hook: return True to drop items containing filtered words."""
        return False

    def find_title(self, item):
        """Return the text of the item's <title> child, or None."""
        return item.findtext('title')

    def munge_title(self, title):
        """Hook: clean up a title before display; identity by default."""
        return title

    def find_description(self, item):
        """Return the text of the item's <description> child, or None."""
        return item.findtext('description')

    def munge_description(self, description):
        """Strip anything that looks like an HTML/XML tag from description."""
        return re.sub(r'<[^>]+>', '', description)

    def find_link(self, item):
        """Return the text of the item's <link> child, or None."""
        return item.findtext('link')

    def find_image(self, item):
        """Return the text of the item's <image> child, or None."""
        return item.findtext('image')

    def item_is_interesting_for_headlines(self, title, description, item):
        """Hook: return a true value to show the item as a headline.

        The default returns None (falsy), so nothing is shown unless a
        subclass overrides this.
        """
        pass

    def item_is_interesting_for_article(self, title, description, item):
        """Hook: return a true value to also show the item as a detail page.

        The default returns None (falsy).
        """
        pass

    # ------------------------------------------------------------------
    # Periodic operations.
    # ------------------------------------------------------------------

    def periodic_render(self, key):
        """Dispatch a periodic operation by name.

        Args:
            key: "Fetch News" or "Shuffle News".

        Returns:
            The boolean result of the selected operation.

        Raises:
            ValueError: if key is not one of the supported operations.
        """
        if key == "Fetch News":
            return self.fetch_news()
        if key == "Shuffle News":
            return self.shuffle_news()
        # The original raised an undefined name here, which surfaced as a
        # confusing NameError instead of describing the real problem.
        raise ValueError('Unexpected operation: %r' % (key,))

    def shuffle_news(self):
        """Write a headlines page and a details page from the item pools.

        Returns:
            True when both pages were written; False when either pool could
            not supply enough items (its subset() returned None).  Note that
            the headlines page has already been written by the time the
            details pool is consulted.
        """
        headlines = page_builder.page_builder()
        headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
        headlines.set_title("%s" % self.page_title)
        subset = self.news.subset(4)
        if subset is None:
            self.debug_print("Not enough messages to choose from.")
            return False
        for msg in subset:
            headlines.add_item(msg)
        f = file_writer.file_writer(
            '%s_4_none.html' % (self.get_headlines_page_prefix()))
        try:
            headlines.render_html(f)
        finally:
            # Close the output even if rendering fails part-way through.
            f.close()

        details = page_builder.page_builder()
        details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
        details.set_title("%s" % self.page_title)
        subset = self.details.subset(1)
        if subset is None:
            self.debug_print("Not enough details to choose from.")
            return False
        for msg in subset:
            # Pool entries are bytes (see fetch_news), hence the bytes suffix.
            details.add_item(msg + b'</TD>\n')
        g = file_writer.file_writer(
            '%s_6_none.html' % (self.get_details_page_prefix()))
        try:
            details.render_html(g)
        finally:
            g.close()
        return True

    def _fetch_feed(self, uri):
        """Download one feed and return its raw body, or None on a non-200.

        The connection is stored on self.conn (as before) and is always
        closed before returning so sockets are not leaked between fetches.
        Network-level exceptions propagate to the caller unchanged.
        """
        if self.should_use_https():
            self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri))
            self.conn = http.client.HTTPSConnection(self.feed_site)
        else:
            self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri))
            self.conn = http.client.HTTPConnection(self.feed_site)
        try:
            self.conn.request(
                "GET",
                uri,
                None,
                {"Accept-Charset": "utf-8"})
            response = self.conn.getresponse()
            body = response.read()
            if response.status != 200:
                print(("%s: RSS fetch_news error, response: %d" % (
                    self.page_title, response.status)))
                self.debug_print(body)
                return None
            return body
        finally:
            self.conn.close()

    def fetch_news(self):
        """Rebuild the headline and detail pools from every configured feed.

        Both pools are cleared first.  Each child element of the first child
        of the feed's root element is treated as a candidate item; elements
        without a title, items rejected by the interest hooks, and (when
        enabled) items containing filtered words are skipped.

        Returns:
            True if at least one headline was added.  False if nothing was
            added or if any feed answered with a non-200 status (in which
            case the remaining feeds are not fetched).
        """
        count = 0
        self.news.clear()
        self.details.clear()

        for uri in self.feed_uris:
            body = self._fetch_feed(uri)
            if body is None:
                return False

            rss = ET.fromstring(body)
            if len(rss) == 0:
                # An empty document has no channel; rss[0] would raise.
                self.debug_print('Feed "%s" contains no channel' % uri)
                continue
            channel = rss[0]

            # Iterate the element directly: Element.getchildren() was
            # removed in Python 3.9.
            for item in channel:
                title = self.find_title(item)
                if title is not None:
                    title = self.munge_title(title)
                # Go through the find_* hooks (as find_title already did) so
                # that subclass overrides are honoured.
                description = self.find_description(item)
                if description is not None:
                    description = self.munge_description(description)
                image = self.find_image(item)

                if (title is None or
                    not self.item_is_interesting_for_headlines(title,
                                                               description,
                                                               item)):
                    self.debug_print('Item "%s" is not interesting' % title)
                    continue

                # description may legitimately be None; only filter it when
                # there is text to look at.
                if (self.should_profanity_filter() and
                    (self.filter.contains_bad_words(title) or
                     (description is not None and
                      self.filter.contains_bad_words(description)))):
                    self.debug_print('Found bad words in item "%s"' % title)
                    continue

                # NOTE(review): title, description and image come from the
                # remote feed and are inserted into the HTML unescaped --
                # confirm the feeds are trusted or escape them here.
                blurb = """<DIV style="padding:8px;
                                 font-size:34pt;
                                 -webkit-column-break-inside:avoid;">"""
                if image is not None:
                    blurb += '<IMG SRC="%s" ALIGN=LEFT HEIGHT=115 style="padding:8px;">\n' % image
                blurb += '<P><B>%s</B>' % title

                if (description is not None and
                    self.item_is_interesting_for_article(title,
                                                         description,
                                                         item)):
                    # The detail version reuses the headline markup with a
                    # larger font and the description appended.
                    longblurb = blurb
                    longblurb += "<BR>"
                    longblurb += description
                    longblurb += "</DIV>"
                    longblurb = longblurb.replace("font-size:34pt",
                                                  "font-size:44pt")
                    self.details.add(longblurb.encode('utf-8', errors='ignore'))

                blurb += "</DIV>"
                self.news.add(blurb.encode('utf-8', errors='ignore'))
                count += 1
        return count > 0
175
176 # Test
177 #x = generic_news_rss_renderer(
178 #    {"Fetch News" : 1,
179 #     "Shuffle News" : 1},
180 #    "rss.cnn.com",
181 #    [ "/rss/generic_news_topstories.rss",
182 #      "/rss/money_latest.rss",
183 #      "/rss/generic_news_tech.rss",
184 #    ],
185 #    "Test" )
186 #if x.fetch_news() == 0:
187 #    print "Error fetching news, no items fetched."
188 #x.shuffle_news()