2 from dateutil.parser import parse
8 import profanity_filter
11 import xml.etree.ElementTree as ET
13 class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
14 def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
15 super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict,
18 self.feed_site = feed_site
19 self.feed_uris = feed_uris
20 self.page_title = page_title
21 self.news = grab_bag.grab_bag()
22 self.details = grab_bag.grab_bag()
23 self.filter = profanity_filter.profanity_filter()
25 def debug_prefix(self):
28 def get_headlines_page_prefix(self):
31 def get_details_page_prefix(self):
34 def should_use_https(self):
37 def should_profanity_filter(self):
def find_title(self, item):
    """Look up the headline text of a feed entry.

    Overridable hook: asks ``item`` for the text of its ``title`` child
    using ``findtext``.

    Args:
        item: a feed entry; expected to be an ElementTree element (or
            anything else exposing ``findtext``) -- TODO confirm callers.

    Returns:
        Whatever ``item.findtext('title')`` yields; for ElementTree
        elements that is the child's text, or None when there is no
        ``title`` child.
    """
    headline = item.findtext('title')
    return headline
43 def munge_title(self, title):
def find_description(self, item):
    """Look up the summary/body text of a feed entry.

    Overridable hook: asks ``item`` for the text of its ``description``
    child using ``findtext``.

    Args:
        item: a feed entry; expected to be an ElementTree element (or
            anything else exposing ``findtext``) -- TODO confirm callers.

    Returns:
        Whatever ``item.findtext('description')`` yields; for ElementTree
        elements that is the child's text, or None when there is no
        ``description`` child.
    """
    summary = item.findtext('description')
    return summary
49 def munge_description(self, description):
50 description = re.sub('<[^>]+>', '', description)
def find_link(self, item):
    """Look up the article URL of a feed entry.

    Overridable hook: asks ``item`` for the text of its ``link`` child
    using ``findtext``.

    Args:
        item: a feed entry; expected to be an ElementTree element (or
            anything else exposing ``findtext``) -- TODO confirm callers.

    Returns:
        Whatever ``item.findtext('link')`` yields; for ElementTree
        elements that is the child's text, or None when there is no
        ``link`` child.
    """
    url = item.findtext('link')
    return url
56 def munge_link(self, link):
def find_image(self, item):
    """Look up the image reference of a feed entry.

    Overridable hook: asks ``item`` for the text of its ``image`` child
    using ``findtext``.

    Args:
        item: a feed entry; expected to be an ElementTree element (or
            anything else exposing ``findtext``) -- TODO confirm callers.

    Returns:
        Whatever ``item.findtext('image')`` yields; for ElementTree
        elements that is the child's text, or None when there is no
        ``image`` child.
    """
    picture = item.findtext('image')
    return picture
62 def munge_image(self, image):
65 def item_is_interesting_for_headlines(self, title, description, item):
68 def is_item_older_than_n_days(self, item, n):
69 pubdate = item.findtext('pubDate')
70 if pubdate is not None:
71 pubdate = parse(pubdate)
72 tzinfo = pubdate.tzinfo
73 now = datetime.datetime.now(tzinfo)
74 delta = (now - pubdate).total_seconds() / (60 * 60 * 24)
79 def item_is_interesting_for_article(self, title, description, item):
82 def periodic_render(self, key):
83 if key == "Fetch News":
84 return self.fetch_news()
85 elif key == "Shuffle News":
86 return self.shuffle_news()
88 raise error('Unexpected operation')
90 def shuffle_news(self):
91 headlines = page_builder.page_builder()
92 headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
93 headlines.set_title("%s" % self.page_title)
94 subset = self.news.subset(4)
96 self.debug_print("Not enough messages to choose from.")
99 headlines.add_item(msg)
100 headlines.set_custom_html("""
104 text-decoration: none;
109 text-decoration: none;
114 text-decoration: none;
118 f = file_writer.file_writer('%s_4_none.html' % (
119 self.get_headlines_page_prefix()))
120 headlines.render_html(f)
123 details = page_builder.page_builder()
124 details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
125 details.set_custom_html("""
129 text-decoration: none;
134 text-decoration: none;
139 text-decoration: none;
143 details.set_title("%s" % self.page_title)
144 subset = self.details.subset(1)
146 self.debug_print("Not enough details to choose from.");
151 details.add_item(blurb)
152 g = file_writer.file_writer('%s_6_none.html' % (
153 self.get_details_page_prefix()))
154 details.render_html(g)
158 def fetch_news(self):
163 for uri in self.feed_uris:
164 if self.should_use_https():
165 self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri))
166 self.conn = http.client.HTTPSConnection(self.feed_site)
168 self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri))
169 self.conn = http.client.HTTPConnection(self.feed_site)
174 {"Accept-Charset": "utf-8"})
175 response = self.conn.getresponse()
176 if response.status != 200:
177 print(("%s: RSS fetch_news error, response: %d" % (self.page_title,
179 self.debug_print(response.read())
182 rss = ET.fromstring(response.read())
184 for item in channel.getchildren():
185 title = self.find_title(item)
186 if title is not None:
187 title = self.munge_title(title)
188 description = item.findtext('description')
189 if description is not None:
190 description = self.munge_description(description)
191 image = self.find_image(item)
192 if image is not None:
193 image = self.munge_image(image)
194 link = item.findtext('link')
196 link = self.munge_link(link)
199 not self.item_is_interesting_for_headlines(title,
202 self.debug_print('Item "%s" is not interesting' % title)
205 if (self.should_profanity_filter() and
206 (self.filter.contains_bad_words(title) or
207 self.filter.contains_bad_words(description))):
208 self.debug_print('Found bad words in item "%s"' % title)
211 #print u"Title: %s\nDescription: %s\nLink: %s\nImage: %s\n" % (
212 # title, description, link, image)
214 blurb = u"""<DIV style="padding:8px;
216 -webkit-column-break-inside:avoid;">"""
217 if image is not None:
218 blurb += u'<IMG SRC="%s" ALIGN=LEFT HEIGHT=115 ' % image
219 blurb += u'style="padding:8px;">'
222 blurb += u'<P><B>%s</B>' % title
224 blurb += u'<P><B><A HREF="%s">%s</A></B>' % (link, title)
226 if (description is not None and
227 self.item_is_interesting_for_article(title,
232 longblurb += description
233 longblurb += u"</DIV>"
234 longblurb = longblurb.replace("font-size:34pt",
236 self.details.add(longblurb)