generic_news_rss_renderer.py

   1 import datetime
   2 from dateutil.parser import parse
   3 import file_writer
   4 import grab_bag
   5 import renderer
   6 import http.client
   7 import page_builder
   8 import profanity_filter
   9 import random
  10 import re
  11 import xml.etree.ElementTree as ET
  12
  13 class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
  14     def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
  15         super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict,
  16                                                         False)
  17         self.debug = 1
  18         self.feed_site = feed_site
  19         self.feed_uris = feed_uris
  20         self.page_title = page_title
  21         self.news = grab_bag.grab_bag()
  22         self.details = grab_bag.grab_bag()
  23         self.filter = profanity_filter.profanity_filter()
  24
  25     def debug_prefix(self):
  26         pass
  27
  28     def get_headlines_page_prefix(self):
  29         pass
  30
  31     def get_details_page_prefix(self):
  32         pass
  33
  34     def should_use_https(self):
  35         pass
  36
  37     def should_profanity_filter(self):
  38         return False
  39
  40     def find_title(self, item):
  41         return item.findtext('title')
  42
  43     def munge_title(self, title):
  44         return title
  45
  46     def find_description(self, item):
  47         return item.findtext('description')
  48
  49     def munge_description(self, description):
  50         description = re.sub('<[^>]+>', '', description)
  51         return description
  52
  53     def find_link(self, item):
  54         return item.findtext('link')
  55
  56     def munge_link(self, link):
  57         return link
  58
  59     def find_image(self, item):
  60         return item.findtext('image')
  61
  62     def munge_image(self, image):
  63         return image
  64
  65     def item_is_interesting_for_headlines(self, title, description, item):
  66         return True
  67
  68     def is_item_older_than_n_days(self, item, n):
  69         pubdate = item.findtext('pubDate')
  70         if pubdate is not None:
  71             pubdate = parse(pubdate)
  72             tzinfo = pubdate.tzinfo
  73             now = datetime.datetime.now(tzinfo)
  74             delta = (now - pubdate).total_seconds() / (60 * 60 * 24)
  75             if (delta > n):
  76                 return True
  77         return False
  78
  79     def item_is_interesting_for_article(self, title, description, item):
  80         return True
  81
  82     def periodic_render(self, key):
  83         if key == "Fetch News":
  84             return self.fetch_news()
  85         elif key == "Shuffle News":
  86             return self.shuffle_news()
  87         else:
  88             raise error('Unexpected operation')
  89
  90     def shuffle_news(self):
  91         headlines = page_builder.page_builder()
  92         headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
  93         headlines.set_title("%s" % self.page_title)
  94         subset = self.news.subset(4)
  95         if subset is None:
  96             self.debug_print("Not enough messages to choose from.")
  97             return False
  98         for msg in subset:
  99             headlines.add_item(msg)
 100         headlines.set_custom_html("""
 101 <STYLE>
 102 a:link {
 103   color: maroon;
 104   text-decoration: none;
 105   font-weight: bold;
 106 }
 107 a:visited {
 108   color: maroon;
 109   text-decoration: none;
 110   font-weight: bold;
 111 }
 112 a:active {
 113   color: maroon;
 114   text-decoration: none;
 115   font-weight: bold;
 116 }
 117 </STYLE>""")
 118         f = file_writer.file_writer('%s_4_none.html' % (
 119             self.get_headlines_page_prefix()))
 120         headlines.render_html(f)
 121         f.close()
 122
 123         details = page_builder.page_builder()
 124         details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
 125         details.set_custom_html("""
 126 <STYLE>
 127 a:link {
 128   color: maroon;
 129   text-decoration: none;
 130   font-weight: bold;
 131 }
 132 a:visited {
 133   color: maroon;
 134   text-decoration: none;
 135   font-weight: bold;
 136 }
 137 a:active {
 138   color: maroon;
 139   text-decoration: none;
 140   font-weight: bold;
 141 }
 142 </STYLE>""")
 143         details.set_title("%s" % self.page_title)
 144         subset = self.details.subset(1)
 145         if subset is None:
 146             self.debug_print("Not enough details to choose from.");
 147             return False
 148         for msg in subset:
 149             blurb = msg
 150             blurb += u'</TD>'
 151             details.add_item(blurb)
 152         g = file_writer.file_writer('%s_6_none.html' % (
 153             self.get_details_page_prefix()))
 154         details.render_html(g)
 155         g.close()
 156         return True
 157
 158     def fetch_news(self):
 159         count = 0
 160         self.news.clear()
 161         self.details.clear()
 162
 163         for uri in self.feed_uris:
 164             if self.should_use_https():
 165                 self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri))
 166                 self.conn = http.client.HTTPSConnection(self.feed_site)
 167             else:
 168                 self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri))
 169                 self.conn = http.client.HTTPConnection(self.feed_site)
 170             self.conn.request(
 171                 "GET",
 172                 uri,
 173                 None,
 174                 {"Accept-Charset": "utf-8"})
 175             response = self.conn.getresponse()
 176             if response.status != 200:
 177                 print(("%s: RSS fetch_news error, response: %d" % (self.page_title,
 178                                                                   response.status)))
 179                 self.debug_print(response.read())
 180                 return False
 181
 182             rss = ET.fromstring(response.read())
 183             channel = rss[0]
 184             for item in channel.getchildren():
 185                 title = self.find_title(item)
 186                 if title is not None:
 187                     title = self.munge_title(title)
 188                 description = item.findtext('description')
 189                 if description is not None:
 190                     description = self.munge_description(description)
 191                 image = self.find_image(item)
 192                 if image is not None:
 193                     image = self.munge_image(image)
 194                 link = item.findtext('link')
 195                 if link is not None:
 196                     link = self.munge_link(link)
 197
 198                 if (title is None or
 199                     not self.item_is_interesting_for_headlines(title,
 200                                                                description,
 201                                                                item)):
 202                     self.debug_print('Item "%s" is not interesting' % title)
 203                     continue
 204
 205                 if (self.should_profanity_filter() and
 206                     (self.filter.contains_bad_words(title) or
 207                     self.filter.contains_bad_words(description))):
 208                     self.debug_print('Found bad words in item "%s"' % title)
 209                     continue
 210
 211                 #print u"Title: %s\nDescription: %s\nLink: %s\nImage: %s\n" % (
 212                 #    title, description, link, image)
 213
 214                 blurb = u"""<DIV style="padding:8px;
 215                                  font-size:34pt;
 216                                  -webkit-column-break-inside:avoid;">"""
 217                 if image is not None:
 218                     blurb += u'<IMG SRC="%s" ALIGN=LEFT HEIGHT=115 ' % image
 219                     blurb += u'style="padding:8px;">'
 220
 221                 if link is None:
 222                     blurb += u'<P><B>%s</B>' % title
 223                 else:
 224                     blurb += u'<P><B><A HREF="%s">%s</A></B>' % (link, title)
 225
 226                 if (description is not None and
 227                     self.item_is_interesting_for_article(title,
 228                                                          description,
 229                                                          item)):
 230                     longblurb = blurb
 231                     longblurb += u"<BR>"
 232                     longblurb += description
 233                     longblurb += u"</DIV>"
 234                     longblurb = longblurb.replace("font-size:34pt",
 235                                                   "font-size:44pt")
 236                     self.details.add(longblurb)
 237
 238                 blurb += u"</DIV>"
 239                 self.news.add(blurb)
 240                 count += 1
 241         return count > 0