3 from abc import abstractmethod
5 from dateutil.parser import parse
11 from typing import Dict, List, Optional, Union
12 import xml.etree.ElementTree as ET
18 import profanity_filter
21 class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
24 name_to_timeout_dict: Dict[str, int],
29 super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict, False)
31 self.feed_site = feed_site
32 self.feed_uris = feed_uris
33 self.page_title = page_title
34 self.news = grab_bag.grab_bag()
35 self.details = grab_bag.grab_bag()
36 self.filter = profanity_filter.ProfanityFilter()
39 def debug_prefix(self) -> str:
43 def get_headlines_page_prefix(self) -> str:
47 def get_details_page_prefix(self) -> str:
50 def get_headlines_page_priority(self) -> str:
53 def get_details_page_priority(self) -> str:
57 def should_use_https(self) -> bool:
60 def should_profanity_filter(self) -> bool:
def find_title(self, item: ET.Element) -> Optional[str]:
    """Locate the raw headline text for one RSS item.

    Returns the text content of the item's <title> child element, or
    None when the item carries no <title> element at all.  Subclasses
    may override this for feeds with nonstandard schemas.
    """
    headline = item.findtext("title")
    return headline
66 def munge_title(self, title: str, item: ET.Element) -> str:
def find_description(self, item: ET.Element) -> Optional[str]:
    """Pull the body text out of one RSS item.

    Returns the text of the item's <description> child, or None if the
    feed entry has no <description> element.  Subclasses may override
    this for feeds that put the body somewhere else.
    """
    body = item.findtext("description")
    return body
72 def munge_description(
77 description = re.sub("<[^>]+>", "", description)
def find_link(self, item: ET.Element) -> Optional[str]:
    """Fetch the article URL for one RSS item.

    Returns the text of the item's <link> child element, or None when
    no <link> element is present.
    """
    url = item.findtext("link")
    return url
83 def munge_link(self, link: str) -> str:
def find_image(self, item: ET.Element) -> Optional[str]:
    """Look up an image reference for one RSS item.

    Returns the text of the item's <image> child element, or None if
    the item has no <image> element.  Subclasses may override this for
    feeds that use <enclosure>/<media:content> instead.
    """
    image_ref = item.findtext("image")
    return image_ref
89 def munge_image(self, image: str) -> str:
def find_pubdate(self, item: ET.Element) -> Optional[str]:
    """Read the publication timestamp string for one RSS item.

    Returns the text of the item's <pubDate> child (note the RSS 2.0
    camel-case tag name), or None when the element is absent.  The
    value is returned unparsed; callers feed it to a date parser.
    """
    stamp = item.findtext("pubDate")
    return stamp
95 def munge_pubdate(self, pubdate: str) -> str:
98 def item_is_interesting_for_headlines(
99 self, title: str, description: str, item: ET.Element
103 def do_headlines(self) -> bool:
106 def do_details(self) -> bool:
109 def is_item_older_than_n_days(self, item: ET.Element, n: int) -> bool:
110 pubdate = self.find_pubdate(item)
113 pubdatetime = parse(pubdate)
114 tzinfo = pubdatetime.tzinfo
115 now = datetime.datetime.now(tzinfo)
116 delta = (now - pubdatetime).total_seconds() / (60 * 60 * 24)
119 def item_is_interesting_for_article(
120 self, title: str, description: str, item: ET.Element
124 def periodic_render(self, key: str) -> bool:
125 if key == "Fetch News":
126 return self.fetch_news()
127 elif key == "Shuffle News":
128 return self.shuffle_news()
132 def shuffle_news(self) -> bool:
133 if self.do_headlines():
134 headlines = page_builder.page_builder()
135 headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
136 headlines.set_title("%s" % self.page_title)
137 subset = self.news.subset(4)
139 self.debug_print("Not enough messages to choose from.")
142 headlines.add_item(msg)
143 headlines.set_custom_html(
148 text-decoration: none;
153 text-decoration: none;
158 text-decoration: none;
163 _ = f"{self.get_headlines_page_prefix()}_{self.get_headlines_page_priority()}_25900.html"
164 with file_writer.file_writer(_) as f:
165 headlines.render_html(f)
167 if self.do_details():
168 details = page_builder.page_builder()
169 details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
170 details.set_custom_html(
175 text-decoration: none;
180 text-decoration: none;
185 text-decoration: none;
190 details.set_title(f"{self.page_title}")
191 subset = self.details.subset(1)
193 self.debug_print("Not enough details to choose from.")
198 details.add_item(blurb)
199 _ = f"{self.get_details_page_prefix()}_{self.get_details_page_priority()}_86400.html"
200 with file_writer.file_writer(_) as g:
201 details.render_html(g)
204 def fetch_news(self) -> bool:
208 self.conn: Optional[Union[http.client.HTTPConnection,
209 http.client.HTTPSConnection]] = None
211 for uri in self.feed_uris:
212 if self.should_use_https():
213 self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri))
214 self.conn = http.client.HTTPSConnection(self.feed_site, timeout=10)
216 self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri))
217 self.conn = http.client.HTTPConnection(self.feed_site, timeout=10)
218 assert(self.conn is not None)
225 # "Cache-control": "max-age=50",
226 # "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
230 response = self.conn.getresponse()
231 except Exception as e:
232 traceback.print_exc(file=sys.stdout)
234 f"Exception in generic RSS renderer HTTP connection fetching {self.feed_site}{uri}"
238 if response.status != 200:
240 f"{self.page_title}: RSS fetch_news error, response: {response.status}"
242 self.debug_print(str(response.read()))
245 rss = ET.fromstring(response.read())
248 for item in channel.getchildren():
249 title = self.find_title(item)
250 description = item.findtext("description")
251 if title is not None:
252 title = self.munge_title(title, item)
253 if description is not None:
254 description = self.munge_description(description, item)
257 image = self.find_image(item)
258 if image is not None:
259 image = self.munge_image(image)
260 link = item.findtext("link")
262 link = self.munge_link(link)
264 if title is None or not self.item_is_interesting_for_headlines(
265 title, description, item
267 self.debug_print(f'Item "{title}" is not interesting')
270 if self.should_profanity_filter() and (
271 self.filter.contains_bad_word(title)
272 or self.filter.contains_bad_word(description)
274 self.debug_print(f'Found bad words in item "{title}"')
277 if title in title_filter:
278 self.debug_print(f'Already saw title {title}, skipping.')
279 title_filter.add(title)
281 blurb = """<DIV style="padding:8px;
283 -webkit-column-break-inside:avoid;">"""
284 if image is not None:
285 blurb += f'<IMG SRC="{image}" ALIGN=LEFT HEIGHT=115 '
286 blurb += 'style="padding:8px;">'
289 blurb += f"<P><B>{title}</B>"
291 blurb += f'<P><B><A HREF="{link}">{title}</A></B>'
293 pubdate = self.find_pubdate(item)
294 if pubdate is not None:
295 pubdate = self.munge_pubdate(pubdate)
297 blurb += f' <FONT COLOR=#cccccc>{ts.strftime("%b %d")}</FONT>'
299 if self.item_is_interesting_for_article(title, description, item):
302 longblurb += description
303 longblurb += "</DIV>"
304 longblurb = longblurb.replace("font-size:34pt", "font-size:44pt")
305 self.details.add(longblurb)