3 from abc import abstractmethod
5 from dateutil.parser import parse
9 from typing import Dict, List, Optional, Union
10 import xml.etree.ElementTree as ET
12 from scottutilz import profanity_filter
# Module-level logger. Use __name__ (dotted module path) rather than
# __file__ so the logger participates in the standard hierarchical
# logging configuration instead of being keyed by a filesystem path.
logger = logging.getLogger(__name__)
# Base renderer for RSS news feeds: fetches feed XML from feed_site/feed_uris,
# filters items, and accumulates headline/detail blurbs for page rendering.
# NOTE(review): this listing is elided -- the `def __init__(...)` header and
# part of its parameter list are missing between these fragments.
class generic_news_rss_renderer(renderer.abstaining_renderer):
# Fragment of the (elided) __init__ signature: timer-key -> timeout mapping
# passed through to the superclass scheduler.
name_to_timeout_dict: Dict[str, int],
super().__init__(name_to_timeout_dict)
self.feed_site = feed_site          # hostname the feed is fetched from
self.feed_uris = feed_uris          # list of URI paths to fetch on feed_site
self.page_title = page_title        # title used for rendered pages
# Randomized-selection bags of rendered HTML blurbs (see shuffle_news).
self.news = grab_bag.grab_bag()
self.details = grab_bag.grab_bag()
# Used to drop items containing profanity when should_profanity_filter().
self.filter = profanity_filter.ProfanityFilter()
# Subclass hooks (bodies elided in this listing; presumably abstract).
# Prefix and priority values are interpolated into the output filenames
# written by shuffle_news.
def get_headlines_page_prefix(self) -> str:
def get_details_page_prefix(self) -> str:
def get_headlines_page_priority(self) -> str:
def get_details_page_priority(self) -> str:
# Whether to fetch the feed over HTTPS (see fetch_news).
def should_use_https(self) -> bool:
# Whether fetched items should be dropped when they contain profanity.
def should_profanity_filter(self) -> bool:
def find_title(self, item: ET.Element) -> Optional[str]:
    """Return the raw <title> text of an RSS item, or None when absent."""
    title_text = item.findtext("title")
    return title_text
# Hook: subclasses may rewrite an item's title before use (body elided).
def munge_title(self, title: str, item: ET.Element) -> str:
def find_description(self, item: ET.Element) -> Optional[str]:
    """Return the raw <description> text of an RSS item, or None when absent."""
    desc_text = item.findtext("description")
    return desc_text
# Normalize an item description (parameter list elided in this listing).
def munge_description(
# Strip all HTML/XML tags, leaving plain text.
description = re.sub("<[^>]+>", "", description)
def find_link(self, item: ET.Element) -> Optional[str]:
    """Return the raw <link> text of an RSS item, or None when absent."""
    link_text = item.findtext("link")
    return link_text
# Hook: subclasses may rewrite an item's link before use (body elided).
def munge_link(self, link: str) -> str:
def find_image(self, item: ET.Element) -> Optional[str]:
    """Return the raw <image> text of an RSS item, or None when absent."""
    image_text = item.findtext("image")
    return image_text
# Hook: subclasses may rewrite an item's image URL before use (body elided).
def munge_image(self, image: str) -> str:
def find_pubdate(self, item: ET.Element) -> Optional[str]:
    """Return the raw <pubDate> text of an RSS item, or None when absent."""
    pubdate_text = item.findtext("pubDate")
    return pubdate_text
# Hook: subclasses may rewrite an item's pubDate string (body elided).
def munge_pubdate(self, pubdate: str) -> str:
# Predicate: should this item appear on the headlines page?
# (return/body elided in this listing; presumably a subclass hook).
def item_is_interesting_for_headlines(
self, title: str, description: str, item: ET.Element
# Toggles: whether to render the headlines page / the details page at all.
def do_headlines(self) -> bool:
def do_details(self) -> bool:
# Return whether the item's pubDate is more than n days in the past.
# NOTE(review): the None-guard for pubdate and the final return/comparison
# lines are elided from this listing -- confirm against the full source.
def is_item_older_than_n_days(self, item: ET.Element, n: int) -> bool:
pubdate = self.find_pubdate(item)
# dateutil handles the RFC-822 style date strings common in RSS pubDate.
pubdatetime = parse(pubdate)
# Take "now" in the pubdate's own timezone so the subtraction below
# compares aware-with-aware (or naive-with-naive) consistently.
tzinfo = pubdatetime.tzinfo
now = datetime.datetime.now(tzinfo)
# Age of the item expressed in (fractional) days.
delta = (now - pubdatetime).total_seconds() / (60 * 60 * 24)
# Predicate: should this item also get a full "details" article page?
# (return/body elided in this listing; presumably a subclass hook).
def item_is_interesting_for_article(
self, title: str, description: str, item: ET.Element
# Dispatch a scheduler timer key to its handler; the keys correspond to
# entries in name_to_timeout_dict. (Handling of unrecognized keys is
# elided from this listing.)
def periodic_render(self, key: str) -> bool:
if key == "Fetch News":
return self.fetch_news()
elif key == "Shuffle News":
return self.shuffle_news()
# Pick random subsets of the accumulated blurbs and render the headlines
# page (4 items) and the details page (1 item) to HTML files.
# NOTE(review): this listing is heavily elided -- loop headers, the CSS
# passed to set_custom_html, and the return statements are missing.
def shuffle_news(self) -> bool:
if self.do_headlines():
headlines = page_builder.page_builder()
headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
headlines.set_title("%s" % self.page_title)
# Randomly choose up to 4 headline blurbs from the grab bag.
subset = self.news.subset(4)
logger.warning('Not enough messages to select from in shuffle_news?!')
headlines.add_item(msg)
# Custom CSS for the headlines page (string literal elided here).
headlines.set_custom_html(
text-decoration: none;
text-decoration: none;
text-decoration: none;
# Output filename encodes prefix, priority and (presumably) a refresh
# period -- confirm the 25900 suffix's meaning against file_writer users.
_ = f"{self.get_headlines_page_prefix()}_{self.get_headlines_page_priority()}_25900.html"
with file_writer.file_writer(_) as f:
headlines.render_html(f)
if self.do_details():
details = page_builder.page_builder()
details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
# Custom CSS for the details page (string literal elided here).
details.set_custom_html(
text-decoration: none;
text-decoration: none;
text-decoration: none;
details.set_title(self.page_title)
# Randomly choose a single detail blurb.
subset = self.details.subset(1)
logger.warning('Not enough details to choose from in do_details')
logger.debug("Not enough details to choose from.")
details.add_item(blurb)
_ = f"{self.get_details_page_prefix()}_{self.get_details_page_priority()}_86400.html"
with file_writer.file_writer(_) as g:
details.render_html(g)
# Fetch each configured feed URI, parse the RSS XML, filter items, and
# build HTML blurbs into self.news / self.details.
# NOTE(review): this listing is heavily elided (headers dict, try blocks,
# channel extraction, several conditionals and the final return are
# missing) and the function continues past the visible end of this view.
def fetch_news(self) -> bool:
self.conn: Optional[Union[http.client.HTTPConnection,
http.client.HTTPSConnection]] = None
for uri in self.feed_uris:
# Choose HTTP vs HTTPS per the subclass's should_use_https() hook.
if self.should_use_https():
url = f'https://{self.feed_site}{uri}'
logger.info(f'Fetching: {url}')
self.conn = http.client.HTTPSConnection(self.feed_site, timeout=10)
url = f'http://{self.feed_site}{uri}'
logger.info(f'Fetching: {url}')
self.conn = http.client.HTTPConnection(self.feed_site, timeout=10)
assert self.conn is not None
assert url is not None
# Part of the (elided) request headers dict.
"Cache-control": "max-age=50",
response = self.conn.getresponse()
except Exception as e:
f"Exception in generic RSS renderer HTTP connection fetching {url}; giving up."
# Non-200 responses abort this feed.
if response.status != 200:
f'Unexpected status {response.status} while fetching {url}; giving up.'
raw = response.read()
logger.info(f'Status 200: got {len(raw)} bytes back from {url}')
# Parse the feed body as XML; `channel` is bound in an elided line.
rss = ET.fromstring(raw)
for item in list(channel):
title = self.find_title(item)
description = item.findtext("description")
if title is not None:
title = self.munge_title(title, item)
logger.info('Skipping RSS feed item with no title.')
logger.debug(f'Considering RSS item {title}...')
if description is not None:
description = self.munge_description(description, item)
image = self.find_image(item)
if image is not None:
image = self.munge_image(image)
link = item.findtext("link")
link = self.munge_link(link)
# Drop items the subclass deems uninteresting for headlines.
if not self.item_is_interesting_for_headlines(
title, description, item
logger.info(f'Skipping {title} because it\'s not interesting.')
# Optional profanity filtering of title/description.
if self.should_profanity_filter() and (
self.filter.contains_bad_word(title)
or self.filter.contains_bad_word(description)
logger.info(f'Skipping {title} because it contains profanity.')
# Deduplicate by exact title (title_filter is bound in an elided line).
if title in title_filter:
logger.info(f'Skipping {title} because we already saw an item with the same title.')
title_filter.add(title)
# Build the headline blurb HTML; image floats left when present.
blurb = """<DIV style="padding:8px;
-webkit-column-break-inside:avoid;">"""
if image is not None:
blurb += f'<IMG SRC="{image}" ALIGN=LEFT HEIGHT=115 '
blurb += 'style="padding:8px;">'
# Title with or without a hyperlink (link-presence branch is elided).
blurb += f"<P><B>{title}</B>"
blurb += f'<P><B><A HREF="{link}">{title}</A></B>'
# Append an abbreviated publication date when one parses cleanly
# (`ts` is bound in an elided line -- presumably from the pubdate).
pubdate = self.find_pubdate(item)
if pubdate is not None:
logger.debug(f'Raw pubdate={pubdate}')
pubdate = self.munge_pubdate(pubdate)
logger.debug(f'Translated pubdate into: {ts}')
blurb += f' <FONT COLOR=#cccccc>{ts.strftime("%b %d")}</FONT>'
# Optionally build a larger details-page blurb from the same item.
if self.item_is_interesting_for_article(title, description, item):
logger.info(f'Item {title} is also interesting as an article details page; creating...')
longblurb += description
longblurb += "</DIV>"
# Enlarge the font for the one-item details layout.
longblurb = longblurb.replace("font-size:34pt", "font-size:44pt")
self.details.add(longblurb)
logger.info(f'Item {title} isn\'t interesting for article details page; skipped.')
logger.debug(f'Added {count} items so far...')