3 from abc import abstractmethod
5 from dateutil.parser import parse
9 from typing import Dict, List, Optional, Union
10 import xml.etree.ElementTree as ET
12 from scottutilz import profanity_filter
20 logger = logging.getLogger(__name__)
# Base class for renderers that periodically fetch an RSS feed, filter its
# items, and emit headline and article-details HTML pages.
# NOTE(review): this excerpt elides several lines of the __init__ signature
# (the "def __init__" line and the feed_site / feed_uris / page_title
# parameters) — confirm against the full file before relying on the shape.
class generic_news_rss_renderer(renderer.abstaining_renderer):
        # Mapping of periodic job name -> timeout; forwarded to the base class,
        # and matched by periodic_render()'s dispatch keys below.
        name_to_timeout_dict: Dict[str, int],
        super().__init__(name_to_timeout_dict)
        self.feed_site = feed_site      # feed host; fetch_news() builds URLs from it
        self.feed_uris = feed_uris      # URI paths on that host, one per feed
        self.page_title = page_title    # title shown on the headlines page
        # Bags of rendered HTML blurbs; shuffle_news() draws random subsets.
        self.news = grab_bag.grab_bag()
        self.details = grab_bag.grab_bag()
        # Used by fetch_news() to drop items containing profanity.
        self.filter = profanity_filter.ProfanityFilter()
    # ------------------------------------------------------------------
    # Configuration hooks for subclasses.  Bodies are elided in this
    # excerpt (presumably @abstractmethod stubs — confirm in the full file).
    # ------------------------------------------------------------------

    # Filename prefix for the generated headlines page (see shuffle_news).
    def get_headlines_page_prefix(self) -> str:
    # Filename prefix for the generated details page (see shuffle_news).
    def get_details_page_prefix(self) -> str:
    # Priority component embedded in the headlines page filename.
    def get_headlines_page_priority(self) -> str:
    # Priority component embedded in the details page filename.
    def get_details_page_priority(self) -> str:
    # True if the feed should be fetched over HTTPS rather than HTTP.
    def should_use_https(self) -> bool:
    # True if items containing profanity should be skipped during fetch.
    def should_profanity_filter(self) -> bool:
60 def find_title(self, item: ET.Element) -> Optional[str]:
61 return item.findtext("title")
63 def munge_title(self, title: str, item: ET.Element) -> str:
66 def find_description(self, item: ET.Element) -> Optional[str]:
67 return item.findtext("description")
    # Strip HTML tags from the raw description.  NOTE(review): the return
    # statement is elided in this excerpt — confirm the cleaned string is
    # returned in the full file.
    def munge_description(self, description: str, item: ET.Element) -> str:
        # Remove anything that looks like an HTML/XML tag.
        description = re.sub("<[^>]+>", "", description)
73 def find_link(self, item: ET.Element) -> Optional[str]:
74 return item.findtext("link")
76 def munge_link(self, link: str) -> str:
79 def find_image(self, item: ET.Element) -> Optional[str]:
80 return item.findtext("image")
82 def munge_image(self, image: str) -> str:
85 def find_pubdate(self, item: ET.Element) -> Optional[str]:
86 return item.findtext("pubDate")
88 def munge_pubdate(self, pubdate: str) -> str:
    # Hook: decide whether an item belongs on the headlines page; called by
    # fetch_news() to filter items.  (Remainder of the signature and the body
    # are elided in this excerpt.)
    def item_is_interesting_for_headlines(
        self, title: str, description: str, item: ET.Element
96 def do_headlines(self) -> bool:
99 def do_details(self) -> bool:
    # Presumably returns whether the item's pubDate is more than n days in the
    # past, with `default` likely returned when no pubDate is available.
    # NOTE(review): the signature close, the pubdate None-check, and the
    # return statement are elided in this excerpt — confirm in the full file.
    def is_item_older_than_n_days(
        self, item: ET.Element, n: int, default: bool = False
        pubdate = self.find_pubdate(item)
        # dateutil.parser.parse handles the RFC-822-style dates RSS uses.
        pubdatetime = parse(pubdate)
        # Compute "now" in the same timezone as the parsed stamp so the
        # subtraction below doesn't mix naive and aware datetimes.
        tzinfo = pubdatetime.tzinfo
        now = datetime.datetime.now(tzinfo)
        # Age of the item, in fractional days.
        delta = (now - pubdatetime).total_seconds() / (60 * 60 * 24)
    # Hook: decide whether an item also merits a full article details page;
    # consulted by fetch_news() after the headlines filter.  (Remainder of
    # the signature and the body are elided in this excerpt.)
    def item_is_interesting_for_article(
        self, title: str, description: str, item: ET.Element
    # Dispatch a periodic render job by key; keys correspond to entries in
    # the name_to_timeout_dict passed to __init__.  NOTE(review): the
    # handling of an unrecognized key is elided in this excerpt.
    def periodic_render(self, key: str) -> bool:
        if key == "Fetch News":
            return self.fetch_news()
        elif key == "Shuffle News":
            return self.shuffle_news()
    # Rebuild the headlines and details pages from random subsets of the
    # previously fetched blurbs.  NOTE(review): many lines are elided in this
    # excerpt (the CSS string literals' opening/closing, the loop headers
    # around add_item, the empty-subset early returns, and the final return),
    # so the control flow below is only partially visible — confirm against
    # the full file.  Comments are deliberately not placed where an elided
    # multi-line string literal may still be open.
    def shuffle_news(self) -> bool:
        if self.do_headlines():
            headlines = page_builder.page_builder()
            headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
            headlines.set_title("%s" % self.page_title)
            # Four random headline blurbs to fill the four-item layout.
            subset = self.news.subset(4)
            logger.warning("Not enough messages to select from in shuffle_news?!")
            headlines.add_item(msg)
            headlines.set_custom_html(
            text-decoration: none;
            text-decoration: none;
            text-decoration: none;
            _ = f"{self.get_headlines_page_prefix()}_{self.get_headlines_page_priority()}_25900.html"
            with file_writer.file_writer(_) as f:
                headlines.render_html(f)
        if self.do_details():
            details = page_builder.page_builder()
            details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
            details.set_custom_html(
            text-decoration: none;
            text-decoration: none;
            text-decoration: none;
            details.set_title(self.page_title)
            # One random article blurb for the single-item details page.
            subset = self.details.subset(1)
            logger.warning("Not enough details to choose from in do_details")
            logger.debug("Not enough details to choose from.")
            details.add_item(blurb)
            _ = f"{self.get_details_page_prefix()}_{self.get_details_page_priority()}_86400.html"
            with file_writer.file_writer(_) as g:
                details.render_html(g)
200 def fetch_news(self) -> bool:
205 Union[http.client.HTTPConnection, http.client.HTTPSConnection]
208 for uri in self.feed_uris:
210 if self.should_use_https():
211 url = f"https://{self.feed_site}{uri}"
212 logger.info(f"Fetching: {url}")
213 self.conn = http.client.HTTPSConnection(self.feed_site, timeout=10)
215 url = f"http://{self.feed_site}{uri}"
216 logger.info(f"Fetching: {url}")
217 self.conn = http.client.HTTPConnection(self.feed_site, timeout=10)
218 assert self.conn is not None
219 assert url is not None
226 "Cache-control": "max-age=50",
230 response = self.conn.getresponse()
233 f"Exception in generic RSS renderer HTTP connection fetching {url}; giving up."
237 if response.status != 200:
239 f"Unexpected status {response.status} while fetching {url}: {response.reason}; giving up."
242 print(response.headers)
245 raw = response.read()
246 logger.info(f"Status 200: got {len(raw)} bytes back from {url}")
247 rss = ET.fromstring(raw)
250 for item in list(channel):
251 title = self.find_title(item)
252 description = item.findtext("description")
253 if title is not None:
254 title = self.munge_title(title, item)
256 logger.info("Skipping RSS feed item with no title.")
258 logger.debug(f"Considering RSS item {title}...")
259 if description is not None:
260 description = self.munge_description(description, item)
263 image = self.find_image(item)
264 if image is not None:
265 image = self.munge_image(image)
266 link = item.findtext("link")
268 link = self.munge_link(link)
269 if not self.item_is_interesting_for_headlines(title, description, item):
270 logger.info(f"Skipping {title} because it's not interesting.")
273 if self.should_profanity_filter() and (
274 self.filter.contains_bad_word(title)
275 or self.filter.contains_bad_word(description)
277 logger.info(f"Skipping {title} because it contains profanity.")
280 if title in title_filter:
282 f"Skipping {title} because we already saw an item with the same title."
285 title_filter.add(title)
287 blurb = """<DIV style="padding:8px;
289 -webkit-column-break-inside:avoid;">"""
290 if image is not None:
291 blurb += f'<IMG SRC="{image}" ALIGN=LEFT HEIGHT=115 '
292 blurb += 'style="padding:8px;">'
295 blurb += f"<P><B>{title}</B>"
297 blurb += f'<P><B><A HREF="{link}">{title}</A></B>'
299 pubdate = self.find_pubdate(item)
300 if pubdate is not None:
301 logger.debug(f"Raw pubdate={pubdate}")
302 pubdate = self.munge_pubdate(pubdate)
304 logger.debug(f"Translated pubdate into: {ts}")
305 blurb += f' <FONT COLOR=#cccccc>{ts.strftime("%b %d")}</FONT>'
307 if self.item_is_interesting_for_article(title, description, item):
309 f"Item {title} is also interesting as an article details page; creating..."
313 longblurb += description
314 longblurb += "</DIV>"
315 longblurb = longblurb.replace("font-size:34pt", "font-size:44pt")
316 self.details.add(longblurb)
319 f"Item {title} isn't interesting for article details page; skipped."
324 logger.debug(f"Added {count} items so far...")