3 from abc import abstractmethod
5 from dateutil.parser import parse
9 from typing import Dict, List, Optional, Union
10 import xml.etree.ElementTree as ET
12 from scottutilz import profanity_filter
# Module-level logger named after this module (standard logging pattern).
logger = logging.getLogger(__name__)
class generic_news_rss_renderer(renderer.abstaining_renderer):
    # Base renderer for RSS news feeds: fetches items from feed_uris on
    # feed_site, filters/munges them, and renders headline + detail HTML
    # pages via periodic_render.
    # NOTE(review): the "def __init__" line and part of its signature are not
    # visible in this chunk; only one parameter line and the body are shown.
        name_to_timeout_dict: Dict[str, int],  # render key -> timeout seconds, forwarded to superclass
        super().__init__(name_to_timeout_dict)
        self.feed_site = feed_site          # hostname of the RSS server
        self.feed_uris = feed_uris          # URI paths to fetch from that host
        self.page_title = page_title        # title used on generated pages
        self.news = grab_bag.grab_bag()     # pool of headline blurbs to sample from
        self.details = grab_bag.grab_bag()  # pool of article-detail blurbs
        self.filter = profanity_filter.ProfanityFilter()  # consulted when should_profanity_filter()
    # Subclass configuration hooks.  Bodies are not visible in this chunk;
    # given the `abstractmethod` import at the top of the file these are
    # presumably abstract — decorators not visible, confirm in full source.
    # The prefix/priority values feed into the generated HTML filenames
    # (see shuffle_news); the boolean hooks toggle HTTPS fetching and
    # profanity filtering (see fetch_news).
    def get_headlines_page_prefix(self) -> str:
    def get_details_page_prefix(self) -> str:
    def get_headlines_page_priority(self) -> str:
    def get_details_page_priority(self) -> str:
    def should_use_https(self) -> bool:
    def should_profanity_filter(self) -> bool:
60 def find_title(self, item: ET.Element) -> Optional[str]:
61 return item.findtext("title")
    def munge_title(self, title: str, item: ET.Element) -> str:
        """Hook: rewrite an item's title before display (body not visible in this chunk)."""
66 def find_description(self, item: ET.Element) -> Optional[str]:
67 return item.findtext("description")
    def munge_description(self, description: str, item: ET.Element) -> str:
        """Clean up an item's description for display.

        NOTE(review): the tail of this method (presumably the return) is not
        visible in this chunk.
        """
        # Strip all markup tags, leaving only the text between them.
        description = re.sub("<[^>]+>", "", description)
73 def find_link(self, item: ET.Element) -> Optional[str]:
74 return item.findtext("link")
    def munge_link(self, link: str) -> str:
        """Hook: rewrite an item's link before use (body not visible in this chunk)."""
79 def find_image(self, item: ET.Element) -> Optional[str]:
80 return item.findtext("image")
    def munge_image(self, image: str) -> str:
        """Hook: rewrite an item's image reference (body not visible in this chunk)."""
85 def find_pubdate(self, item: ET.Element) -> Optional[str]:
86 return item.findtext("pubDate")
    def munge_pubdate(self, pubdate: str) -> str:
        """Hook: rewrite an item's pubDate string (body not visible in this chunk)."""
    # Predicate hook: should this item appear on the headlines page?
    # NOTE(review): the end of the signature and the body are not visible in
    # this chunk.
    def item_is_interesting_for_headlines(
        self, title: str, description: str, item: ET.Element

    def do_headlines(self) -> bool:
        """Whether to build the headlines page at all (body not visible in this chunk)."""

    def do_details(self) -> bool:
        """Whether to build the article-details page at all (body not visible in this chunk)."""
    def is_item_older_than_n_days(self, item: ET.Element, n: int) -> bool:
        """True when the item's pubDate lies more than n days in the past.

        NOTE(review): the None-pubdate guard and the final return statement
        are not visible in this chunk; elisions are marked below.
        """
        pubdate = self.find_pubdate(item)
        # (guard lines elided in this view)
        pubdatetime = parse(pubdate)
        # Use the parsed timestamp's own tzinfo for "now" so we never
        # subtract a naive datetime from an aware one (or vice versa).
        tzinfo = pubdatetime.tzinfo
        now = datetime.datetime.now(tzinfo)
        # Age of the item, in days.
        delta = (now - pubdatetime).total_seconds() / (60 * 60 * 24)
    # Predicate hook: should this item also get a full article-details page?
    # NOTE(review): the end of the signature and the body are not visible in
    # this chunk.
    def item_is_interesting_for_article(
        self, title: str, description: str, item: ET.Element
    def periodic_render(self, key: str) -> bool:
        """Dispatch a periodic render request by key.

        "Fetch News" re-downloads the feeds; "Shuffle News" rebuilds the
        output pages from the already-fetched pool.
        NOTE(review): the handling of any other key value is not visible in
        this chunk.
        """
        if key == "Fetch News":
            return self.fetch_news()
        elif key == "Shuffle News":
            return self.shuffle_news()
        # (fall-through handling elided in this view)
    def shuffle_news(self) -> bool:
        """Rebuild the headlines and details HTML pages from random subsets.

        Picks 4 headline blurbs from self.news and 1 detail blurb from
        self.details and writes each page through file_writer.
        NOTE(review): many lines of this method are not visible in this
        chunk; elisions are marked with "(elided)" comments.  No comments
        are inserted inside the triple-quoted custom-HTML strings so their
        runtime contents stay intact.
        """
        if self.do_headlines():
            headlines = page_builder.page_builder()
            headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
            headlines.set_title("%s" % self.page_title)
            subset = self.news.subset(4)
            # (subset None-check elided)
                logger.warning("Not enough messages to select from in shuffle_news?!")
            # (loop header elided)
                headlines.add_item(msg)
            # Custom CSS for the headlines page; string contents partially
            # elided in this view.
            headlines.set_custom_html(
                text-decoration: none;
                text-decoration: none;
                text-decoration: none;
            # Output filename encodes prefix, priority and a refresh period.
            _ = f"{self.get_headlines_page_prefix()}_{self.get_headlines_page_priority()}_25900.html"
            with file_writer.file_writer(_) as f:
                headlines.render_html(f)
        if self.do_details():
            details = page_builder.page_builder()
            details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
            # Custom CSS for the details page; string contents partially
            # elided in this view.
            details.set_custom_html(
                text-decoration: none;
                text-decoration: none;
                text-decoration: none;
            details.set_title(self.page_title)
            subset = self.details.subset(1)
            # (subset None-check elided)
                logger.warning("Not enough details to choose from in do_details")
                logger.debug("Not enough details to choose from.")
            # (loop header / blurb selection elided)
                details.add_item(blurb)
            _ = f"{self.get_details_page_prefix()}_{self.get_details_page_priority()}_86400.html"
            with file_writer.file_writer(_) as g:
                details.render_html(g)
    def fetch_news(self) -> bool:
        """Download and parse every configured RSS feed.

        For each URI in self.feed_uris: fetch over HTTP(S), parse the XML,
        then filter, deduplicate and render each item into HTML blurbs
        added to self.news (headlines) and self.details (articles).
        NOTE(review): many lines of this method are not visible in this
        chunk; elisions are marked with "(elided)" comments.
        """
        # (local setup elided; type below suggests a connection attribute
        #  that may be either plain or TLS)
            Union[http.client.HTTPConnection, http.client.HTTPSConnection]
        # (…elided…)
        for uri in self.feed_uris:
            # (…elided…)
            if self.should_use_https():
                url = f"https://{self.feed_site}{uri}"
                logger.info(f"Fetching: {url}")
                self.conn = http.client.HTTPSConnection(self.feed_site, timeout=10)
            # (else-branch header elided)
                url = f"http://{self.feed_site}{uri}"
                logger.info(f"Fetching: {url}")
                self.conn = http.client.HTTPConnection(self.feed_site, timeout=10)
            assert self.conn is not None
            assert url is not None
            # (request construction elided; one header shown)
                "Cache-control": "max-age=50",
            # (…elided…)
                response = self.conn.getresponse()
            # (exception handler header elided)
                    f"Exception in generic RSS renderer HTTP connection fetching {url}; giving up."
            # (…elided…)
            if response.status != 200:
                # (log call header elided)
                    f"Unexpected status {response.status} while fetching {url}; giving up."
            # (…elided…)
            raw = response.read()
            logger.info(f"Status 200: got {len(raw)} bytes back from {url}")
            rss = ET.fromstring(raw)
            # (channel lookup elided)
            for item in list(channel):
                title = self.find_title(item)
                # NOTE(review): reads "description" (and "link" below) via
                # findtext directly instead of self.find_description /
                # self.find_link, so subclass overrides of those hooks are
                # bypassed here — confirm whether that is intentional.
                description = item.findtext("description")
                if title is not None:
                    title = self.munge_title(title, item)
                # (else-branch header elided)
                    logger.info("Skipping RSS feed item with no title.")
                # (continue elided)
                logger.debug(f"Considering RSS item {title}...")
                if description is not None:
                    description = self.munge_description(description, item)
                # (else elided)
                image = self.find_image(item)
                if image is not None:
                    image = self.munge_image(image)
                link = item.findtext("link")
                # (None-check elided)
                    link = self.munge_link(link)
                if not self.item_is_interesting_for_headlines(title, description, item):
                    logger.info(f"Skipping {title} because it's not interesting.")
                # (continue elided)
                if self.should_profanity_filter() and (
                    self.filter.contains_bad_word(title)
                    or self.filter.contains_bad_word(description)
                # (closing paren / continue elided)
                    logger.info(f"Skipping {title} because it contains profanity.")
                # Deduplicate by exact title within this fetch pass.
                if title in title_filter:
                    # (log call header elided)
                        f"Skipping {title} because we already saw an item with the same title."
                # (continue elided)
                title_filter.add(title)
                # Build the headline blurb as raw HTML.
                blurb = """<DIV style="padding:8px;
                -webkit-column-break-inside:avoid;">"""
                if image is not None:
                    blurb += f'<IMG SRC="{image}" ALIGN=LEFT HEIGHT=115 '
                    blurb += 'style="padding:8px;">'
                # (link None-check elided: plain title vs. hyperlinked title)
                    blurb += f"<P><B>{title}</B>"
                # (else elided)
                    blurb += f'<P><B><A HREF="{link}">{title}</A></B>'
                pubdate = self.find_pubdate(item)
                if pubdate is not None:
                    logger.debug(f"Raw pubdate={pubdate}")
                    pubdate = self.munge_pubdate(pubdate)
                    # (ts parse elided)
                    logger.debug(f"Translated pubdate into: {ts}")
                    blurb += f' <FONT COLOR=#cccccc>{ts.strftime("%b %d")}</FONT>'
                if self.item_is_interesting_for_article(title, description, item):
                    # (log call header elided)
                        f"Item {title} is also interesting as an article details page; creating..."
                    # (longblurb initialization elided)
                    longblurb += description
                    longblurb += "</DIV>"
                    # Details page renders in a larger font than headlines.
                    longblurb = longblurb.replace("font-size:34pt", "font-size:44pt")
                    self.details.add(longblurb)
                # (else / log call header elided)
                        f"Item {title} isn't interesting for article details page; skipped."
                # (blurb add to self.news / count increment elided)
                logger.debug(f"Added {count} items so far...")