Working on the voice command logic.
[kiosk.git] / generic_news_rss_renderer.py
1 #!/usr/bin/env python3
2
3 from abc import abstractmethod
4 import datetime
5 from dateutil.parser import parse
6 import http.client
7 import random
8 import re
9 from typing import Dict, List, Optional, Union
10 import xml.etree.ElementTree as ET
11
12 import file_writer
13 import grab_bag
14 import renderer
15 import page_builder
16 import profanity_filter
17
18
19 class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
20     def __init__(
21         self,
22         name_to_timeout_dict: Dict[str, int],
23         feed_site: str,
24         feed_uris: List[str],
25         page_title: str,
26     ):
27         super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict, False)
28         self.debug = True
29         self.feed_site = feed_site
30         self.feed_uris = feed_uris
31         self.page_title = page_title
32         self.news = grab_bag.grab_bag()
33         self.details = grab_bag.grab_bag()
34         self.filter = profanity_filter.profanity_filter()
35
36     @abstractmethod
37     def debug_prefix(self) -> str:
38         pass
39
40     @abstractmethod
41     def get_headlines_page_prefix(self) -> str:
42         pass
43
44     @abstractmethod
45     def get_details_page_prefix(self) -> str:
46         pass
47
48     def get_headlines_page_priority(self) -> str:
49         return "4"
50
51     def get_details_page_priority(self) -> str:
52         return "6"
53
54     @abstractmethod
55     def should_use_https(self) -> bool:
56         pass
57
58     def should_profanity_filter(self) -> bool:
59         return False
60
61     def find_title(self, item: ET.Element) -> Optional[str]:
62         return item.findtext("title")
63
64     def munge_title(self, title: str) -> str:
65         return title
66
67     def find_description(self, item: ET.Element) -> Optional[str]:
68         return item.findtext("description")
69
70     def munge_description(self, description: str) -> str:
71         description = re.sub("<[^>]+>", "", description)
72         return description
73
74     def find_link(self, item: ET.Element) -> Optional[str]:
75         return item.findtext("link")
76
77     def munge_link(self, link: str) -> str:
78         return link
79
80     def find_image(self, item: ET.Element) -> Optional[str]:
81         return item.findtext("image")
82
83     def munge_image(self, image: str) -> str:
84         return image
85
86     def find_pubdate(self, item: ET.Element) -> Optional[str]:
87         return item.findtext("pubDate")
88
89     def munge_pubdate(self, pubdate: str) -> str:
90         return pubdate
91
92     def item_is_interesting_for_headlines(
93         self, title: str, description: str, item: ET.Element
94     ) -> bool:
95         return True
96
97     def is_item_older_than_n_days(self, item: ET.Element, n: int) -> bool:
98         pubdate = self.find_pubdate(item)
99         if pubdate is None:
100             return False
101         pubdatetime = parse(pubdate)
102         tzinfo = pubdatetime.tzinfo
103         now = datetime.datetime.now(tzinfo)
104         delta = (now - pubdatetime).total_seconds() / (60 * 60 * 24)
105         return delta > n
106
107     def item_is_interesting_for_article(
108         self, title: str, description: str, item: ET.Element
109     ) -> bool:
110         return True
111
112     def periodic_render(self, key: str) -> bool:
113         if key == "Fetch News":
114             return self.fetch_news()
115         elif key == "Shuffle News":
116             return self.shuffle_news()
117         else:
118             raise Exception
119
120     def shuffle_news(self) -> bool:
121         headlines = page_builder.page_builder()
122         headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
123         headlines.set_title("%s" % self.page_title)
124         subset = self.news.subset(4)
125         if subset is None:
126             self.debug_print("Not enough messages to choose from.")
127             return False
128         for msg in subset:
129             headlines.add_item(msg)
130         headlines.set_custom_html(
131             """
132 <STYLE>
133 a:link {
134   color: black;
135   text-decoration: none;
136   font-weight: bold;
137 }
138 a:visited {
139   color: black;
140   text-decoration: none;
141   font-weight: bold;
142 }
143 a:active {
144   color: black;
145   text-decoration: none;
146   font-weight: bold;
147 }
148 </STYLE>"""
149         )
150         _ = f"{self.get_headlines_page_prefix()}_{self.get_headlines_page_priority()}_25900.html"
151         with file_writer.file_writer(_) as f:
152             headlines.render_html(f)
153
154         details = page_builder.page_builder()
155         details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
156         details.set_custom_html(
157             """
158 <STYLE>
159 a:link {
160   color: black;
161   text-decoration: none;
162   font-weight: bold;
163 }
164 a:visited {
165   color: black;
166   text-decoration: none;
167   font-weight: bold;
168 }
169 a:active {
170   color: black;
171   text-decoration: none;
172   font-weight: bold;
173 }
174 </STYLE>"""
175         )
176         details.set_title(f"{self.page_title}")
177         subset = self.details.subset(1)
178         if subset is None:
179             self.debug_print("Not enough details to choose from.")
180             return False
181         for msg in subset:
182             blurb = msg
183             blurb += "</TD>"
184             details.add_item(blurb)
185         _ = f"{self.get_details_page_prefix()}_{self.get_details_page_priority()}_86400.html"
186         with file_writer.file_writer(_) as g:
187             details.render_html(g)
188         return True
189
190     def fetch_news(self) -> bool:
191         count = 0
192         self.news.clear()
193         self.details.clear()
194         self.conn: Optional[Union[http.client.HTTPConnection,
195                                   http.client.HTTPSConnection]] = None
196
197         for uri in self.feed_uris:
198             if self.should_use_https():
199                 self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri))
200                 self.conn = http.client.HTTPSConnection(self.feed_site, timeout=20)
201             else:
202                 self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri))
203                 self.conn = http.client.HTTPConnection(self.feed_site, timeout=20)
204             assert(self.conn is not None)
205             self.conn.request(
206                 "GET",
207                 uri,
208                 None,
209                 {
210                     "Accept": "*/*",
211                     "Cache-control": "max-age=59",
212                     "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
213                 },
214             )
215             try:
216                 response = self.conn.getresponse()
217             except:
218                 print("Exception in generic RSS renderer HTTP connection")
219                 return False
220
221             if response.status != 200:
222                 print(
223                     f"{self.page_title}: RSS fetch_news error, response: {response.status}"
224                 )
225                 self.debug_print(str(response.read()))
226                 return False
227
228             rss = ET.fromstring(response.read())
229             channel = rss[0]
230             for item in channel.getchildren():
231                 title = self.find_title(item)
232                 if title is not None:
233                     title = self.munge_title(title)
234                 description = item.findtext("description")
235                 if description is not None:
236                     description = self.munge_description(description)
237                 else:
238                     description = ""
239                 image = self.find_image(item)
240                 if image is not None:
241                     image = self.munge_image(image)
242                 link = item.findtext("link")
243                 if link is not None:
244                     link = self.munge_link(link)
245
246                 if title is None or not self.item_is_interesting_for_headlines(
247                     title, description, item
248                 ):
249                     self.debug_print(f'Item "{title}" is not interesting')
250                     continue
251
252                 if self.should_profanity_filter() and (
253                     self.filter.contains_bad_words(title)
254                     or self.filter.contains_bad_words(description)
255                 ):
256                     self.debug_print(f'Found bad words in item "{title}"')
257                     continue
258
259                 blurb = """<DIV style="padding:8px;
260                                  font-size:34pt;
261                                  -webkit-column-break-inside:avoid;">"""
262                 if image is not None:
263                     blurb += f'<IMG SRC="{image}" ALIGN=LEFT HEIGHT=115 '
264                     blurb += 'style="padding:8px;">'
265
266                 if link is None:
267                     blurb += f"<P><B>{title}</B>"
268                 else:
269                     blurb += f'<P><B><A HREF="{link}">{title}</A></B>'
270
271                 pubdate = self.find_pubdate(item)
272                 if pubdate is not None:
273                     pubdate = self.munge_pubdate(pubdate)
274                     ts = parse(pubdate)
275                     blurb += f'  <FONT COLOR=#cccccc>{ts.strftime("%b&nbsp;%d")}</FONT>'
276
277                 if self.item_is_interesting_for_article(title, description, item):
278                     longblurb = blurb
279                     longblurb += "<BR>"
280                     longblurb += description
281                     longblurb += "</DIV>"
282                     longblurb = longblurb.replace("font-size:34pt", "font-size:44pt")
283                     self.details.add(longblurb)
284                 blurb += "</DIV>"
285                 self.news.add(blurb)
286                 count += 1
287         return count > 0