9bec2a8f354a209f8c35a2221a1e2d052565ffca
[kiosk.git] / cnn_rss_renderer.py
1 #!/usr/bin/env python3
2
3 import generic_news_rss_renderer
4 import re
5 from typing import Dict, List, Optional
6 import xml
7
8
class cnn_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
    """RSS news renderer specialized for CNN feeds.

    Extends generic_news_rss_renderer with CNN-specific naming prefixes,
    description cleanup, thumbnail lookup, and item filters.  The methods
    below are presumably hooks invoked by the base class; verify against
    generic_news_rss_renderer.
    """

    # Matches site names such as "cnn.com" or "CNNMoney.com" inside a title.
    _CNN_SITE_RE = re.compile(r"[Cc][Nn][Nn][A-Za-z]*\.com")

    # Prefix map needed to address Media RSS elements (<media:thumbnail>)
    # through ElementTree path expressions.
    _MEDIA_NAMESPACES = {"media": "http://search.yahoo.com/mrss/"}

    def __init__(
        self,
        name_to_timeout_dict: Dict[str, int],
        feed_site: str,
        feed_uris: List[str],
        page_title: str,
    ):
        """Forward all arguments to the base renderer and enable debugging.

        Args:
            name_to_timeout_dict: passed through unchanged to the base class.
            feed_site: passed through unchanged to the base class.
            feed_uris: passed through unchanged to the base class.
            page_title: passed through unchanged to the base class; this
                class later reads it back as self.page_title.
        """
        super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title)
        self.debug = True

    def debug_prefix(self) -> str:
        """Return the tag identifying this renderer, e.g. "cnn(Title)"."""
        return f"cnn({self.page_title})"

    def get_headlines_page_prefix(self) -> str:
        """Return the headlines page prefix, "cnn-<page_title>"."""
        return f"cnn-{self.page_title}"

    def get_details_page_prefix(self) -> str:
        """Return the details page prefix, "cnn-details-<page_title>"."""
        return f"cnn-details-{self.page_title}"

    def munge_description(self, description: str) -> str:
        """Return description without CNN boilerplate or markup tags.

        Removes the "Read full story for latest details." trailer (either
        capitalization of the first letter) and then every <...> tag.
        """
        description = re.sub(
            r"[Rr]ead full story for latest details.", "", description
        )
        return re.sub(r"<[^>]+>", "", description)

    def find_image(self, item: xml.etree.ElementTree.Element) -> Optional[str]:
        """Return the thumbnail URL of a feed item, if it has one.

        Args:
            item: the feed item element to inspect.

        Returns:
            The "url" attribute of the item's <media:thumbnail> child, or
            None when the child or the attribute is absent.
        """
        # find() (not findtext()) is required: the URL lives in an attribute,
        # and findtext() returns a plain str that has no .get().  The
        # namespace map is required too; without it ElementTree rejects the
        # "media:" prefix with a SyntaxError.
        thumbnail = item.find("media:thumbnail", self._MEDIA_NAMESPACES)
        if thumbnail is None:
            return None
        return thumbnail.get("url")

    def should_use_https(self) -> bool:
        """Return False: this renderer does not ask for HTTPS."""
        return False

    def item_is_interesting_for_headlines(
        self, title: str, description: str, item: xml.etree.ElementTree.Element
    ) -> bool:
        """Decide whether an item qualifies as a headline.

        An item is rejected when the inherited is_item_older_than_n_days
        check reports it as older than 14 days, or when its title mentions
        a CNN site name (see _CNN_SITE_RE).
        """
        if self.is_item_older_than_n_days(item, 14):
            return False
        return self._CNN_SITE_RE.search(title) is None

    def item_is_interesting_for_article(
        self, title: str, description: str, item: xml.etree.ElementTree.Element
    ) -> bool:
        """Decide whether an item qualifies as a full article.

        Stricter than the headline filter: the age limit is 7 days, the
        title must not mention a CNN site name, and the description must
        be at least 65 characters long.
        """
        if self.is_item_older_than_n_days(item, 7):
            return False
        return (
            self._CNN_SITE_RE.search(title) is None
            and len(description) >= 65
        )
62
63
64 # Test
65 # x = cnn_rss_renderer(
66 #    {"Fetch News" : 1,
67 #     "Shuffle News" : 1},
68 #    "rss.cnn.com",
69 #    [ "/rss/cnn_topstories.rss",
70 #      "/rss/money_latest.rss",
71 #     "/rss/cnn_tech.rss",
72 #    ],
73 #    "Test" )
74 # if x.fetch_news() == 0:
75 #    print("Error fetching news, no items fetched.")
76 # x.shuffle_news()