2628bd58975f0bb9b4f2614961b28377a68daf74
[kiosk.git] / cnn_rss_renderer.py
1 #!/usr/bin/env python3
2
3 import generic_news_rss_renderer
4 import re
5 from typing import Dict, List, Optional
6 import xml
7
8
class cnn_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
    """News renderer customised for CNN RSS feeds.

    Overrides a set of methods on ``generic_news_rss_renderer`` to supply
    CNN-specific page prefixes, description clean-up, thumbnail lookup and
    item filtering.  How and when the base class invokes these methods is
    not visible in this file.
    """

    # Matches self-referential site names such as "CNN.com" or "CNNMoney.com".
    _CNN_SITE_NAME_RE = re.compile(r"[Cc][Nn][Nn][A-Za-z]*\.com")

    # Boilerplate sentence stripped from descriptions.  NOTE: the trailing
    # "." is an unescaped regex wildcard, so whatever single character
    # follows "details" is removed together with the phrase.
    _READ_FULL_STORY_RE = re.compile(r"[Rr]ead full story for latest details.")

    # Anything that looks like an HTML/XML tag: "<" ... ">".
    _MARKUP_TAG_RE = re.compile(r"<[^>]+>")

    # Tags under which a thumbnail element may appear.  ElementTree stores
    # namespaced tags as "{uri}local", so the Media RSS qualified name is
    # needed to match parsed feeds; the literal prefixed form is kept for
    # elements whose tag was never namespace-expanded.
    _THUMBNAIL_TAGS = (
        "media:thumbnail",
        "{http://search.yahoo.com/mrss/}thumbnail",
    )

    def __init__(
        self,
        name_to_timeout_dict: Dict[str, int],
        feed_site: str,
        feed_uris: List[str],
        page_title: str,
    ):
        """Forward all arguments unchanged to the base-class constructor."""
        super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title)

    def get_headlines_page_prefix(self) -> str:
        """Return the headlines page prefix: ``cnn-<page_title>``."""
        return f"cnn-{self.page_title}"

    def get_details_page_prefix(self) -> str:
        """Return the details page prefix: ``cnn-details-<page_title>``."""
        return f"cnn-details-{self.page_title}"

    def munge_description(
        self, description: str, item: xml.etree.ElementTree.Element
    ) -> str:
        """Return *description* with CNN boilerplate and markup tags removed.

        First drops the "Read full story for latest details" sentence, then
        strips every ``<...>`` tag.  *item* is accepted but not used here.
        """
        description = self._READ_FULL_STORY_RE.sub("", description)
        description = self._MARKUP_TAG_RE.sub("", description)
        return description

    def find_image(self, item: xml.etree.ElementTree.Element) -> Optional[str]:
        """Return the ``url`` attribute of *item*'s thumbnail child, if any.

        Only direct children of *item* are examined, and the first child
        whose tag is a thumbnail tag decides the result.  Returns None when
        there is no such child or when it carries no ``url`` attribute.

        The element itself is inspected rather than ``findtext()``:
        ``findtext()`` yields the element's text as a ``str``, which has no
        ``.get("url")`` and would raise AttributeError on any match.
        """
        for child in item:
            if child.tag in self._THUMBNAIL_TAGS:
                return child.get("url")
        return None

    def should_use_https(self) -> bool:
        """Return False (this renderer does not ask for HTTPS)."""
        return False

    def item_is_interesting_for_headlines(
        self, title: str, description: str, item: xml.etree.ElementTree.Element
    ) -> bool:
        """Decide whether *item* should be shown as a headline.

        Rejects items that the inherited ``is_item_older_than_n_days``
        reports as older than 14 days (called with ``default=False``), and
        items whose *title* mentions a "CNN*.com" site name.  *description*
        is not consulted.
        """
        if self.is_item_older_than_n_days(item, 14, default=False):
            return False
        return self._CNN_SITE_NAME_RE.search(title) is None

    def item_is_interesting_for_article(
        self, title: str, description: str, item: xml.etree.ElementTree.Element
    ) -> bool:
        """Decide whether *item* should be shown as a full article.

        Rejects items that the inherited ``is_item_older_than_n_days``
        reports as older than 7 days, items whose *title* mentions a
        "CNN*.com" site name, and items whose *description* is shorter than
        65 characters.
        """
        if self.is_item_older_than_n_days(item, 7):
            return False
        return (
            self._CNN_SITE_NAME_RE.search(title) is None
            and len(description) >= 65
        )
58
59
60 # Test
61 # x = cnn_rss_renderer(
62 #    {"Fetch News" : 1,
63 #     "Shuffle News" : 1},
64 #    "rss.cnn.com",
65 #    [ "/rss/cnn_topstories.rss",
66 #      "/rss/money_latest.rss",
67 #     "/rss/cnn_tech.rss",
68 #    ],
69 #    "Test" )
70 # if x.fetch_news() == 0:
71 #    print("Error fetching news, no items fetched.")
72 # x.shuffle_news()