#!/usr/bin/env python3

import logging
import re
import xml
import xml.etree.ElementTree as ET
from typing import Dict, List, Optional

from bs4 import BeautifulSoup  # type: ignore

import generic_news_rss_renderer

# Module-level logger, named after the module rather than its file path so the
# name contains no path separators or ".py" suffix (dots in a logger name are
# interpreted as hierarchy levels by the logging package).
logger = logging.getLogger(__name__)
class google_news_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
    """News renderer specialized for Google News RSS feeds.

    Overrides the hooks of the generic news RSS renderer to supply the page
    name prefixes, build and sanitize item descriptions, disable images,
    force HTTPS, and keep only recent items.
    """

    # Descriptions longer than this many characters are truncated by
    # munge_description_internal.
    _MAX_DESCRIPTION_CHARS = 450

    def __init__(
        self,
        name_to_timeout_dict: Dict[str, int],
        feed_site: str,
        feed_uris: List[str],
        page_title: str,
    ) -> None:
        """Pass all arguments through unchanged to the base renderer."""
        super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title)

    def get_headlines_page_prefix(self) -> str:
        """Return the prefix used for the headlines page."""
        return "google-news"

    def get_details_page_prefix(self) -> str:
        """Return the prefix used for the details page."""
        return "google-news-details"

    def find_description(self, item: ET.Element) -> str:
        """Return the item's description text, tagged with its source.

        Args:
            item: the feed item element to read from.

        Returns:
            The text of the item's "description" child, followed by
            " (<source>)" when a "source" child is present; the empty
            string when the item has no "description" child.
        """
        descr = item.findtext("description")
        if descr is None:
            return ""
        source = item.findtext("source")
        if source is not None:
            descr = f"{descr} ({source})"
        return descr

    def munge_description_internal(self, descr: str, item: ET.Element) -> str:
        """Limit the length of an HTML description string.

        Args:
            descr: the description markup to limit.
            item: the feed item the description came from (not used here).

        Returns:
            descr unchanged when it is within the length limit.  Otherwise
            the first _MAX_DESCRIPTION_CHARS characters, with any trailing
            unterminated "<..." fragment removed and a fixed run of closing
            tags appended.
        """
        limit = self._MAX_DESCRIPTION_CHARS
        if len(descr) > limit:
            descr = descr[:limit]
            # The cut may land inside a tag; drop a trailing "<" that has no
            # matching ">" before the end of the string.
            descr = re.sub(r"\<[^\>]*$", "", descr)
            # NOTE(review): presumably closes whatever elements the
            # truncation left open -- confirm against how the page is built.
            descr += "</A></LI></UL></OL></P>"
        return descr

    def munge_description(self, description: str, item: ET.Element) -> str:
        """Remove link targets from the description, then length-limit it.

        Args:
            description: the description markup to clean up.
            item: the feed item the description came from; forwarded to
                munge_description_internal.

        Returns:
            The re-serialized markup with the href attribute deleted from
            every <a> tag, passed through munge_description_internal.
        """
        soup = BeautifulSoup(description, features="lxml")
        for anchor in soup.find_all("a"):
            del anchor["href"]
        return self.munge_description_internal(str(soup), item)

    def find_image(self, item: ET.Element) -> Optional[str]:
        """Return None: this renderer never supplies an image for an item."""
        return None

    def should_use_https(self) -> bool:
        """Return True: this renderer always asks for HTTPS."""
        return True

    def item_is_interesting_for_headlines(
        self, title: str, description: str, item: ET.Element
    ) -> bool:
        """Return True unless is_item_older_than_n_days(item, 2) is true.

        title and description are accepted but not consulted.
        """
        # is_item_older_than_n_days is not defined in this class; presumably
        # it is inherited from the generic renderer.
        return not self.is_item_older_than_n_days(item, 2)

    def item_is_interesting_for_article(
        self, title: str, description: str, item: ET.Element
    ) -> bool:
        """Return True unless is_item_older_than_n_days(item, 2) is true.

        title and description are accepted but not consulted.
        """
        return not self.is_item_older_than_n_days(item, 2)