X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=google_news_rss_renderer.py;h=c7442707b623aa97d6493306b1b675996accb5d0;hb=addd4980077f6e3857c5c035b49784dc3ceca49a;hp=b4290f3f6c0c9628ebf1b61150fe044e31abdbef;hpb=d6990436e08a57ce211b10058dc61fb223cb94ec;p=kiosk.git diff --git a/google_news_rss_renderer.py b/google_news_rss_renderer.py index b4290f3..c744270 100644 --- a/google_news_rss_renderer.py +++ b/google_news_rss_renderer.py @@ -1,33 +1,47 @@ -from bs4 import BeautifulSoup -import generic_news_rss_renderer +#!/usr/bin/env python3 + +import logging import re +from typing import Dict, List, Optional +import xml +import xml.etree.ElementTree as ET + +from bs4 import BeautifulSoup # type: ignore + +import generic_news_rss_renderer + +logger = logging.getLogger(__file__) + class google_news_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer): - def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title): - super(google_news_rss_renderer, self).__init__( - name_to_timeout_dict, - feed_site, - feed_uris, - page_title) - self.debug = 1 - - def debug_prefix(self): - return "google-news" + def __init__( + self, + name_to_timeout_dict: Dict[str, int], + feed_site: str, + feed_uris: List[str], + page_title: str, + ) -> None: + super().__init__( + name_to_timeout_dict, feed_site, feed_uris, page_title + ) - def get_headlines_page_prefix(self): + def get_headlines_page_prefix(self) -> str: return "google-news" - def get_details_page_prefix(self): + def get_details_page_prefix(self) -> str: return "google-news-details" - def find_description(self, item): - descr = item.findtext('description') - source = item.findtext('source') - if source is not None: - descr = descr + " (%s)" % source + def find_description(self, item: xml.etree.ElementTree.Element) -> str: + descr = item.findtext("description") + if descr is not None: + source = item.findtext("source") + if source is not None: + descr = descr + f" ({source})" + else: + descr = "" return descr - def munge_description_internal(self, descr): + def munge_description_internal(self, descr: str, item: ET.Element) -> str: if len(descr) > 450: descr = descr[:450] descr = re.sub(r"\<[^\>]*$", "", descr) @@ -35,37 +49,41 @@ class google_news_rss_renderer(generic_news_rss_renderer.generic_news_rss_render descr += "

" return descr - def munge_description(self, description): - soup = BeautifulSoup(description) - for a in soup.findAll('a'): - del a['href'] + def munge_description(self, description: str, item: ET.Element) -> str: + soup = BeautifulSoup(description, features="lxml") + for a in soup.findAll("a"): + del a["href"] descr = str(soup) - return munge_description_internal(descr) + return self.munge_description_internal(descr, item) - def find_image(self, item): + def find_image(self, item: xml.etree.ElementTree.Element) -> Optional[str]: return None - def should_use_https(self): + def should_use_https(self) -> bool: return True - def item_is_interesting_for_headlines(self, title, description, item): + def item_is_interesting_for_headlines( + self, title: str, description: str, item: xml.etree.ElementTree.Element + ) -> bool: return not self.is_item_older_than_n_days(item, 2) - def item_is_interesting_for_article(self, title, description, item): + def item_is_interesting_for_article( + self, title: str, description: str, item: xml.etree.ElementTree.Element + ) -> bool: return not self.is_item_older_than_n_days(item, 2) + # Test -#x = google_news_rss_renderer( +# x = google_news_rss_renderer( # {"Fetch News" : 1, # "Shuffle News" : 1}, # "news.google.com", # [ "/rss?hl=en-US&gl=US&ceid=US:en" ], # "Test" ) -#if x.fetch_news() == 0: +# if x.fetch_news() == 0: # print("Error fetching news, no items fetched.") -#x.shuffle_news() +# x.shuffle_news() # -#descr = "this is a lot of really long text about nothign in particular. It's pretty interesting, don't you think? I hope that the munge description method works by both truncating it and remembering to close any open
  • items as well as making sure not to truncate in the middle of a
  • Out!" -#d = x.munge_description_internal(descr) -#print(d) - +# descr = "this is a lot of really long text about nothign in particular. It's pretty interesting, don't you think? I hope that the munge description method works by both truncating it and remembering to close any open
  • items as well as making sure not to truncate in the middle of a
  • Out!" +# d = x.munge_description_internal(descr) +# print(d)