#!/usr/bin/env python3

"""Renderer that turns Google News RSS items into headline/detail pages.

The fetching, shuffling and page-writing machinery is not in this file; it
is expected to come from ``generic_news_rss_renderer``.  This module only
customises how items are described, filtered and labelled.
"""

import logging
import re
from typing import Dict, List, Optional
import xml
import xml.etree.ElementTree as ET

from bs4 import BeautifulSoup  # type: ignore

import generic_news_rss_renderer

logger = logging.getLogger(__name__)

# Descriptions longer than this many characters are truncated.
_MAX_DESCRIPTION_LENGTH = 450

# Matches a tag that was opened but never closed at the very end of a string,
# which is what a blind character-count truncation can leave behind.
_DANGLING_TAG_RE = re.compile(r"\<[^\>]*$")

# Text appended to every munged description.
# NOTE(review): in the copy of the file this was restored from, the literal
# survives only as two newlines between the quotes.  The commented-out test
# at the bottom of the file talks about "remembering to close any open ...
# items", so the real value is presumably a run of closing HTML tags that
# was lost -- confirm against the upstream file before relying on it.
_DESCRIPTION_SUFFIX = "\n\n"

# Items older than this many days are hidden from headlines and articles.
_MAX_ITEM_AGE_DAYS = 2


class google_news_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
    """Google News flavour of the generic RSS renderer."""

    def __init__(
        self,
        name_to_timeout_dict: Dict[str, int],
        feed_site: str,
        feed_uris: List[str],
        page_title: str,
    ) -> None:
        """Forward all arguments unchanged to the base class constructor."""
        super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title)

    def get_headlines_page_prefix(self) -> str:
        """Return the prefix used for the headlines page."""
        return "google-news"

    def get_details_page_prefix(self) -> str:
        """Return the prefix used for the details page."""
        return "google-news-details"

    def find_description(self, item: ET.Element) -> str:
        """Return the item's description, tagged with its source if present.

        Args:
            item: the RSS ``<item>`` element to inspect.

        Returns:
            The text of the ``description`` child, followed by
            `` (<source text>)`` when a ``source`` child exists, or the
            empty string when there is no ``description`` child.
        """
        descr = item.findtext("description")
        if descr is None:
            return ""
        source = item.findtext("source")
        if source is not None:
            descr = f"{descr} ({source})"
        return descr

    def munge_description_internal(self, descr: str, item: ET.Element) -> str:
        """Truncate an over-long description and append the fixed suffix.

        Args:
            descr: the (HTML) description text.
            item: the RSS item the description came from; not used here.

        Returns:
            ``descr`` cut to at most 450 characters (with any tag left
            unterminated by the cut removed and ``" [...]"`` appended) when
            it was longer than that, followed in all cases by the suffix.
        """
        if len(descr) > _MAX_DESCRIPTION_LENGTH:
            descr = descr[:_MAX_DESCRIPTION_LENGTH]
            # The cut may have landed inside a tag; drop the partial tag.
            descr = _DANGLING_TAG_RE.sub("", descr)
            descr = descr + " [...]"
        descr += _DESCRIPTION_SUFFIX
        return descr

    def munge_description(self, description: str, item: ET.Element) -> str:
        """Remove link targets from the description, then truncate it.

        Args:
            description: the raw HTML description.
            item: the RSS item the description came from.

        Returns:
            The description re-serialised by BeautifulSoup with the ``href``
            attribute deleted from every ``<a>`` tag, then passed through
            :meth:`munge_description_internal`.
        """
        soup = BeautifulSoup(description, features="lxml")
        # find_all is the BeautifulSoup 4 name; findAll is the legacy alias.
        for anchor in soup.find_all("a"):
            del anchor["href"]
        return self.munge_description_internal(str(soup), item)

    def find_image(self, item: ET.Element) -> Optional[str]:
        """Return None: this renderer never supplies an image for an item."""
        return None

    def should_use_https(self) -> bool:
        """Return True: this renderer always asks for HTTPS."""
        return True

    def item_is_interesting_for_headlines(
        self, title: str, description: str, item: ET.Element
    ) -> bool:
        """Return True unless the item is older than two days.

        ``title`` and ``description`` are accepted but not consulted.  The
        age check is delegated to ``self.is_item_older_than_n_days``, which
        is not defined in this class (presumably inherited from the base).
        """
        return not self.is_item_older_than_n_days(item, _MAX_ITEM_AGE_DAYS)

    def item_is_interesting_for_article(
        self, title: str, description: str, item: ET.Element
    ) -> bool:
        """Return True unless the item is older than two days.

        ``title`` and ``description`` are accepted but not consulted.  The
        age check is delegated to ``self.is_item_older_than_n_days``, which
        is not defined in this class (presumably inherited from the base).
        """
        return not self.is_item_older_than_n_days(item, _MAX_ITEM_AGE_DAYS)


# Test
#
# NOTE(review): the sample string below appears to have lost its inline HTML
# markup (the bullets look like rendered list items), and the call to
# munge_description_internal() omits the `item` argument the method takes.
# Both need fixing before this snippet can be re-enabled.
#
# x = google_news_rss_renderer(
#     {"Fetch News" : 1,
#      "Shuffle News" : 1},
#     "news.google.com",
#     [ "/rss?hl=en-US&gl=US&ceid=US:en" ],
#     "Test" )
# if x.fetch_news() == 0:
#     print("Error fetching news, no items fetched.")
# x.shuffle_news()
#
# descr = "this is a lot of really long text about nothign in particular. It's pretty interesting, don't you think? I hope that the munge description method works by both truncating it and remembering to close any open
#   • items as well as making sure not to truncate in the middle of a
#   • Out!"
# d = x.munge_description_internal(descr)
# print(d)