#!/usr/bin/env python3

# © Copyright 2021-2022, Scott Gasch

"""A module to serve as a local client library around HTTP calls to
the Google Assistant via a local gateway.
"""

import logging
import warnings
from dataclasses import dataclass
from typing import Optional

import requests
import speech_recognition as sr  # type: ignore

import config

logger = logging.getLogger(__name__)

parser = config.add_commandline_args(
    f"Google Assistant ({__file__})",
    "Args related to contacting the Google Assistant",
)
parser.add_argument(
    "--google_assistant_bridge",
    type=str,
    default="http://kiosk.house:3000",
    metavar="URL",
    help="How to contact the Google Assistant bridge",
)
parser.add_argument(
    "--google_assistant_username",
    type=str,
    metavar="GOOGLE_ACCOUNT",
    default="scott.gasch",
    help="The user account for talking to Google Assistant",
)


@dataclass
class GoogleResponse:
    """A Google response wrapper dataclass."""

    success: bool = False
    """Did the request succeed (True) or fail (False)?"""

    response: str = ''
    """The response as a text string, if available."""

    audio_url: str = ''
    """A URL that can be used to fetch the raw audio response."""

    audio_transcription: Optional[str] = None
    """A transcription of the audio response, if available.  Otherwise
    None"""

    def __repr__(self) -> str:
        """Return a multi-line, human readable rendering of all fields."""
        return f"""
success: {self.success}
response: {self.response}
audio_transcription: {self.audio_transcription}
audio_url: {self.audio_url}"""


def tell_google(cmd: str, *, recognize_speech: bool = True) -> GoogleResponse:
    """Alias for ask_google."""
    return ask_google(cmd, recognize_speech=recognize_speech)


def _transcribe_audio(audio_url: str) -> str:
    """Fetch the audio at audio_url and run speech recognition on it.

    Args:
        audio_url: the URL of the raw audio response.

    Returns:
        The recognized text, or an empty string if the audio could not
        be fetched (non-200 HTTP status).

    Raises:
        sr.UnknownValueError: the speech was unintelligible.
        sr.RequestError: the speech recognition request itself failed.
    """
    audio_reply = requests.get(audio_url)
    if audio_reply.status_code != 200:
        return ""

    # NOTE(review): presumably the bridge serves 24kHz, 2-bytes-per-sample
    # raw audio; these constants are taken as-is -- confirm against the bridge.
    speech = sr.AudioData(
        frame_data=audio_reply.content,
        sample_rate=24000,
        sample_width=2,
    )
    transcription = sr.Recognizer().recognize_google(speech)
    logger.debug("Transcription: '%s'", transcription)
    return transcription


def ask_google(cmd: str, *, recognize_speech: bool = True) -> GoogleResponse:
    """Send a command string to Google via the google_assistant_bridge as the
    user google_assistant_username and return the response.  If recognize_speech
    is True, perform speech recognition on the audio response from Google so as
    to translate it into text (best effort, YMMV).  e.g.::

        >>> google_assistant.ask_google('What time is it?')
        success: True
        response: 9:27 PM.
        audio_transcription: 9:27 p.m.
        audio_url: http://kiosk.house:3000/server/audio?v=1653971233030

    Args:
        cmd: the command / question text to send.
        recognize_speech: should we attempt to transcribe the audio
            response into text?

    Returns:
        A GoogleResponse.  If the bridge answers with a non-200 HTTP
        status, success is False and response holds a description of
        the failure.  audio_transcription is None when transcription
        was attempted but failed and an empty string when it was not
        attempted or the audio could not be fetched.
    """
    logger.debug("Asking google: '%s'", cmd)
    bridge = config.config['google_assistant_bridge']
    payload = {
        "command": cmd,
        "user": config.config['google_assistant_username'],
    }
    url = f"{bridge}/assistant"

    # NOTE(review): no timeout is passed to requests here (or to the audio
    # fetch), so an unresponsive bridge blocks the caller indefinitely.
    reply = requests.post(url, json=payload)
    if reply.status_code != 200:
        message = f'HTTP request to {url} with {payload} failed; code {reply.status_code}'
        logger.error(message)
        return GoogleResponse(
            success=False,
            response=message,
            audio_url="",
            audio_transcription="",
        )

    j = reply.json()
    logger.debug(j)

    # Use .get() so that a reply missing a key yields a failed / empty
    # GoogleResponse instead of a KeyError.
    success = bool(j.get("success", False))
    response = (j.get("response") if success else j.get("error")) or ""
    if success:
        logger.debug('Google request succeeded.')
        if response:
            logger.debug("Google said: '%s'", response)

    audio = ""
    audio_transcription: Optional[str] = ""
    audio_path = j.get("audio")
    if audio_path:
        audio = f"{bridge}{audio_path}"
        if recognize_speech:
            try:
                audio_transcription = _transcribe_audio(audio)
            except (sr.UnknownValueError, sr.RequestError) as e:
                # Transcription is best effort: both unintelligible audio
                # and a failed recognition request degrade to None.
                logger.exception(e)
                msg = 'Unable to parse Google assistant\'s response.'
                logger.warning(msg)
                warnings.warn(msg, stacklevel=3)
                audio_transcription = None

    return GoogleResponse(
        success=success,
        response=response,
        audio_url=audio,
        audio_transcription=audio_transcription,
    )