#!/usr/bin/env python3

"""Send text commands to the Google Assistant through an HTTP bridge.

The bridge location and the account to act as are read from ``config.config``
(flags ``--google_assistant_bridge`` and ``--google_assistant_username``).
"""

import logging
import sys
import warnings
from typing import NamedTuple, Optional

import requests
import speech_recognition as sr  # type: ignore

import config

logger = logging.getLogger(__name__)

parser = config.add_commandline_args(
    f"Google Assistant ({__file__})",
    "Args related to contacting the Google Assistant",
)
parser.add_argument(
    "--google_assistant_bridge",
    type=str,
    default="http://kiosk.house:3000",
    metavar="URL",
    help="How to contact the Google Assistant bridge",
)
parser.add_argument(
    "--google_assistant_username",
    type=str,
    metavar="GOOGLE_ACCOUNT",
    default="scott.gasch",
    help="The user account for talking to Google Assistant",
)


class GoogleResponse(NamedTuple):
    """Outcome of one command sent to the Google Assistant bridge."""

    # Whether the bridge reported the command as successful.
    success: bool
    # The bridge's "response" text on success, its "error" text on failure,
    # or a locally built message when the HTTP request itself failed.
    response: str
    # URL of the audio reply; "" when the HTTP request to the bridge failed.
    audio_url: str
    # Text recognized from the audio reply.  "" when recognition was not
    # attempted or the audio could not be downloaded; None when the
    # recognizer could not understand the audio.
    audio_transcription: Optional[str]

    def __repr__(self):
        # NOTE(review): fields are separated by single spaces; confirm
        # whether a one-field-per-line layout was intended.
        return (
            f" success: {self.success}"
            f" response: {self.response}"
            f" audio_transcription: {self.audio_transcription}"
            f" audio_url: {self.audio_url}"
        )


def tell_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
    """Alias for ask_google."""
    return ask_google(cmd, recognize_speech=recognize_speech)


def ask_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
    """Send a command to Google via the assistant bridge and return the reply.

    The command is POSTed to ``<google_assistant_bridge>/assistant`` on behalf
    of the configured ``google_assistant_username``.

    Args:
        cmd: The command text to send to the assistant.
        recognize_speech: When True, download the audio reply and run speech
            recognition on it to obtain a text transcription (best effort).

    Returns:
        A GoogleResponse.  If the bridge does not answer with HTTP 200,
        ``success`` is False and ``response`` describes the failed request.
    """
    logger.debug("Asking google: '%s'", cmd)
    bridge = config.config['google_assistant_bridge']
    payload = {
        "command": cmd,
        "user": config.config['google_assistant_username'],
    }
    url = f"{bridge}/assistant"

    # NOTE(review): no timeout is passed, so this call can block for as long
    # as the bridge keeps the connection open.
    reply = requests.post(url, json=payload)
    if reply.status_code != 200:
        message = f'HTTP request to {url} with {payload} failed; code {reply.status_code}'
        logger.error(message)
        return GoogleResponse(
            success=False,
            response=message,
            audio_url="",
            audio_transcription="",
        )

    body = reply.json()
    success = bool(body["success"])
    response = body["response"] if success else body["error"]
    if success:
        logger.debug('Google request succeeded.')
        if response:
            logger.debug("Google said: '%s'", response)

    # The bridge returns the audio location relative to its own base URL.
    audio_url = f"{bridge}{body['audio']}"
    audio_transcription: Optional[str] = ""
    if recognize_speech:
        recognizer = sr.Recognizer()
        audio_reply = requests.get(audio_url)
        if audio_reply.status_code == 200:
            # Presumably the bridge serves raw 24 kHz, 2-bytes-per-sample
            # audio -- TODO confirm against the bridge implementation.
            speech = sr.AudioData(
                frame_data=audio_reply.content,
                sample_rate=24000,
                sample_width=2,
            )
            try:
                audio_transcription = recognizer.recognize_google(speech)
                logger.debug("Transcription: '%s'", audio_transcription)
            except sr.UnknownValueError as e:
                # Recognition is best effort: report the problem and signal
                # "no transcription" with None rather than failing the call.
                logger.exception(e)
                msg = "Unable to parse Google assistant's response."
                logger.warning(msg)
                warnings.warn(msg, stacklevel=3)
                audio_transcription = None

    return GoogleResponse(
        success=success,
        response=response,
        audio_url=audio_url,
        audio_transcription=audio_transcription,
    )