google_assistant.py

   1 #!/usr/bin/env python3
   2
import logging
from typing import NamedTuple, Optional

import requests
import speech_recognition as sr  # type: ignore

import config
  10
  11 logger = logging.getLogger(__name__)
  12
  13 parser = config.add_commandline_args(
  14     f"Google Assistant ({__file__})",
  15     "Args related to contacting the Google Assistant",
  16 )
  17 parser.add_argument(
  18     "--google_assistant_bridge",
  19     type=str,
  20     default="http://kiosk.house:3000",
  21     metavar="URL",
  22     help="How to contact the Google Assistant bridge"
  23 )
  24 parser.add_argument(
  25     "--google_assistant_username",
  26     type=str,
  27     metavar="GOOGLE_ACCOUNT",
  28     default="scott.gasch",
  29     help="The user account for talking to Google Assistant"
  30 )
  31
  32
  33 class GoogleResponse(NamedTuple):
  34     success: bool
  35     response: str
  36     audio_url: str
  37     audio_transcription: str
  38
  39     def __repr__(self):
  40         return f"""
  41 success: {self.success}
  42 response: {self.response}
  43 audio_transcription: {self.audio_transcription}
  44 audio_url: {self.audio_url}"""
  45
  46
  47 def tell_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
  48     """Alias for ask_google."""
  49     return ask_google(cmd, recognize_speech=recognize_speech)
  50
  51
  52 def ask_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
  53     """Send a command string to Google via the google_assistant_bridge as the
  54     user google_assistant_username and return the response.  If recognize_speech
  55     is True, perform speech recognition on the audio response from Google so as
  56     to translate it into text (best effort, YMMV).
  57     """
  58     logging.debug(f"Asking google: '{cmd}'")
  59     payload = {
  60         "command": cmd,
  61         "user": config.config['google_assistant_username'],
  62     }
  63     url = f"{config.config['google_assistant_bridge']}/assistant"
  64     r = requests.post(url, json=payload)
  65     success = False
  66     response = ""
  67     audio = ""
  68     audio_transcription = ""
  69     if r.status_code == 200:
  70         j = r.json()
  71         success = bool(j["success"])
  72         response = j["response"] if success else j["error"]
  73         if success:
  74             logger.debug('Google request succeeded.')
  75             if len(response) > 0:
  76                 logger.debug(f"Google said: '{response}'")
  77         audio = f"{config.config['google_assistant_bridge']}{j['audio']}"
  78         if recognize_speech:
  79             recognizer = sr.Recognizer()
  80             r = requests.get(audio)
  81             if r.status_code == 200:
  82                 raw = r.content
  83                 speech = sr.AudioData(
  84                     frame_data=raw,
  85                     sample_rate=24000,
  86                     sample_width=2,
  87                 )
  88                 try:
  89                     audio_transcription = recognizer.recognize_google(
  90                         speech,
  91                     )
  92                     logger.debug(f"Transcription: '{audio_transcription}'")
  93                 except sr.UnknownValueError as e:
  94                     logger.exception(e)
  95                     logger.warning('Unable to parse Google assistant\'s response.')
  96                     audio_transcription = None
  97     else:
  98         logger.error(
  99             f'HTTP request to {url} with {payload} failed; code {r.status_code}'
 100         )
 101     return GoogleResponse(
 102         success=success,
 103         response=response,
 104         audio_url=audio,
 105         audio_transcription=audio_transcription,
 106     )