Used isort to sort imports. Also added isort to the git pre-commit hook.
[python_utils.git] / google_assistant.py
index 500a909feae4f5f898cbd5fbb6812449043f4796..ec5f6a4c85e17ed0fb2eaaf4f2f22d8ee2ebf300 100644 (file)
@@ -1,7 +1,9 @@
 #!/usr/bin/env python3
 
 import logging
-from typing import NamedTuple
+import sys
+import warnings
+from typing import NamedTuple, Optional
 
 import requests
 import speech_recognition as sr  # type: ignore
@@ -19,14 +21,14 @@ parser.add_argument(
     type=str,
     default="http://kiosk.house:3000",
     metavar="URL",
-    help="How to contact the Google Assistant bridge"
+    help="How to contact the Google Assistant bridge",
 )
 parser.add_argument(
     "--google_assistant_username",
     type=str,
     metavar="GOOGLE_ACCOUNT",
     default="scott.gasch",
-    help="The user account for talking to Google Assistant"
+    help="The user account for talking to Google Assistant",
 )
 
 
@@ -34,7 +36,7 @@ class GoogleResponse(NamedTuple):
     success: bool
     response: str
     audio_url: str
-    audio_transcription: str
+    audio_transcription: Optional[str]  # None if not available.
 
     def __repr__(self):
         return f"""
@@ -45,10 +47,17 @@ audio_url: {self.audio_url}"""
 
 
 def tell_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
+    """Alias for ask_google."""
     return ask_google(cmd, recognize_speech=recognize_speech)
 
 
 def ask_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
+    """Send a command string to Google via the google_assistant_bridge as the
+    user google_assistant_username and return the response.  If recognize_speech
+    is True, perform speech recognition on the audio response from Google so as
+    to translate it into text (best effort, YMMV).
+    """
+    logging.debug(f"Asking google: '{cmd}'")
     payload = {
         "command": cmd,
         "user": config.config['google_assistant_username'],
@@ -58,11 +67,15 @@ def ask_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
     success = False
     response = ""
     audio = ""
-    audio_transcription = ""
+    audio_transcription: Optional[str] = ""
     if r.status_code == 200:
         j = r.json()
         success = bool(j["success"])
         response = j["response"] if success else j["error"]
+        if success:
+            logger.debug('Google request succeeded.')
+            if len(response) > 0:
+                logger.debug(f"Google said: '{response}'")
         audio = f"{config.config['google_assistant_bridge']}{j['audio']}"
         if recognize_speech:
             recognizer = sr.Recognizer()
@@ -74,16 +87,30 @@ def ask_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
                     sample_rate=24000,
                     sample_width=2,
                 )
-                audio_transcription = recognizer.recognize_google(
-                    speech,
-                )
+                try:
+                    audio_transcription = recognizer.recognize_google(
+                        speech,
+                    )
+                    logger.debug(f"Transcription: '{audio_transcription}'")
+                except sr.UnknownValueError as e:
+                    logger.exception(e)
+                    msg = 'Unable to parse Google assistant\'s response.'
+                    logger.warning(msg)
+                    warnings.warn(msg, stacklevel=3)
+                    audio_transcription = None
+        return GoogleResponse(
+            success=success,
+            response=response,
+            audio_url=audio,
+            audio_transcription=audio_transcription,
+        )
     else:
-        logger.error(
-            f'HTTP request to {url} with {payload} failed; code {r.status_code}'
+        message = f'HTTP request to {url} with {payload} failed; code {r.status_code}'
+        logger.error(message)
+        return GoogleResponse(
+            success=False,
+            response=message,
+            audio_url=audio,
+            audio_transcription=audio_transcription,
         )
-    return GoogleResponse(
-        success=success,
-        response=response,
-        audio_url=audio,
-        audio_transcription=audio_transcription,
-    )
+        sys.exit(-1)