Make smart futures avoid polling.
[python_utils.git] / google_assistant.py
1 #!/usr/bin/env python3
2
import logging
import sys
from typing import NamedTuple, Optional

import requests
import speech_recognition as sr  # type: ignore

import config
11
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Register this module's flags with the project's config module.  The values
# are read back later via config.config[<flag name without the dashes>].
parser = config.add_commandline_args(
    f"Google Assistant ({__file__})",
    "Args related to contacting the Google Assistant",
)

# Base URL of the bridge; request paths are appended to this value.
parser.add_argument(
    "--google_assistant_bridge",
    metavar="URL",
    default="http://kiosk.house:3000",
    type=str,
    help="How to contact the Google Assistant bridge",
)

# Account name sent to the bridge as the "user" of each request.
parser.add_argument(
    "--google_assistant_username",
    metavar="GOOGLE_ACCOUNT",
    default="scott.gasch",
    type=str,
    help="The user account for talking to Google Assistant",
)
32
33
class GoogleResponse(NamedTuple):
    """The outcome of one request sent to the Google Assistant bridge."""

    # True when the bridge reported success, False otherwise.
    success: bool
    # The text reply on success, or an error message on failure.
    response: str
    # URL of the audio reply; empty when the HTTP request itself failed.
    audio_url: str
    # Text recognized from the audio reply.  Empty when no transcription was
    # attempted or the audio could not be fetched; None when speech
    # recognition was attempted but failed.
    audio_transcription: Optional[str]

    def __repr__(self):
        """Return a multi-line dump of all fields (starts with a newline)."""
        return (
            f"\nsuccess: {self.success}"
            f"\nresponse: {self.response}"
            f"\naudio_transcription: {self.audio_transcription}"
            f"\naudio_url: {self.audio_url}"
        )
46
47
def tell_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
    """Synonym for ask_google.

    Forwards cmd and recognize_speech unchanged and hands back whatever
    ask_google returns.
    """
    reply = ask_google(cmd, recognize_speech=recognize_speech)
    return reply
51
52
def ask_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
    """Send a command string to Google via the google_assistant_bridge as the
    user google_assistant_username and return the response.

    Args:
        cmd: the command text to send to the assistant.
        recognize_speech: if True, download the audio response and perform
            speech recognition on it so as to translate it into text (best
            effort, YMMV).

    Returns:
        A GoogleResponse.  When the bridge does not answer with HTTP 200,
        success is False, response holds a description of the failure and
        the audio fields are empty strings.  Otherwise success / response
        mirror the bridge's JSON reply, audio_url points at the audio
        response and audio_transcription is the recognized text, "" if no
        transcription was attempted or the audio could not be fetched, or
        None if recognition was attempted and failed.
    """
    logger.debug("Asking google: '%s'", cmd)
    bridge = config.config['google_assistant_bridge']
    payload = {
        "command": cmd,
        "user": config.config['google_assistant_username'],
    }
    url = f"{bridge}/assistant"
    r = requests.post(url, json=payload)

    # Guard clause: anything but a 200 from the bridge is reported as failure.
    if r.status_code != 200:
        message = f'HTTP request to {url} with {payload} failed; code {r.status_code}'
        logger.error(message)
        return GoogleResponse(
            success=False,
            response=message,
            audio_url="",
            audio_transcription="",
        )

    j = r.json()
    success = bool(j["success"])
    # On failure the bridge's message is read from "error" instead.
    response = j["response"] if success else j["error"]
    if success:
        logger.debug('Google request succeeded.')
        if response:
            logger.debug("Google said: '%s'", response)

    # NOTE(review): assumes the reply always carries an "audio" path, even
    # when success is false -- confirm against the bridge.
    audio = f"{bridge}{j['audio']}"

    audio_transcription: Optional[str] = ""
    if recognize_speech:
        recognizer = sr.Recognizer()
        audio_reply = requests.get(audio)
        if audio_reply.status_code == 200:
            # NOTE(review): the sample rate / width are hard-coded; presumably
            # they match the audio format the bridge serves -- confirm.
            speech = sr.AudioData(
                frame_data=audio_reply.content,
                sample_rate=24000,
                sample_width=2,
            )
            try:
                audio_transcription = recognizer.recognize_google(speech)
                logger.debug("Transcription: '%s'", audio_transcription)
            except sr.UnknownValueError as e:
                logger.exception(e)
                logger.warning('Unable to parse Google assistant\'s response.')
                audio_transcription = None
            except sr.RequestError as e:
                # Transcription is best effort: a failure to reach the
                # recognition service must not discard the assistant's reply.
                logger.exception(e)
                logger.warning('Unable to reach the speech recognition service.')
                audio_transcription = None

    return GoogleResponse(
        success=success,
        response=response,
        audio_url=audio,
        audio_transcription=audio_transcription,
    )