Add doctests to some of this stuff.
[python_utils.git] / google_assistant.py
1 #!/usr/bin/env python3
2
import logging
from typing import NamedTuple, Optional

import requests
import speech_recognition as sr  # type: ignore

import config
10
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)

# Register this module's command line flags with the project's config module.
# NOTE(review): presumably returns an argparse-style argument group -- confirm
# against config.add_commandline_args.
parser = config.add_commandline_args(
    f"Google Assistant ({__file__})",
    "Args related to contacting the Google Assistant",
)

# Base URL of the bridge; ask_google appends "/assistant" to it.
parser.add_argument(
    "--google_assistant_bridge",
    metavar="URL",
    default="http://kiosk.house:3000",
    type=str,
    help="How to contact the Google Assistant bridge",
)

# Account name sent as the "user" field of each request payload.
parser.add_argument(
    "--google_assistant_username",
    metavar="GOOGLE_ACCOUNT",
    default="scott.gasch",
    type=str,
    help="The user account for talking to Google Assistant",
)
31
32
class GoogleResponse(NamedTuple):
    """The outcome of one request sent to the Google Assistant bridge."""

    # True only when the bridge answered HTTP 200 and reported success.
    success: bool
    # The assistant's textual response on success, the bridge's error text on
    # a reported failure, or "" when the HTTP request itself failed.
    response: str
    # Absolute URL of the audio rendition of the response, or "" if none.
    audio_url: str
    # Best-effort transcription of the audio: "" when no transcription was
    # produced or attempted, None when the recognizer could not understand
    # the audio.
    audio_transcription: Optional[str]

    def __repr__(self) -> str:
        """Return a multi-line, human-readable dump of all four fields.

        The string intentionally starts with a newline so that it stands
        apart from whatever prefix it is printed or logged after.
        """
        return f"""
success: {self.success}
response: {self.response}
audio_transcription: {self.audio_transcription}
audio_url: {self.audio_url}"""
45
46
def tell_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
    """Thin alias for ask_google: forwards cmd and recognize_speech
    unchanged and hands back the GoogleResponse it produces.
    """
    reply = ask_google(cmd, recognize_speech=recognize_speech)
    return reply
50
51
def _transcribe_audio(audio_url: str) -> Optional[str]:
    """Download the audio at audio_url and run speech recognition on it.

    Returns the recognized text, "" if the audio could not be downloaded,
    or None if the recognizer could not understand the audio.  Any other
    recognizer error (e.g. sr.RequestError) propagates to the caller.
    """
    audio_reply = requests.get(audio_url)
    if audio_reply.status_code != 200:
        logger.warning(
            f'Unable to fetch audio from {audio_url}; code {audio_reply.status_code}'
        )
        return ""

    # NOTE(review): assumes the bridge serves raw 24kHz audio with 2-byte
    # samples -- confirm against the bridge's audio format.
    speech = sr.AudioData(
        frame_data=audio_reply.content,
        sample_rate=24000,
        sample_width=2,
    )
    try:
        transcription = sr.Recognizer().recognize_google(speech)
    except sr.UnknownValueError as e:
        logger.exception(e)
        logger.warning("Unable to parse Google assistant's response.")
        return None
    logger.debug(f"Transcription: '{transcription}'")
    return transcription


def ask_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
    """Send a command string to Google via the google_assistant_bridge as the
    user google_assistant_username and return the response.  If recognize_speech
    is True, perform speech recognition on the audio response from Google so as
    to translate it into text (best effort, YMMV).

    Args:
        cmd: the command / question text to send.
        recognize_speech: whether to download and transcribe the audio reply.

    Returns:
        A GoogleResponse.  On a non-200 reply from the bridge every field is
        left at its empty default (success=False, "" elsewhere) and an error
        is logged.  audio_transcription is None when the audio was fetched
        but could not be understood by the recognizer.
    """
    logger.debug(f"Asking google: '{cmd}'")
    payload = {
        "command": cmd,
        "user": config.config['google_assistant_username'],
    }
    bridge = config.config['google_assistant_bridge']
    url = f"{bridge}/assistant"
    r = requests.post(url, json=payload)

    success = False
    response = ""
    audio = ""
    audio_transcription: Optional[str] = ""
    if r.status_code == 200:
        j = r.json()
        success = bool(j["success"])
        response = j["response"] if success else j["error"]
        if success:
            logger.debug('Google request succeeded.')
            if response:
                logger.debug(f"Google said: '{response}'")

        # Don't assume the reply carries audio; presumably a failure payload
        # may omit it (TODO confirm against the bridge).  Without a path
        # there is nothing to link to or transcribe.
        audio_path = j.get("audio")
        if audio_path:
            audio = f"{bridge}{audio_path}"
            if recognize_speech:
                audio_transcription = _transcribe_audio(audio)
    else:
        logger.error(
            f'HTTP request to {url} with {payload} failed; code {r.status_code}'
        )
    return GoogleResponse(
        success=success,
        response=response,
        audio_url=audio,
        audio_transcription=audio_transcription,
    )