Used isort to sort imports. Also added to the git pre-commit hook.
[python_utils.git] / google_assistant.py
1 #!/usr/bin/env python3
2
3 import logging
4 import sys
5 import warnings
6 from typing import NamedTuple, Optional
7
8 import requests
9 import speech_recognition as sr  # type: ignore
10
11 import config
12
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)

# Command-line flags owned by this module, registered through the project's
# config module under their own argument group.
parser = config.add_commandline_args(
    f"Google Assistant ({__file__})",
    "Args related to contacting the Google Assistant",
)

# Base URL of the bridge; ask_google() POSTs commands to "<URL>/assistant".
parser.add_argument(
    "--google_assistant_bridge",
    metavar="URL",
    default="http://kiosk.house:3000",
    type=str,
    help="How to contact the Google Assistant bridge",
)

# Account name sent as the "user" field of every request payload.
parser.add_argument(
    "--google_assistant_username",
    metavar="GOOGLE_ACCOUNT",
    default="scott.gasch",
    type=str,
    help="The user account for talking to Google Assistant",
)
33
34
class GoogleResponse(NamedTuple):
    """The outcome of one request sent to the Google Assistant bridge."""

    # True when the bridge reported that the request succeeded.
    success: bool
    # Text of the reply; holds an error message when success is False.
    response: str
    # URL of the audio reply, or "" when there is none.
    audio_url: str
    # Speech-to-text rendering of the audio reply.
    audio_transcription: Optional[str]  # None if not available.

    def __repr__(self):
        """Return one "label: value" line per field, each preceded by a newline."""
        labelled = (
            ("success", self.success),
            ("response", self.response),
            ("audio_transcription", self.audio_transcription),
            ("audio_url", self.audio_url),
        )
        return "".join(f"\n{label}: {value}" for label, value in labelled)
47
48
def tell_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
    """Issue a command to Google; a convenience alias for ask_google.

    Both arguments are forwarded unchanged and ask_google's result is
    returned as-is.
    """
    reply = ask_google(cmd, recognize_speech=recognize_speech)
    return reply
52
53
def ask_google(cmd: str, *, recognize_speech=True) -> GoogleResponse:
    """Send a command string to Google via the google_assistant_bridge as the
    user google_assistant_username and return the response.

    The command is POSTed as JSON to the bridge's "/assistant" endpoint.

    Args:
        cmd: the command / question text to send.
        recognize_speech: if True, download the audio reply and perform
            speech recognition on it so as to translate it into text
            (best effort, YMMV).

    Returns:
        A GoogleResponse.  If the bridge answers with a non-200 HTTP status,
        success is False and response holds a diagnostic message.  Otherwise
        success / response mirror the bridge's JSON reply (response carries
        the bridge's "error" text when success is False).  audio_transcription
        is None when the audio could not be understood, and "" when no
        transcription was attempted or the audio could not be downloaded.
    """
    logger.debug(f"Asking google: '{cmd}'")
    payload = {
        "command": cmd,
        "user": config.config['google_assistant_username'],
    }
    bridge = config.config['google_assistant_bridge']
    url = f"{bridge}/assistant"
    reply = requests.post(url, json=payload)

    audio = ""
    audio_transcription: Optional[str] = ""

    # Guard clause: the bridge itself rejected or failed the request.
    if reply.status_code != 200:
        message = f'HTTP request to {url} with {payload} failed; code {reply.status_code}'
        logger.error(message)
        return GoogleResponse(
            success=False,
            response=message,
            audio_url=audio,
            audio_transcription=audio_transcription,
        )

    j = reply.json()
    success = bool(j["success"])
    response = j["response"] if success else j["error"]
    if success:
        logger.debug('Google request succeeded.')
        if response:
            logger.debug(f"Google said: '{response}'")

    # NOTE(review): assumes the bridge may omit "audio" (e.g. on error
    # replies) -- confirm against the bridge.  Without this guard a missing
    # key raises KeyError and the caller never sees the error text.
    audio_path = j.get("audio")
    if audio_path:
        audio = f"{bridge}{audio_path}"
        if recognize_speech:
            audio_reply = requests.get(audio)
            if audio_reply.status_code == 200:
                # The downloaded bytes are interpreted as raw 24 kHz audio
                # with 2-byte samples.
                speech = sr.AudioData(
                    frame_data=audio_reply.content,
                    sample_rate=24000,
                    sample_width=2,
                )
                recognizer = sr.Recognizer()
                try:
                    audio_transcription = recognizer.recognize_google(
                        speech,
                    )
                except sr.UnknownValueError as e:
                    # Best effort only: report the problem but still hand
                    # back the rest of the response.
                    logger.exception(e)
                    msg = 'Unable to parse Google assistant\'s response.'
                    logger.warning(msg)
                    warnings.warn(msg, stacklevel=3)
                    audio_transcription = None
                else:
                    logger.debug(f"Transcription: '{audio_transcription}'")

    return GoogleResponse(
        success=success,
        response=response,
        audio_url=audio,
        audio_transcription=audio_transcription,
    )