Merge branch 'master' of ssh://git.house:/usr/local/git/base/kiosk
[kiosk.git] / listen.py
1 #!/usr/bin/env python3
2
3 import logging
4 import os
5 import struct
6
7 import pvporcupine
8 import pyaudio
9 import speech_recognition as sr
10 from pyutils import logging_utils
11
12 logger = logging.getLogger(__name__)
13
14
class HotwordListener:
    """Listen for Porcupine wake-words and enqueue the spoken command that follows.

    After a wake-word is detected an attention tone is played, a few seconds
    of audio are recorded from the same input stream, the recording is sent to
    Google speech recognition, and the recognized text is put on the command
    queue supplied by the caller.
    """

    # Approximate number of seconds of audio recorded after a wake-word.
    COMMAND_SECONDS = 4

    # Shell command that plays the attention tone once a wake-word is heard.
    ATTENTION_CMD = "aplay /var/www/kiosk/attention.wav"

    def __init__(
        self,
        command_queue,
        keyword_paths,
        sensitivities,
        input_device_index=None,
        library_path=pvporcupine.LIBRARY_PATH,
        model_path=pvporcupine.MODEL_PATH,
    ):
        """Store the configuration; no audio resources are opened here.

        Args:
            command_queue: Object with a ``put(item)`` method (for example a
                ``queue.Queue``); recognized command strings are put on it.
            keyword_paths: Paths of the Porcupine keyword files, handed to
                ``pvporcupine.create``.
            sensitivities: One sensitivity per keyword, handed to
                ``pvporcupine.create``.
            input_device_index: Input device index handed to PyAudio's
                ``open``; defaults to ``None``.
            library_path: Porcupine library path handed to
                ``pvporcupine.create``.
            model_path: Porcupine model path handed to ``pvporcupine.create``.
        """
        self._queue = command_queue
        self._library_path = library_path
        self._model_path = model_path
        self._keyword_paths = keyword_paths
        self._sensitivities = sensitivities
        self._input_device_index = input_device_index

    @logging_utils.LoggingContext(logger, prefix="listener:")
    def listen_forever(self):
        """Run the wake-word / command loop until an error or ^C stops it.

        A command that cannot be recognized (unintelligible speech or a failed
        recognition request) is logged and skipped; the loop keeps listening.
        Any other exception, or a KeyboardInterrupt, is logged and ends the
        loop.  Audio and Porcupine resources are always released on exit.
        """
        # Human-readable keyword names: file basename without ".ppn", up to
        # the first underscore.
        keywords = [
            os.path.basename(path).replace(".ppn", "").split("_")[0]
            for path in self._keyword_paths
        ]

        porcupine = None
        pa = None
        audio_stream = None
        try:
            porcupine = pvporcupine.create(
                library_path=self._library_path,
                model_path=self._model_path,
                keyword_paths=self._keyword_paths,
                sensitivities=self._sensitivities,
            )
            recognizer = sr.Recognizer()
            pa = pyaudio.PyAudio()

            audio_stream = pa.open(
                rate=porcupine.sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=porcupine.frame_length,
                input_device_index=self._input_device_index,
            )

            logger.info("Listening {")
            for keyword, sensitivity in zip(keywords, self._sensitivities):
                logger.info("  %s (%.2f)", keyword, sensitivity)
            logger.info("}")

            frame_length = porcupine.frame_length
            # Compile the per-frame unpack format once instead of rebuilding
            # the format string on every loop iteration.
            frame_struct = struct.Struct("h" * frame_length)

            while True:
                raw = audio_stream.read(frame_length, exception_on_overflow=False)
                pcm = frame_struct.unpack_from(raw)
                result = porcupine.process(pcm)
                if result < 0:
                    # Negative result: no wake-word in this frame.
                    continue

                logger.info(
                    "Running %s (attention tone) because I heard the wake-word",
                    self.ATTENTION_CMD,
                )
                os.system(self.ATTENTION_CMD)
                # A non-negative result indexes the detected keyword.
                logger.debug(">>>>>>>>>>>>> Detected wakeword %s", keywords[result])

                frame_data = self._record_command(audio_stream, porcupine)
                logger.debug(
                    ">>>>>>>>>>>>>> Recognizing command... %d bytes", len(frame_data)
                )
                speech = sr.AudioData(
                    frame_data=frame_data,
                    sample_rate=porcupine.sample_rate,
                    sample_width=2,  # 16 bits
                )
                command = self._recognize(recognizer, speech)
                if command is None:
                    # Nothing usable was heard; go back to wake-word detection.
                    continue

                logger.debug(">>>>>>>>>>>>> Google says command was %s", command)
                logger.info("Enqueued command=%s", command)
                self._queue.put(command)

        except Exception:
            logger.exception("Stopping listener because of unexpected exception!")

        except KeyboardInterrupt:
            logger.exception("Stopping listener because of ^C!")

        finally:
            logger.debug("Cleaning up... one sec...")
            self._release(porcupine, audio_stream, pa)

    def _record_command(self, audio_stream, porcupine):
        """Read roughly COMMAND_SECONDS of audio from the stream and return it as bytes."""
        frame_count = int(
            porcupine.sample_rate / porcupine.frame_length * self.COMMAND_SECONDS
        )
        return b"".join(
            audio_stream.read(porcupine.frame_length, exception_on_overflow=False)
            for _ in range(frame_count)
        )

    @staticmethod
    def _recognize(recognizer, speech):
        """Return the text Google recognizes in speech, or None if recognition fails."""
        try:
            return recognizer.recognize_google(speech)
        except sr.UnknownValueError:
            # Raised when the speech is unintelligible; not worth a traceback.
            logger.info("Could not understand the command; ignoring it")
        except sr.RequestError:
            logger.exception("Speech recognition request failed; ignoring this command")
        return None

    @staticmethod
    def _release(porcupine, audio_stream, pa):
        """Release each acquired resource; a failure in one step does not skip the rest."""
        steps = (
            (porcupine, "delete"),
            (audio_stream, "close"),
            (pa, "terminate"),
        )
        for resource, method_name in steps:
            if resource is None:
                continue
            try:
                getattr(resource, method_name)()
            except Exception:
                logger.exception(
                    "Ignoring failure in %s() during cleanup", method_name
                )

    @classmethod
    def show_audio_devices(cls):
        """Print index, name, default sample rate and input channel count of every audio device."""
        fields = ("index", "name", "defaultSampleRate", "maxInputChannels")
        pa = pyaudio.PyAudio()
        try:
            for i in range(pa.get_device_count()):
                info = pa.get_device_info_by_index(i)
                print(", ".join(f"'{k}': '{info[k]!s}'" for k in fields))
        finally:
            # Always release PyAudio, even if a device query raised.
            pa.terminate()
125
126
def main():
    """Run a standalone listener for the built-in "blueberry" and "bumblebee" keywords.

    Recognized commands are put on a local queue that nothing consumes; they
    are visible through the listener's log output.
    """
    # Local import: only this script entry point needs a queue of its own.
    import queue

    keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in ("blueberry", "bumblebee")]
    sensitivities = [0.85, 0.95]
    # The listener calls command_queue.put(); a plain list has no put(), so a
    # real queue is required here.
    HotwordListener(
        queue.Queue(),
        keyword_paths,
        sensitivities,
    ).listen_forever()


# Start listening only when executed as a script, not when imported.
if __name__ == "__main__":
    main()