feat: send audio

AudioSender starts a separate thread that reads audio from the microphone and sends it over a ZeroMQ PUB socket. ref: N25B-119
2025-10-22 15:10:27 +02:00
parent f8db719bfa
commit 0499cd8a24
7 changed files with 150 additions and 3 deletions
--- a/src/robot_interface/endpoints/audio_sender.py
+++ b/src/robot_interface/endpoints/audio_sender.py
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals  # So that `logging` can use Unicode characters in names
+import threading
+import logging
+
+import pyaudio
+import zmq
+
+from robot_interface.endpoints.socket_base import SocketBase
+from robot_interface.state import state
+from robot_interface.utils.microphone import choose_mic_default
+
+
+logger = logging.getLogger(__name__)
+
+
+class AudioSender(SocketBase):  # Sends microphone audio over `self.socket` from a worker thread
+    def __init__(self, zmq_context, port=5558):  # Both are forwarded to `create_socket`
+        super(AudioSender, self).__init__(str("audio"))  # Convert future's unicode_literal to str
+        self.create_socket(zmq_context, zmq.PUB, port)
+        self.audio = pyaudio.PyAudio()  # Used to pick the microphone and to open the input stream
+        self.microphone = choose_mic_default(self.audio)  # Falsy means `start` only logs
+        self.thread = None  # Worker thread; set in `start`, reset in `wait_until_done`
+
+    def start(self):
+        """
+        Start the audio thread (target: `_stream`); only logs and returns if there is no microphone.
+        """
+        if not self.microphone:
+            logger.info("Not listening: no microphone available.")
+            return
+
+        logger.info("Listening with microphone \"{}\".".format(self.microphone["name"]))
+        self.thread = threading.Thread(target=self._stream)
+        self.thread.start()
+
+    def wait_until_done(self):
+        """
+        Join the audio thread, then clear `self.thread`; a no-op if no thread was started. The
+        thread normally only ends once `state.exit_event` is set, so set it first or this blocks.
+        """
+        if not self.thread: return  # Nothing to join
+        self.thread.join()
+        self.thread = None
+
+    def _stream(self):  # Thread target: forward microphone chunks until `state.exit_event` is set
+        chunk = 512  # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD
+
+        # Docs say this only raises if neither `input` nor `output` is True; NOTE(review): verify
+        stream = self.audio.open(
+            format=pyaudio.paFloat32,
+            channels=1,
+            rate=16000,  # Hz
+            input=True,
+            input_device_index=self.microphone["index"],
+            frames_per_buffer=chunk,
+        )
+
+        try:
+            while not state.exit_event.is_set():
+                data = stream.read(chunk)  # `chunk` frames per read
+                self.socket.send(data)  # One message per chunk, sent as returned by `read`
+        except IOError as e:  # Log and end the thread; the stream is still closed below
+            logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e)
+        finally:
+            stream.stop_stream()
+            stream.close()
--- a/src/robot_interface/main.py
+++ b/src/robot_interface/main.py
@@ -1,4 +1,7 @@
 import logging
+
+from robot_interface.endpoints.audio_sender import AudioSender
+
 logging.basicConfig(level=logging.DEBUG)

 import zmq
@@ -25,8 +28,11 @@ def main_loop(context):

    video_sender = VideoSender(context)
    state.sockets.append(video_sender)
+    audio_sender = AudioSender(context)
+    state.sockets.append(audio_sender)

    video_sender.start_video_rcv()
+    audio_sender.start()

    # Sockets that can run on the main thread. These sockets' endpoints should not block for long (say 50 ms at most).
    receivers = [main_receiver, actuation_receiver]
--- a/src/robot_interface/utils/microphone.py
+++ b/src/robot_interface/utils/microphone.py
@@ -1,4 +1,4 @@
-from __future__ import unicode_literals  # So that `print` can print the Unicode strings in names
+from __future__ import unicode_literals  # So that `print` can print Unicode characters in names
 import logging

 logger = logging.getLogger(__name__)