From 0499cd8a24044c6af4192876ff43afea23d99810 Mon Sep 17 00:00:00 2001
From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com>
Date: Wed, 22 Oct 2025 15:10:27 +0200
Subject: [PATCH] feat: send audio

AudioSender runs in a separate thread to send audio from the microphone.

ref: N25B-119
---
Reviewer amendments to the original commit (the blob ids on the `index` lines below still refer
to the original contents):
- audio_sender.py: a failure to open the microphone is now logged instead of raising an unhandled
  exception in the audio thread; added docstrings; split the one-line `if ...: return`.
- test_audio_sender.py: the fake stream is now attached to the mocked `PyAudio()` instance, so
  `_fake_read` is actually exercised; the ZeroMQ context fixture cleans up after each test.

 src/robot_interface/endpoints/audio_sender.py | 88 ++++++++++++++++++
 src/robot_interface/main.py                   |  6 ++
 src/robot_interface/utils/microphone.py       |  2 +-
 test/common/__init__.py                       |  0
 test/common/microphone_utils.py               |  2 -
 test/integration/__init__.py                  |  0
 test/unit/test_audio_sender.py                | 81 ++++++++++++++++
 7 files changed, 176 insertions(+), 3 deletions(-)
 create mode 100644 src/robot_interface/endpoints/audio_sender.py
 create mode 100644 test/common/__init__.py
 create mode 100644 test/integration/__init__.py
 create mode 100644 test/unit/test_audio_sender.py

diff --git a/src/robot_interface/endpoints/audio_sender.py b/src/robot_interface/endpoints/audio_sender.py
new file mode 100644
index 0000000..5cd5a6b
--- /dev/null
+++ b/src/robot_interface/endpoints/audio_sender.py
@@ -0,0 +1,88 @@
+from __future__ import unicode_literals  # So that `logging` can use Unicode characters in names
+import threading
+import logging
+
+import pyaudio
+import zmq
+
+from robot_interface.endpoints.socket_base import SocketBase
+from robot_interface.state import state
+from robot_interface.utils.microphone import choose_mic_default
+
+
+logger = logging.getLogger(__name__)
+
+
+class AudioSender(SocketBase):
+    """
+    Publishes raw microphone audio on a ZeroMQ PUB socket from a background thread.
+
+    Audio is captured as mono 32-bit float samples at 16 kHz and sent in chunks of 512 frames.
+    """
+    def __init__(self, zmq_context, port=5558):
+        """
+        :param zmq_context: The ZeroMQ context used to create the PUB socket.
+        :param port: The port passed to `create_socket` for the PUB socket.
+        """
+        super(AudioSender, self).__init__(str("audio"))  # Convert future's unicode_literal to str
+        self.create_socket(zmq_context, zmq.PUB, port)
+        self.audio = pyaudio.PyAudio()
+        self.microphone = choose_mic_default(self.audio)
+        self.thread = None
+
+    def start(self):
+        """
+        Start sending audio in a different thread.
+
+        Does nothing (except logging) when no microphone is available.
+        """
+        if not self.microphone:
+            logger.info("Not listening: no microphone available.")
+            return
+
+        logger.info("Listening with microphone \"{}\".".format(self.microphone["name"]))
+        self.thread = threading.Thread(target=self._stream)
+        self.thread.start()
+
+    def wait_until_done(self):
+        """
+        Wait until the audio thread is done. Will only be done if `state.exit_event` is set, so
+        make sure to set that before calling this method or it will block.
+        """
+        if not self.thread:
+            return
+        self.thread.join()
+        self.thread = None
+
+    def _stream(self):
+        """
+        Read audio from the microphone and publish it until `state.exit_event` is set.
+
+        Runs in the thread started by `start()`. Errors from the audio device are logged rather
+        than raised, so a failing microphone does not crash the thread with a traceback.
+        """
+        chunk = 512  # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD
+
+        try:
+            stream = self.audio.open(
+                format=pyaudio.paFloat32,
+                channels=1,
+                rate=16000,
+                input=True,
+                input_device_index=self.microphone["index"],
+                frames_per_buffer=chunk,
+            )
+        except IOError as e:
+            # PortAudio rejects e.g. an unsupported sample rate or a device that disappeared
+            logger.error("Not listening: failed to open the microphone.", exc_info=e)
+            return
+
+        try:
+            while not state.exit_event.is_set():
+                data = stream.read(chunk)
+                self.socket.send(data)
+        except IOError as e:
+            logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e)
+        finally:
+            stream.stop_stream()
+            stream.close()
diff --git a/src/robot_interface/main.py b/src/robot_interface/main.py
index 934dfd3..8874f7d 100644
--- a/src/robot_interface/main.py
+++ b/src/robot_interface/main.py
@@ -1,4 +1,7 @@
 import logging
+
+from robot_interface.endpoints.audio_sender import AudioSender
+
 logging.basicConfig(level=logging.DEBUG)
 
 import zmq
@@ -25,8 +28,11 @@ def main_loop(context):
 
     video_sender = VideoSender(context)
     state.sockets.append(video_sender)
+    audio_sender = AudioSender(context)
+    state.sockets.append(audio_sender)
 
     video_sender.start_video_rcv()
+    audio_sender.start()
 
     # Sockets that can run on the main thread. These sockets' endpoints should not block for long (say 50 ms at most).
     receivers = [main_receiver, actuation_receiver]
diff --git a/src/robot_interface/utils/microphone.py b/src/robot_interface/utils/microphone.py
index 769f9a6..c37ed0b 100644
--- a/src/robot_interface/utils/microphone.py
+++ b/src/robot_interface/utils/microphone.py
@@ -1,4 +1,4 @@
-from __future__ import unicode_literals  # So that `print` can print the Unicode strings in names
+from __future__ import unicode_literals  # So that `print` can print Unicode characters in names
 import logging
 
 logger = logging.getLogger(__name__)
diff --git a/test/common/__init__.py b/test/common/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/common/microphone_utils.py b/test/common/microphone_utils.py
index 70bcb84..7ecbf27 100644
--- a/test/common/microphone_utils.py
+++ b/test/common/microphone_utils.py
@@ -2,8 +2,6 @@ import random
 import sys
 from StringIO import StringIO
 
-import mock
-
 from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive, get_microphones
 
 
diff --git a/test/integration/__init__.py b/test/integration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/unit/test_audio_sender.py b/test/unit/test_audio_sender.py
new file mode 100644
index 0000000..9aab86f
--- /dev/null
+++ b/test/unit/test_audio_sender.py
@@ -0,0 +1,81 @@
+# coding=utf-8
+import os
+import time
+
+import mock
+import pytest
+import zmq
+
+from robot_interface.endpoints.audio_sender import AudioSender
+
+
+@pytest.fixture
+def zmq_context():
+    context = zmq.Context()
+    yield context
+    # Close any sockets the test left open so the context (and its ports) are released
+    context.destroy(linger=0)
+
+
+def test_no_microphone(zmq_context, mocker):
+    mock_info_logger = mocker.patch("robot_interface.endpoints.audio_sender.logger.info")
+    mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
+    mock_choose_mic.return_value = None
+
+    sender = AudioSender(zmq_context)
+    assert sender.microphone is None
+
+    sender.start()
+    assert sender.thread is None
+    mock_info_logger.assert_called()
+
+    sender.wait_until_done()  # Should return early because we didn't start a thread
+
+
+def test_unicode_mic_name(zmq_context, mocker):
+    mocker.patch("robot_interface.endpoints.audio_sender.threading")
+    mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
+    mock_choose_mic.return_value = {"name": u"• Some Unicode name"}
+
+    sender = AudioSender(zmq_context)
+    assert sender.microphone is not None
+
+    # `.start()` logs the name of the microphone. It should not give an error if it contains Unicode
+    # symbols.
+    sender.start()
+    assert sender.thread is not None
+
+    sender.wait_until_done()  # Should return instantly because we didn't start a real thread
+
+
+def _fake_read(num_frames):
+    return os.urandom(num_frames * 4)
+
+
+def test_sending_audio(mocker):
+    mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
+    mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L}
+
+    mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
+    mock_state.exit_event.is_set.return_value = False
+
+    mock_audio = mocker.patch("robot_interface.endpoints.audio_sender.pyaudio")
+    stream = mock.Mock()
+    stream.read = _fake_read
+    # The stream is opened on the `PyAudio()` instance, i.e. on `PyAudio.return_value`
+    mock_audio.PyAudio.return_value.open.return_value = stream
+
+    mock_zmq_context = mock.Mock()
+    send_socket = mock.Mock()
+
+    sender = AudioSender(mock_zmq_context)
+    sender.socket.send = send_socket
+
+    sender.start()
+    time.sleep(0.01)
+    mock_state.exit_event.is_set.return_value = True
+    sender.wait_until_done()
+
+    send_socket.assert_called()
+    # Every message is one chunk: 512 frames of 4-byte samples, as produced by `_fake_read`
+    assert len(send_socket.call_args[0][0]) == 512 * 4