From 0f60f67ab98d4c790aece28ce48fb4ac8212f3cb Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Wed, 22 Oct 2025 11:44:51 +0200 Subject: [PATCH 01/11] feat: add microphone selection utils Providing two functions, one to choose the default microphone, the other to choose a microphone interactively. With tests. ref: N25B-119 --- src/robot_interface/utils/microphone.py | 54 ++++++++++++ test/unit/test_microphone_utils.py | 107 ++++++++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 src/robot_interface/utils/microphone.py create mode 100644 test/unit/test_microphone_utils.py diff --git a/src/robot_interface/utils/microphone.py b/src/robot_interface/utils/microphone.py new file mode 100644 index 0000000..c8f5ee3 --- /dev/null +++ b/src/robot_interface/utils/microphone.py @@ -0,0 +1,54 @@ +import logging + +logger = logging.getLogger(__name__) + + +def choose_mic_interactive(audio): + """ + Choose a microphone to use, interactively in the CLI. + + :param audio: An instance of PyAudio to use. + :type audio: pyaudio.PyAudio + + :return: A dictionary from PyAudio containing information about the microphone to use, or None + if there is no microphone. 
+ :rtype: dict | None + """ + device_count = audio.get_device_count() + if device_count == 0: return None + + print("Found {} audio devices:".format(device_count)) + for i in range(device_count): + print("- {}: {}".format(i, audio.get_device_info_by_index(i)["name"])) + + microphone_index = None + while microphone_index is None: + chosen = raw_input("Which device would you like to use?\n> ") + try: + chosen = int(chosen) + if chosen < 0 or chosen >= device_count: raise ValueError() + microphone_index = chosen + except ValueError: + print("Please enter a number between 0 and {}".format(device_count-1)) + + chosen_microphone = audio.get_device_info_by_index(microphone_index) + logger.info("Chose microphone \"{}\"".format(chosen_microphone["name"])) + return chosen_microphone + + +def choose_mic_default(audio): + """ + Get the system's default microphone to use. + + :param audio: An instance of PyAudio to use. + :type audio: pyaudio.PyAudio + + :return: A dictionary from PyAudio containing information about the microphone to use, or None + if there is no microphone. + :rtype: dict | None + """ + device_count = audio.get_device_count() + if device_count == 0: return None + + default_device = audio.get_default_input_device_info() + return default_device diff --git a/test/unit/test_microphone_utils.py b/test/unit/test_microphone_utils.py new file mode 100644 index 0000000..0114f73 --- /dev/null +++ b/test/unit/test_microphone_utils.py @@ -0,0 +1,107 @@ +import functools +import random +from StringIO import StringIO +import sys + +import pyaudio +import pytest + +from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive + + +@pytest.fixture +def pyaudio_instance(): + audio = pyaudio.PyAudio() + yield audio + + +def test_choose_mic_default(pyaudio_instance): + """ + The result must contain at least "index", as this is used to identify the microphone. + The "name" is used for logging, so it should also exist. 
+ It must have one or more channels. + Lastly it must be capable of sending at least 16000 samples per second. + """ + result = choose_mic_default(pyaudio_instance) + assert "index" in result + assert isinstance(result["index"], long) + + assert "name" in result + assert isinstance(result["name"], (str, unicode)) + + assert "maxInputChannels" in result + assert isinstance(result["maxInputChannels"], long) + assert result["maxInputChannels"] > 0 + + assert "defaultSampleRate" in result + assert isinstance(result["defaultSampleRate"], float) + assert result["defaultSampleRate"] >= 16000 + + +def test_choose_mic_interactive_input_not_int(pyaudio_instance, mocker): + """ + First mock an input that's not an integer, then a valid integer. There should be no errors. + """ + mock_input = mocker.patch("__builtin__.raw_input", side_effect=["not an integer", "0"]) + fake_out = StringIO() + mocker.patch.object(sys, "stdout", fake_out) + + result = choose_mic_interactive(pyaudio_instance) + assert "index" in result + assert isinstance(result["index"], (int, long)) + assert result["index"] == 0 + + assert mock_input.called + + assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines()) + + +def test_choose_mic_interactive_negative_index(pyaudio_instance, mocker): + """ + Make sure that the interactive method does not allow negative integers as input. 
+ """ + mock_input = mocker.patch("__builtin__.raw_input", side_effect=["-1", "0"]) + fake_out = StringIO() + mocker.patch.object(sys, "stdout", fake_out) + + result = choose_mic_interactive(pyaudio_instance) + assert "index" in result + assert isinstance(result["index"], (int, long)) + assert result["index"] == 0 + + assert mock_input.called + + assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines()) + + +def test_choose_mic_interactive_index_too_high(pyaudio_instance, mocker): + """ + Make sure that the interactive method does not allow indices higher than the highest mic index. + """ + real_count = pyaudio_instance.get_device_count() + mock_input = mocker.patch("__builtin__.raw_input", side_effect=[str(real_count), "0"]) + fake_out = StringIO() + mocker.patch.object(sys, "stdout", fake_out) + + result = choose_mic_interactive(pyaudio_instance) + assert "index" in result + assert isinstance(result["index"], (int, long)) + assert result["index"] == 0 + + assert mock_input.called + + assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines()) + + +def test_choose_mic_interactive_random_index(pyaudio_instance, mocker): + """ + Get a random index from the list of available mics, make sure it's correct. + """ + real_count = pyaudio_instance.get_device_count() + random_index = random.randrange(real_count) + mocker.patch("__builtin__.raw_input", side_effect=[str(random_index)]) + + result = choose_mic_interactive(pyaudio_instance) + assert "index" in result + assert isinstance(result["index"], (int, long)) + assert result["index"] == random_index From 1e3e077029a60febd09e1ea669bde566514bb67f Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Wed, 22 Oct 2025 13:24:46 +0200 Subject: [PATCH 02/11] fix: disallow selecting non-microphone audio device Previously any audio device was allowed to be selected as microphone. 
Now, only ones with at least one input channel can be selected. ref: N25B-119 --- src/robot_interface/utils/microphone.py | 47 ++++++++++++++++--------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/src/robot_interface/utils/microphone.py b/src/robot_interface/utils/microphone.py index c8f5ee3..769f9a6 100644 --- a/src/robot_interface/utils/microphone.py +++ b/src/robot_interface/utils/microphone.py @@ -1,8 +1,25 @@ +from __future__ import unicode_literals # So that `print` can print the Unicode strings in names import logging logger = logging.getLogger(__name__) +def get_microphones(audio): + """ + Get audio devices which have input channels. + + :param audio: An instance of PyAudio to use. + :type audio: pyaudio.PyAudio + + :return: An iterator of PyAudio dicts containing information about the microphone devices. + :rtype: Iterator[dict] + """ + for i in range(audio.get_device_count()): + device = audio.get_device_info_by_index(i) + if device["maxInputChannels"] > 0: + yield device + + def choose_mic_interactive(audio): """ Choose a microphone to use, interactively in the CLI. @@ -14,24 +31,23 @@ def choose_mic_interactive(audio): if there is no microphone.
:rtype: dict | None """ - device_count = audio.get_device_count() - if device_count == 0: return None + microphones = list(get_microphones(audio)) + if len(microphones) == 0: return None - print("Found {} audio devices:".format(device_count)) - for i in range(device_count): - print("- {}: {}".format(i, audio.get_device_info_by_index(i)["name"])) + print("Found {} microphones:".format(len(microphones))) + for i, mic in enumerate(microphones): + print("- {}: {}".format(i, mic["name"])) - microphone_index = None - while microphone_index is None: + chosen_microphone = None + while chosen_microphone is None: chosen = raw_input("Which device would you like to use?\n> ") try: chosen = int(chosen) - if chosen < 0 or chosen >= device_count: raise ValueError() - microphone_index = chosen + if chosen < 0 or chosen >= len(microphones): raise ValueError() + chosen_microphone = microphones[chosen] except ValueError: - print("Please enter a number between 0 and {}".format(device_count-1)) + print("Please enter a number between 0 and {}".format(len(microphones)-1)) - chosen_microphone = audio.get_device_info_by_index(microphone_index) logger.info("Chose microphone \"{}\"".format(chosen_microphone["name"])) return chosen_microphone @@ -47,8 +63,7 @@ def choose_mic_default(audio): if there is no microphone. :rtype: dict | None """ - device_count = audio.get_device_count() - if device_count == 0: return None - - default_device = audio.get_default_input_device_info() - return default_device + try: + return audio.get_default_input_device_info() + except IOError: + return None From f8db719bfa5cac2366334e9e6aacd739aa991746 Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Wed, 22 Oct 2025 13:27:35 +0200 Subject: [PATCH 03/11] test: unit test mock PyAudio, integration test use real Make unit tests use a mock version of PyAudio, while making integration tests using the real version. 
If no real microphone is available, these integration tests are skipped. ref: N25B-119 --- test/common/microphone_utils.py | 97 +++++++++++++ test/integration/test_microphone_utils.py | 20 +++ test/unit/test_microphone_utils.py | 160 ++++++++++------------ 3 files changed, 186 insertions(+), 91 deletions(-) create mode 100644 test/common/microphone_utils.py create mode 100644 test/integration/test_microphone_utils.py diff --git a/test/common/microphone_utils.py b/test/common/microphone_utils.py new file mode 100644 index 0000000..70bcb84 --- /dev/null +++ b/test/common/microphone_utils.py @@ -0,0 +1,97 @@ +import random +import sys +from StringIO import StringIO + +import mock + +from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive, get_microphones + + +class MicrophoneUtils(object): + """Shared tests for any PyAudio-like implementation, e.g. mock and real.""" + + def test_choose_mic_default(self, pyaudio_instance): + """ + The result must contain at least "index", as this is used to identify the microphone. + The "name" is used for logging, so it should also exist. + It must have one or more channels. + Lastly it must be capable of sending at least 16000 samples per second. + """ + result = choose_mic_default(pyaudio_instance) + assert "index" in result + assert isinstance(result["index"], (int, long)) + + assert "name" in result + assert isinstance(result["name"], (str, unicode)) + + assert "maxInputChannels" in result + assert isinstance(result["maxInputChannels"], (int, long)) + assert result["maxInputChannels"] > 0 + + assert "defaultSampleRate" in result + assert isinstance(result["defaultSampleRate"], float) + assert result["defaultSampleRate"] >= 16000 + + def test_choose_mic_interactive_input_not_int(self, pyaudio_instance, mocker): + """ + First mock an input that's not an integer, then a valid integer. There should be no errors. 
+ """ + mock_input = mocker.patch("__builtin__.raw_input", side_effect=["not an integer", "0"]) + fake_out = StringIO() + mocker.patch.object(sys, "stdout", fake_out) + + result = choose_mic_interactive(pyaudio_instance) + assert "index" in result + assert isinstance(result["index"], (int, long)) + assert result["index"] == 0 + + assert mock_input.called + + assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines()) + + def test_choose_mic_interactive_negative_index(self, pyaudio_instance, mocker): + """ + Make sure that the interactive method does not allow negative integers as input. + """ + mock_input = mocker.patch("__builtin__.raw_input", side_effect=["-1", "0"]) + fake_out = StringIO() + mocker.patch.object(sys, "stdout", fake_out) + + result = choose_mic_interactive(pyaudio_instance) + assert "index" in result + assert isinstance(result["index"], (int, long)) + assert result["index"] == 0 + + assert mock_input.called + + assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines()) + + def test_choose_mic_interactive_index_too_high(self, pyaudio_instance, mocker): + """ + Make sure that the interactive method does not allow indices higher than the highest mic index. + """ + real_count = len(list(get_microphones(pyaudio_instance))) + mock_input = mocker.patch("__builtin__.raw_input", side_effect=[str(real_count), "0"]) + fake_out = StringIO() + mocker.patch.object(sys, "stdout", fake_out) + + result = choose_mic_interactive(pyaudio_instance) + assert "index" in result + assert isinstance(result["index"], (int, long)) + + assert mock_input.called + + assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines()) + + def test_choose_mic_interactive_random_index(self, pyaudio_instance, mocker): + """ + Get a random index from the list of available mics, make sure it's correct. 
+ """ + microphones = list(get_microphones(pyaudio_instance)) + random_index = random.randrange(len(microphones)) + mocker.patch("__builtin__.raw_input", side_effect=[str(random_index)]) + + result = choose_mic_interactive(pyaudio_instance) + assert "index" in result + assert isinstance(result["index"], (int, long)) + assert result["index"] == microphones[random_index]["index"] diff --git a/test/integration/test_microphone_utils.py b/test/integration/test_microphone_utils.py new file mode 100644 index 0000000..a857498 --- /dev/null +++ b/test/integration/test_microphone_utils.py @@ -0,0 +1,20 @@ +import pyaudio + +import pytest + +from common.microphone_utils import MicrophoneUtils + + +@pytest.fixture +def pyaudio_instance(): + audio = pyaudio.PyAudio() + try: + audio.get_default_input_device_info() + return audio + except IOError: + pytest.skip("No microphone available to test with.") + + +class TestAudioIntegration(MicrophoneUtils): + """Run shared audio behavior tests with the mock implementation.""" + pass diff --git a/test/unit/test_microphone_utils.py b/test/unit/test_microphone_utils.py index 0114f73..5ad551d 100644 --- a/test/unit/test_microphone_utils.py +++ b/test/unit/test_microphone_utils.py @@ -1,107 +1,85 @@ -import functools -import random -from StringIO import StringIO -import sys - -import pyaudio +# coding=utf-8 +import mock import pytest +from common.microphone_utils import MicrophoneUtils from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive +class MockPyAudio: + def __init__(self): + # You can predefine fake device info here + self.devices = [ + { + "index": 0, + "name": u"Someone’s Microphone", # Using a Unicode ’ character + "maxInputChannels": 2, + "maxOutputChannels": 0, + "defaultSampleRate": 44100.0, + "defaultLowInputLatency": 0.01, + "defaultLowOutputLatency": 0.01, + "defaultHighInputLatency": 0.1, + "defaultHighOutputLatency": 0.1, + "hostApi": 0, + }, + { + "index": 1, + "name": u"Mock Speaker 1", + 
"maxInputChannels": 0, + "maxOutputChannels": 2, + "defaultSampleRate": 48000.0, + "defaultLowInputLatency": 0.01, + "defaultLowOutputLatency": 0.01, + "defaultHighInputLatency": 0.1, + "defaultHighOutputLatency": 0.1, + "hostApi": 0, + }, + ] + + def get_device_count(self): + """Return the number of available mock devices.""" + return len(self.devices) + + def get_device_info_by_index(self, index): + """Return information for a given mock device index.""" + if 0 <= index < len(self.devices): + return self.devices[index] + else: + raise IOError("Invalid device index: {}".format(index)) + + def get_default_input_device_info(self): + """Return info for a default mock input device.""" + for device in self.devices: + if device.get("maxInputChannels", 0) > 0: + return device + raise IOError("No default input device found") + + @pytest.fixture def pyaudio_instance(): - audio = pyaudio.PyAudio() - yield audio + return MockPyAudio() -def test_choose_mic_default(pyaudio_instance): - """ - The result must contain at least "index", as this is used to identify the microphone. - The "name" is used for logging, so it should also exist. - It must have one or more channels. - Lastly it must be capable of sending at least 16000 samples per second. - """ - result = choose_mic_default(pyaudio_instance) - assert "index" in result - assert isinstance(result["index"], long) - - assert "name" in result - assert isinstance(result["name"], (str, unicode)) - - assert "maxInputChannels" in result - assert isinstance(result["maxInputChannels"], long) - assert result["maxInputChannels"] > 0 - - assert "defaultSampleRate" in result - assert isinstance(result["defaultSampleRate"], float) - assert result["defaultSampleRate"] >= 16000 +def _raise_io_error(): + raise IOError() -def test_choose_mic_interactive_input_not_int(pyaudio_instance, mocker): - """ - First mock an input that's not an integer, then a valid integer. There should be no errors. 
- """ - mock_input = mocker.patch("__builtin__.raw_input", side_effect=["not an integer", "0"]) - fake_out = StringIO() - mocker.patch.object(sys, "stdout", fake_out) +class TestAudioUnit(MicrophoneUtils): + """Run shared audio behavior tests with the mock implementation.""" + def test_choose_mic_default_no_mic(self): + mock_pyaudio = mock.Mock() + mock_pyaudio.get_device_count = mock.Mock(return_value=0L) + mock_pyaudio.get_default_input_device_info = _raise_io_error - result = choose_mic_interactive(pyaudio_instance) - assert "index" in result - assert isinstance(result["index"], (int, long)) - assert result["index"] == 0 + result = choose_mic_default(mock_pyaudio) - assert mock_input.called + assert result is None - assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines()) + def test_choose_mic_interactive_no_mic(self): + mock_pyaudio = mock.Mock() + mock_pyaudio.get_device_count = mock.Mock(return_value=0L) + mock_pyaudio.get_default_input_device_info = _raise_io_error + result = choose_mic_interactive(mock_pyaudio) -def test_choose_mic_interactive_negative_index(pyaudio_instance, mocker): - """ - Make sure that the interactive method does not allow negative integers as input. - """ - mock_input = mocker.patch("__builtin__.raw_input", side_effect=["-1", "0"]) - fake_out = StringIO() - mocker.patch.object(sys, "stdout", fake_out) - - result = choose_mic_interactive(pyaudio_instance) - assert "index" in result - assert isinstance(result["index"], (int, long)) - assert result["index"] == 0 - - assert mock_input.called - - assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines()) - - -def test_choose_mic_interactive_index_too_high(pyaudio_instance, mocker): - """ - Make sure that the interactive method does not allow indices higher than the highest mic index. 
- """ - real_count = pyaudio_instance.get_device_count() - mock_input = mocker.patch("__builtin__.raw_input", side_effect=[str(real_count), "0"]) - fake_out = StringIO() - mocker.patch.object(sys, "stdout", fake_out) - - result = choose_mic_interactive(pyaudio_instance) - assert "index" in result - assert isinstance(result["index"], (int, long)) - assert result["index"] == 0 - - assert mock_input.called - - assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines()) - - -def test_choose_mic_interactive_random_index(pyaudio_instance, mocker): - """ - Get a random index from the list of available mics, make sure it's correct. - """ - real_count = pyaudio_instance.get_device_count() - random_index = random.randrange(real_count) - mocker.patch("__builtin__.raw_input", side_effect=[str(random_index)]) - - result = choose_mic_interactive(pyaudio_instance) - assert "index" in result - assert isinstance(result["index"], (int, long)) - assert result["index"] == random_index + assert result is None From 0499cd8a24044c6af4192876ff43afea23d99810 Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:10:27 +0200 Subject: [PATCH 04/11] feat: send audio AudioSender runs in a separate thread to send audio from the microphone. 
ref: N25B-119 --- src/robot_interface/endpoints/audio_sender.py | 66 ++++++++++++++++ src/robot_interface/main.py | 6 ++ src/robot_interface/utils/microphone.py | 2 +- test/common/__init__.py | 0 test/common/microphone_utils.py | 2 - test/integration/__init__.py | 0 test/unit/test_audio_sender.py | 77 +++++++++++++++++++ 7 files changed, 150 insertions(+), 3 deletions(-) create mode 100644 src/robot_interface/endpoints/audio_sender.py create mode 100644 test/common/__init__.py create mode 100644 test/integration/__init__.py create mode 100644 test/unit/test_audio_sender.py diff --git a/src/robot_interface/endpoints/audio_sender.py b/src/robot_interface/endpoints/audio_sender.py new file mode 100644 index 0000000..5cd5a6b --- /dev/null +++ b/src/robot_interface/endpoints/audio_sender.py @@ -0,0 +1,66 @@ +from __future__ import unicode_literals # So that `logging` can use Unicode characters in names +import threading +import logging + +import pyaudio +import zmq + +from robot_interface.endpoints.socket_base import SocketBase +from robot_interface.state import state +from robot_interface.utils.microphone import choose_mic_default + + +logger = logging.getLogger(__name__) + + +class AudioSender(SocketBase): + def __init__(self, zmq_context, port=5558): + super(AudioSender, self).__init__(str("audio")) # Convert future's unicode_literal to str + self.create_socket(zmq_context, zmq.PUB, port) + self.audio = pyaudio.PyAudio() + self.microphone = choose_mic_default(self.audio) + self.thread = None + + def start(self): + """ + Start sending audio in a different thread. + """ + if not self.microphone: + logger.info("Not listening: no microphone available.") + return + + logger.info("Listening with microphone \"{}\".".format(self.microphone["name"])) + self.thread = threading.Thread(target=self._stream) + self.thread.start() + + def wait_until_done(self): + """ + Wait until the audio thread is done. 
Will only be done if `state.exit_event` is set, so + make sure to set that before calling this method or it will block. + """ + if not self.thread: return + self.thread.join() + self.thread = None + + def _stream(self): + chunk = 512 # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD + + # Docs say this only raises an error if neither `input` nor `output` is True + stream = self.audio.open( + format=pyaudio.paFloat32, + channels=1, + rate=16000, + input=True, + input_device_index=self.microphone["index"], + frames_per_buffer=chunk, + ) + + try: + while not state.exit_event.is_set(): + data = stream.read(chunk) + self.socket.send(data) + except IOError as e: + logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e) + finally: + stream.stop_stream() + stream.close() diff --git a/src/robot_interface/main.py b/src/robot_interface/main.py index 934dfd3..8874f7d 100644 --- a/src/robot_interface/main.py +++ b/src/robot_interface/main.py @@ -1,4 +1,7 @@ import logging + +from robot_interface.endpoints.audio_sender import AudioSender + logging.basicConfig(level=logging.DEBUG) import zmq @@ -25,8 +28,11 @@ def main_loop(context): video_sender = VideoSender(context) state.sockets.append(video_sender) + audio_sender = AudioSender(context) + state.sockets.append(audio_sender) video_sender.start_video_rcv() + audio_sender.start() # Sockets that can run on the main thread. These sockets' endpoints should not block for long (say 50 ms at most). 
receivers = [main_receiver, actuation_receiver] diff --git a/src/robot_interface/utils/microphone.py b/src/robot_interface/utils/microphone.py index 769f9a6..c37ed0b 100644 --- a/src/robot_interface/utils/microphone.py +++ b/src/robot_interface/utils/microphone.py @@ -1,4 +1,4 @@ -from __future__ import unicode_literals # So that `print` can print the Unicode strings in names +from __future__ import unicode_literals # So that `print` can print Unicode characters in names import logging logger = logging.getLogger(__name__) diff --git a/test/common/__init__.py b/test/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/common/microphone_utils.py b/test/common/microphone_utils.py index 70bcb84..7ecbf27 100644 --- a/test/common/microphone_utils.py +++ b/test/common/microphone_utils.py @@ -2,8 +2,6 @@ import random import sys from StringIO import StringIO -import mock - from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive, get_microphones diff --git a/test/integration/__init__.py b/test/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/unit/test_audio_sender.py b/test/unit/test_audio_sender.py new file mode 100644 index 0000000..9aab86f --- /dev/null +++ b/test/unit/test_audio_sender.py @@ -0,0 +1,77 @@ +# coding=utf-8 +import os +import time + +import mock +import pytest +import zmq + +from robot_interface.endpoints.audio_sender import AudioSender + + +@pytest.fixture +def zmq_context(): + context = zmq.Context() + yield context + + +def test_no_microphone(zmq_context, mocker): + mock_info_logger = mocker.patch("robot_interface.endpoints.audio_sender.logger.info") + mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default") + mock_choose_mic.return_value = None + + sender = AudioSender(zmq_context) + assert sender.microphone is None + + sender.start() + assert sender.thread is None + mock_info_logger.assert_called() + + 
sender.wait_until_done() # Should return early because we didn't start a thread + + +def test_unicode_mic_name(zmq_context, mocker): + mocker.patch("robot_interface.endpoints.audio_sender.threading") + mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default") + mock_choose_mic.return_value = {"name": u"• Some Unicode name"} + + sender = AudioSender(zmq_context) + assert sender.microphone is not None + + # `.start()` logs the name of the microphone. It should not give an error if it contains Unicode + # symbols. + sender.start() + assert sender.thread is not None + + sender.wait_until_done() # Should return instantly because we didn't start a real thread + + +def _fake_read(num_frames): + return os.urandom(num_frames * 4) + + +def test_sending_audio(mocker): + mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default") + mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} + + mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.exit_event.is_set.return_value = False + + mock_audio = mocker.patch("robot_interface.endpoints.audio_sender.pyaudio") + mock_audio.PyAudio = mock.Mock() + stream = mock.Mock() + stream.read = _fake_read + mock_audio.PyAudio.open.return_value = stream + + mock_zmq_context = mock.Mock() + send_socket = mock.Mock() + + sender = AudioSender(mock_zmq_context) + sender.socket.send = send_socket + + sender.start() + time.sleep(0.01) + mock_state.exit_event.is_set.return_value = True + sender.wait_until_done() + + send_socket.assert_called() From 230ab5d5cc5946b5deb243bc722ec20c2c37f1ed Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:38:30 +0200 Subject: [PATCH 05/11] test: add case for microphone failure When the microphone fails, it will raise an IOError during the `read`. This is simulated with a new test. 
ref: N25B-119 --- test/unit/test_audio_sender.py | 45 +++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/test/unit/test_audio_sender.py b/test/unit/test_audio_sender.py index 9aab86f..4324cdb 100644 --- a/test/unit/test_audio_sender.py +++ b/test/unit/test_audio_sender.py @@ -55,23 +55,50 @@ def test_sending_audio(mocker): mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") - mock_state.exit_event.is_set.return_value = False - - mock_audio = mocker.patch("robot_interface.endpoints.audio_sender.pyaudio") - mock_audio.PyAudio = mock.Mock() - stream = mock.Mock() - stream.read = _fake_read - mock_audio.PyAudio.open.return_value = stream + mock_state.exit_event.is_set.side_effect = [False, True] mock_zmq_context = mock.Mock() send_socket = mock.Mock() + # If there's something wrong with the microphone, it will raise an IOError when `read`ing. + stream = mock.Mock() + stream.read = _fake_read + sender = AudioSender(mock_zmq_context) sender.socket.send = send_socket + sender.audio.open = mock.Mock() + sender.audio.open.return_value = stream sender.start() - time.sleep(0.01) - mock_state.exit_event.is_set.return_value = True sender.wait_until_done() send_socket.assert_called() + + +def _fake_read_error(num_frames): + raise IOError() + + +def test_break_microphone(mocker): + mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default") + mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} + + mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.exit_event.is_set.side_effect = [False, True] + + mock_zmq_context = mock.Mock() + send_socket = mock.Mock() + + # If there's something wrong with the microphone, it will raise an IOError when `read`ing. 
+ stream = mock.Mock() + stream.read = _fake_read_error + + sender = AudioSender(mock_zmq_context) + sender.socket.send = send_socket + sender.audio.open = mock.Mock() + sender.audio.open.return_value = stream + + sender.start() + sender.wait_until_done() + + send_socket.assert_not_called() From a6a12a5886ff060a30caf72ca52ec5d20b70aa36 Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Sun, 2 Nov 2025 14:58:32 +0100 Subject: [PATCH 06/11] fix: remove unused qi import It had already been made so that the VideoSender does not depend on `qi`, but the import was not yet removed. ref: N25B-119 --- src/robot_interface/endpoints/video_sender.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/robot_interface/endpoints/video_sender.py b/src/robot_interface/endpoints/video_sender.py index c46b768..9e75447 100644 --- a/src/robot_interface/endpoints/video_sender.py +++ b/src/robot_interface/endpoints/video_sender.py @@ -1,6 +1,5 @@ import zmq import threading -import qi import logging from robot_interface.endpoints.socket_base import SocketBase From 9ea446275ed3e74c14742f04f96afe77819093ca Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Sun, 2 Nov 2025 14:59:16 +0100 Subject: [PATCH 07/11] fix: allow speaking text with Unicode characters When speaking, the actuation receiver logs the message to speak. If the message includes Unicode characters, it will now no longer crash. 
ref: N25B-119 --- src/robot_interface/endpoints/actuation_receiver.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/robot_interface/endpoints/actuation_receiver.py b/src/robot_interface/endpoints/actuation_receiver.py index 7fe16b7..aa2511a 100644 --- a/src/robot_interface/endpoints/actuation_receiver.py +++ b/src/robot_interface/endpoints/actuation_receiver.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals # So that we can log texts with Unicode characters import logging import zmq From 5912ac606a575ada820277c736991ec97304abb2 Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Sun, 2 Nov 2025 15:01:18 +0100 Subject: [PATCH 08/11] docs: add installation instructions for the portaudio dependency ref: N25B-119 --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index ae5e2b3..0d47f7b 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,12 @@ python -m virtualenv .venv source .venv/bin/activate ``` +We depend on PortAudio for the `pyaudio` package, so install it with: + +```bash +sudo apt install -y portaudio19-dev +``` + Install the required packages with ```bash From fab5127cace233d50f9f5903d6afa4f838e75043 Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Sun, 2 Nov 2025 16:12:56 +0100 Subject: [PATCH 09/11] feat: add application parameter to choose a custom microphone ref: N25B-119 --- src/robot_interface/endpoints/audio_sender.py | 12 +++- src/robot_interface/utils/microphone.py | 51 ++++++++++++++++ test/common/microphone_utils.py | 59 ++++++++++++++++++- test/unit/test_audio_sender.py | 8 +-- 4 files changed, 122 insertions(+), 8 deletions(-) diff --git a/src/robot_interface/endpoints/audio_sender.py b/src/robot_interface/endpoints/audio_sender.py index 5cd5a6b..7365816 100644 --- a/src/robot_interface/endpoints/audio_sender.py +++ b/src/robot_interface/endpoints/audio_sender.py @@ -7,7 +7,7 @@ import zmq from 
robot_interface.endpoints.socket_base import SocketBase from robot_interface.state import state -from robot_interface.utils.microphone import choose_mic_default +from robot_interface.utils.microphone import choose_mic logger = logging.getLogger(__name__) @@ -17,10 +17,16 @@ class AudioSender(SocketBase): def __init__(self, zmq_context, port=5558): super(AudioSender, self).__init__(str("audio")) # Convert future's unicode_literal to str self.create_socket(zmq_context, zmq.PUB, port) - self.audio = pyaudio.PyAudio() - self.microphone = choose_mic_default(self.audio) self.thread = None + try: + self.audio = pyaudio.PyAudio() + self.microphone = choose_mic(self.audio) + except IOError as e: + logger.warning("PyAudio is not available.", exc_info=e) + self.audio = None + self.microphone = None + def start(self): """ Start sending audio in a different thread. diff --git a/src/robot_interface/utils/microphone.py b/src/robot_interface/utils/microphone.py index c37ed0b..877ca3f 100644 --- a/src/robot_interface/utils/microphone.py +++ b/src/robot_interface/utils/microphone.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals # So that `print` can print Unicode characters in names import logging +import sys logger = logging.getLogger(__name__) @@ -67,3 +68,53 @@ def choose_mic_default(audio): return audio.get_default_input_device_info() except IOError: return None + + +def choose_mic_arguments(audio): + """ + Get a microphone to use from command line arguments. + + :param audio: An instance of PyAudio to use. + :type audio: pyaudio.PyAudio + + :return: A dictionary from PyAudio containing information about the microphone to use, or None + if there is no microphone satisfied by the arguments. 
+ :rtype: dict | None + """ + microphone_name = None + for i, arg in enumerate(sys.argv): + if arg == "--microphone" and len(sys.argv) > i+1: + microphone_name = sys.argv[i+1].strip() + if arg.startswith("--microphone="): + microphone_name = arg[13:].strip() + + if not microphone_name: return None + + available_mics = list(get_microphones(audio)) + for mic in available_mics: + if mic["name"] == microphone_name: + return mic + + available_mic_names = [mic["name"] for mic in available_mics] + logger.warning("Microphone \"{}\" not found. Choose one of {}" + .format(microphone_name, available_mic_names)) + + return None + + +def choose_mic(audio): + """ + Get a microphone to use. Firstly, tries to see if there's an application argument specifying the + microphone to use. If not, get the default microphone. + + :param audio: An instance of PyAudio to use. + :type audio: pyaudio.PyAudio + + :return: A dictionary from PyAudio containing information about the microphone to use, or None + if there is no microphone. 
+ :rtype: dict | None + """ + chosen_mic = choose_mic_arguments(audio) + if chosen_mic: return chosen_mic + + return choose_mic_default(audio) diff --git a/test/common/microphone_utils.py b/test/common/microphone_utils.py index 7ecbf27..c82de37 100644 --- a/test/common/microphone_utils.py +++ b/test/common/microphone_utils.py @@ -1,8 +1,15 @@ +from __future__ import unicode_literals # So that we can format strings with Unicode characters import random import sys from StringIO import StringIO -from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive, get_microphones +from robot_interface.utils.microphone import ( + choose_mic_default, + choose_mic_interactive, + choose_mic_arguments, + choose_mic, + get_microphones, +) class MicrophoneUtils(object): @@ -93,3 +100,53 @@ class MicrophoneUtils(object): assert "index" in result assert isinstance(result["index"], (int, long)) assert result["index"] == microphones[random_index]["index"] + + def test_choose_mic_no_arguments(self, pyaudio_instance, mocker): + mocker.patch.object(sys, "argv", []) + + result = choose_mic_arguments(pyaudio_instance) + + assert result is None + + def test_choose_mic_arguments(self, pyaudio_instance, mocker): + for mic in get_microphones(pyaudio_instance): + mocker.patch.object(sys, "argv", ["--microphone", mic["name"]]) + + result = choose_mic_arguments(pyaudio_instance) + + assert result is not None + assert result == mic + + def test_choose_mic_arguments_eq(self, pyaudio_instance, mocker): + for mic in get_microphones(pyaudio_instance): + mocker.patch.object(sys, "argv", ["--microphone={}".format(mic["name"])]) + + result = choose_mic_arguments(pyaudio_instance) + + assert result is not None + assert result == mic + + def test_choose_mic_arguments_not_exits(self, pyaudio_instance, mocker): + mocker.patch.object(sys, "argv", ["--microphone", "Surely this microphone doesn't exist"]) + + result = choose_mic_arguments(pyaudio_instance) + + assert result is None + + 
def test_choose_mic_with_argument(self, pyaudio_instance, mocker): + mic = next(get_microphones(pyaudio_instance)) + mocker.patch.object(sys, "argv", ["--microphone", mic["name"]]) + + result = choose_mic(pyaudio_instance) + + assert result is not None + assert result == mic + + def test_choose_mic_no_argument(self, pyaudio_instance, mocker): + default_mic = choose_mic_default(pyaudio_instance) + mocker.patch.object(sys, "argv", []) + + result = choose_mic(pyaudio_instance) + + assert result is not None + assert result == default_mic diff --git a/test/unit/test_audio_sender.py b/test/unit/test_audio_sender.py index 4324cdb..fc21805 100644 --- a/test/unit/test_audio_sender.py +++ b/test/unit/test_audio_sender.py @@ -17,7 +17,7 @@ def zmq_context(): def test_no_microphone(zmq_context, mocker): mock_info_logger = mocker.patch("robot_interface.endpoints.audio_sender.logger.info") - mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default") + mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") mock_choose_mic.return_value = None sender = AudioSender(zmq_context) @@ -32,7 +32,7 @@ def test_no_microphone(zmq_context, mocker): def test_unicode_mic_name(zmq_context, mocker): mocker.patch("robot_interface.endpoints.audio_sender.threading") - mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default") + mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") mock_choose_mic.return_value = {"name": u"• Some Unicode name"} sender = AudioSender(zmq_context) @@ -51,7 +51,7 @@ def _fake_read(num_frames): def test_sending_audio(mocker): - mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default") + mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") @@ -80,7 
+80,7 @@ def _fake_read_error(num_frames): def test_break_microphone(mocker): - mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default") + mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") From 854a14bf0c8395ac0b47b7d661e23e979beed823 Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Sun, 2 Nov 2025 16:16:43 +0100 Subject: [PATCH 10/11] docs: describe `--microphone` program parameter ref: N25B-119 --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 0d47f7b..8ef5ce9 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,8 @@ $env:PYTHONPATH="src"; python -m robot_interface.main With both, if you want to connect to the actual robot (or simulator), pass the `--qi-url` argument. +There's also a `--microphone` argument that can be used to choose a microphone to use. If not given, the program will try the default microphone. If you don't know the name of the microphone, pass the argument with any value, and it will list the names of available microphones. + ## Testing From 8a095323ec45f98fde38b985a17d5b4fc96e94ce Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Sun, 2 Nov 2025 16:35:15 +0100 Subject: [PATCH 11/11] docs: describe extra WSL installation step ref: N25B-119 --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 8ef5ce9..ce43e57 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,12 @@ We depend on PortAudio for the `pyaudio` package, so install it with: sudo apt install -y portaudio19-dev ``` +On WSL, also install: + +```bash +sudo apt install -y libasound2-plugins +``` + Install the required packages with ```bash