From 6e2bedcd3249acacd6015eb619b36dd63f17dd64 Mon Sep 17 00:00:00 2001 From: Kasper Marinus Date: Wed, 4 Feb 2026 18:38:40 +0100 Subject: [PATCH 1/5] feat: (almost) qi audio sender --- src/robot_interface/endpoints/audio_sender.py | 247 +++++++++++++----- 1 file changed, 179 insertions(+), 68 deletions(-) diff --git a/src/robot_interface/endpoints/audio_sender.py b/src/robot_interface/endpoints/audio_sender.py index 54e149c..86dc66c 100644 --- a/src/robot_interface/endpoints/audio_sender.py +++ b/src/robot_interface/endpoints/audio_sender.py @@ -6,9 +6,12 @@ University within the Software Project course. """ from __future__ import unicode_literals # So that `logging` can use Unicode characters in names +from abc import ABCMeta, abstractmethod import threading import logging +import Queue +import numpy as np import pyaudio import zmq @@ -20,86 +23,194 @@ from robot_interface.core.config import settings logger = logging.getLogger(__name__) -class AudioSender(SocketBase): - """ - Audio sender endpoint, responsible for sending microphone audio data. +class AudioCapturer(object): + __metaclass__ = ABCMeta - :param zmq_context: The ZeroMQ context to use. - :type zmq_context: zmq.Context + @abstractmethod + def setup(self): + raise NotImplementedError() - :param port: The port to use. - :type port: int + @abstractmethod + def stop(self): + raise NotImplementedError() - :ivar thread: Thread used for sending audio. - :vartype thread: threading.Thread | None + @abstractmethod + def generate_chunk(self): + raise NotImplementedError() + - :ivar audio: PyAudio instance. - :vartype audio: pyaudio.PyAudio | None - :ivar microphone: Selected microphone information. 
- :vartype microphone: dict | None - """ - def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port): - super(AudioSender, self).__init__(str("audio")) # Convert future's unicode_literal to str - self.create_socket(zmq_context, zmq.PUB, port) - self.thread = None +class QiAudioCapturer(AudioCapturer): + def __init__(self, sample_rate=16000, channels=1, deinterleaved=0): + self.session = state.qi_session + if not self.session: + raise RuntimeError("Cannot capture from qi device, no qi session available.") + self.audio = self.session.service("ALAudioDevice") + + self.service_name = "ZmqAudioStreamer" + self.sample_rate = sample_rate + self.channels = channels + self.deinterleaved = deinterleaved + + self.overflow = np.empty(0, dtype=np.float32) + + self.q = Queue.Queue() + + def setup(self): + assert self.session is not None + self.session.registerService(self.service_name, self) + self.audio.setClientPreferences(self.service_name, self.sample_rate, self.channels, self.deinterleaved) + self.audio.subscribe(self.service_name) + + def stop(self): try: - self.audio = pyaudio.PyAudio() - self.microphone = choose_mic(self.audio) - except IOError as e: - logger.warning("PyAudio is not available.", exc_info=e) - self.audio = None - self.microphone = None + self.audio.unsubscribe(self.service_name) + except: + pass + + + def audio_gen(self): + try: + chunk = self.q.get(True, 0.1) + return chunk + except Queue.Empty: + return None + + + # Callback invoked by NAOqi + def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer): + raw_pcm = bytes(inputBuffer) + + assert nbOfChannels == 1 + + pcm_i16 = np.frombuffer(raw_pcm, dtype=np.int16) + pcm_f32 = pcm_i16.astype(np.float32) / 32768.0 + + # Attach overflow + pcm_f32 = np.append(self.overflow, pcm_f32) + + for i in range(len(pcm_f32) // 512): + self.q.put_nowait(pcm_f32[i * 512 : (i + 1) * 512]) + + self.overflow = pcm_f32[len(pcm_f32) // 512 * 512 :] + +class 
StandaloneAudioCapturer: + pass + + +class AudioSender(SocketBase): + def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port): + super(AudioSender, self).__init__(str("audio")) + self.create_socket(zmq_context, zmq.PUB, port) + + self.thread = threading.Thread(target=self.stream) + + self.capturer = self.choose_capturer() + def start(self): - """ - Start sending audio in a different thread. - - Will not start if no microphone is available. - """ - if not self.microphone: - logger.info("Not listening: no microphone available.") - return - - logger.info("Listening with microphone \"{}\".".format(self.microphone["name"])) - self.thread = threading.Thread(target=self._stream) self.thread.start() - def wait_until_done(self): - """ - Wait until the audio thread is done. + def close(self): + self.capturer.stop() + super(AudioSender, self).close() - Will block until `state.exit_event` is set. If the thread is not running, does nothing. - """ - if not self.thread: return - self.thread.join() - self.thread = None + def stream(self): + while not state.exit_event.is_set(): + chunk = self.capturer.generate_chunk() - def _stream(self): - """ - Internal method to continuously read audio from the microphone and send it over the socket. 
- """ - audio_settings = settings.audio_config - chunk = audio_settings.chunk_size # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD + if not chunk or state.is_speaking: + continue - # Docs say this only raises an error if neither `input` nor `output` is True - stream = self.audio.open( - format=pyaudio.paFloat32, - channels=audio_settings.channels, - rate=audio_settings.sample_rate, - input=True, - input_device_index=self.microphone["index"], - frames_per_buffer=chunk, - ) + self.socket.send(chunk) - try: - while not state.exit_event.is_set(): - data = stream.read(chunk) - if (state.is_speaking): continue # Do not send audio while the robot is speaking - self.socket.send(data) - except IOError as e: - logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e) - finally: - stream.stop_stream() - stream.close() + def choose_capturer(self): + return QiAudioCapturer() + + + + +# class AudioSender(SocketBase): +# """ +# Audio sender endpoint, responsible for sending microphone audio data. +# +# :param zmq_context: The ZeroMQ context to use. +# :type zmq_context: zmq.Context +# +# :param port: The port to use. +# :type port: int +# +# :ivar thread: Thread used for sending audio. +# :vartype thread: threading.Thread | None +# +# :ivar audio: PyAudio instance. +# :vartype audio: pyaudio.PyAudio | None +# +# :ivar microphone: Selected microphone information. +# :vartype microphone: dict | None +# """ +# def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port): +# super(AudioSender, self).__init__(str("audio")) # Convert future's unicode_literal to str +# self.create_socket(zmq_context, zmq.PUB, port) +# self.thread = None +# +# try: +# self.audio = pyaudio.PyAudio() +# self.microphone = choose_mic(self.audio) +# except IOError as e: +# logger.warning("PyAudio is not available.", exc_info=e) +# self.audio = None +# self.microphone = None +# +# def start(self): +# """ +# Start sending audio in a different thread. 
+# +# Will not start if no microphone is available. +# """ +# if not self.microphone: +# logger.info("Not listening: no microphone available.") +# return +# +# logger.info("Listening with microphone \"{}\".".format(self.microphone["name"])) +# self.thread = threading.Thread(target=self._stream) +# self.thread.start() +# +# def wait_until_done(self): +# """ +# Wait until the audio thread is done. +# +# Will block until `state.exit_event` is set. If the thread is not running, does nothing. +# """ +# if not self.thread: return +# self.thread.join() +# self.thread = None +# +# def _stream(self): +# """ +# Internal method to continuously read audio from the microphone and send it over the socket. +# """ +# audio_settings = settings.audio_config +# chunk = audio_settings.chunk_size # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD +# +# # Docs say this only raises an error if neither `input` nor `output` is True +# stream = self.audio.open( +# format=pyaudio.paFloat32, +# channels=audio_settings.channels, +# rate=audio_settings.sample_rate, +# input=True, +# input_device_index=self.microphone["index"], +# frames_per_buffer=chunk, +# ) +# +# try: +# while not state.exit_event.is_set(): +# data = stream.read(chunk) +# if (state.is_speaking): continue # Do not send audio while the robot is speaking +# self.socket.send(data) +# except IOError as e: +# logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e) +# finally: +# stream.stop_stream() +# stream.close() -- 2.49.1 From 31c76ecf84f0ac9e9ac3d7133c85c45c5ea2f028 Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Mon, 9 Feb 2026 09:39:48 +0100 Subject: [PATCH 2/5] fix: make QI audio sender working --- requirements.txt | 1 + src/robot_interface/endpoints/audio_sender.py | 12 +++++------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index bc679f4..8a6ee6e 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -7,3 +7,4 @@ sphinx sphinx_rtd_theme pre-commit python-dotenv +numpy<=1.16.6 diff --git a/src/robot_interface/endpoints/audio_sender.py b/src/robot_interface/endpoints/audio_sender.py index 86dc66c..e168285 100644 --- a/src/robot_interface/endpoints/audio_sender.py +++ b/src/robot_interface/endpoints/audio_sender.py @@ -37,7 +37,6 @@ class AudioCapturer(object): @abstractmethod def generate_chunk(self): raise NotImplementedError() - class QiAudioCapturer(AudioCapturer): @@ -69,15 +68,13 @@ class QiAudioCapturer(AudioCapturer): except: pass - - def audio_gen(self): + def generate_chunk(self): try: chunk = self.q.get(True, 0.1) return chunk except Queue.Empty: return None - # Callback invoked by NAOqi def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer): raw_pcm = bytes(inputBuffer) @@ -91,10 +88,11 @@ class QiAudioCapturer(AudioCapturer): pcm_f32 = np.append(self.overflow, pcm_f32) for i in range(len(pcm_f32) // 512): - self.q.put_nowait(pcm_f32[i * 512 : (i + 1) * 512]) + self.q.put_nowait(pcm_f32[i * 512 : (i + 1) * 512].tobytes()) self.overflow = pcm_f32[len(pcm_f32) // 512 * 512 :] + class StandaloneAudioCapturer: pass @@ -108,8 +106,8 @@ class AudioSender(SocketBase): self.capturer = self.choose_capturer() - def start(self): + self.capturer.setup() self.thread.start() def close(self): @@ -120,7 +118,7 @@ class AudioSender(SocketBase): while not state.exit_event.is_set(): chunk = self.capturer.generate_chunk() - if not chunk or state.is_speaking: + if chunk is None or state.is_speaking: continue self.socket.send(chunk) -- 2.49.1 From abd6988d1ec57feb33c78d54008052e2f5eb288c Mon Sep 17 00:00:00 2001 From: Kasper Marinus Date: Mon, 9 Feb 2026 13:40:47 +0100 Subject: [PATCH 3/5] feat: multi-channel qi audio possible --- src/robot_interface/endpoints/audio_sender.py | 63 +++++++++++++++---- 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/src/robot_interface/endpoints/audio_sender.py 
b/src/robot_interface/endpoints/audio_sender.py index e168285..547dbb9 100644 --- a/src/robot_interface/endpoints/audio_sender.py +++ b/src/robot_interface/endpoints/audio_sender.py @@ -6,13 +6,16 @@ University within the Software Project course. """ from __future__ import unicode_literals # So that `logging` can use Unicode characters in names + +import audioop + +import enum from abc import ABCMeta, abstractmethod import threading import logging import Queue import numpy as np -import pyaudio import zmq from robot_interface.endpoints.socket_base import SocketBase @@ -38,28 +41,56 @@ class AudioCapturer(object): def generate_chunk(self): raise NotImplementedError() +class SampleRate(enum.Enum): + """ + Sample rate to use in Hz. + """ + LOW = 16000 + HIGH = 48000 + +class PepperMicrophone(enum.Enum): + """ + Which of Pepper's microphones to use. In our case, some of the mics were damaged/didn't work + well, so we choose to only use the front right. If you have a Pepper robot with all working mics, + you might wish to use all microphones, to improve overall audio quality. 
+ """ + ALL = 0 + LEFT = 1 + RIGHT = 2 + FRONT_LEFT = 3 + FRONT_RIGHT = 4 + class QiAudioCapturer(AudioCapturer): - def __init__(self, sample_rate=16000, channels=1, deinterleaved=0): + def __init__(self, sample_rate=SampleRate.LOW, mic=PepperMicrophone.FRONT_RIGHT, + deinterleaved=0): self.session = state.qi_session if not self.session: raise RuntimeError("Cannot capture from qi device, no qi session available.") + if sample_rate == SampleRate.HIGH and mic != PepperMicrophone.ALL: + raise RuntimeError("For 48000 Hz, you must select all microphones.") + if mic == PepperMicrophone.ALL and sample_rate != SampleRate.HIGH: + raise RuntimeError("For using all microphones, 48000 Hz is required.") + self.audio = self.session.service("ALAudioDevice") self.service_name = "ZmqAudioStreamer" self.sample_rate = sample_rate - self.channels = channels + self.mic = mic self.deinterleaved = deinterleaved self.overflow = np.empty(0, dtype=np.float32) self.q = Queue.Queue() + self._rate_state = None + def setup(self): assert self.session is not None self.session.registerService(self.service_name, self) - self.audio.setClientPreferences(self.service_name, self.sample_rate, self.channels, self.deinterleaved) + self.audio.setClientPreferences(self.service_name, self.sample_rate.value, self.mic.value, + self.deinterleaved) self.audio.subscribe(self.service_name) def stop(self): @@ -79,18 +110,28 @@ class QiAudioCapturer(AudioCapturer): def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer): raw_pcm = bytes(inputBuffer) - assert nbOfChannels == 1 - pcm_i16 = np.frombuffer(raw_pcm, dtype=np.int16) - pcm_f32 = pcm_i16.astype(np.float32) / 32768.0 + + # Make mono channel (if it was 4 channels) + pcm_i32_mono = self._make_mono(pcm_i16.astype(np.int32), nbOfChannels) + + # Resample (if it was 48k) + pcm_i32_mono_16k, self._rate_state = audioop.ratecv(pcm_i32_mono.tobytes(), 4, 1, + self.sample_rate.value, + SampleRate.LOW.value, self._rate_state) + 
pcm_f32_mono_16k = (np.frombuffer(pcm_i32_mono_16k, dtype=np.int32).astype(np.float32) / + 32768.0) # Attach overflow - pcm_f32 = np.append(self.overflow, pcm_f32) + pcm_f32_mono_16k = np.append(self.overflow, pcm_f32_mono_16k) - for i in range(len(pcm_f32) // 512): - self.q.put_nowait(pcm_f32[i * 512 : (i + 1) * 512].tobytes()) + for i in range(len(pcm_f32_mono_16k) // 512): + self.q.put_nowait(pcm_f32_mono_16k[i * 512 : (i + 1) * 512].tobytes()) - self.overflow = pcm_f32[len(pcm_f32) // 512 * 512 :] + self.overflow = pcm_f32_mono_16k[len(pcm_f32_mono_16k) // 512 * 512 :] + + def _make_mono(self, frag, channels): + return frag.reshape(-1, channels).mean(axis=1, dtype=np.int32) class StandaloneAudioCapturer: -- 2.49.1 From 0fe5fcf8f81a00c25943272aa3bbbb0b889d0702 Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:45:17 +0100 Subject: [PATCH 4/5] feat: choose between Qi microphone and local microphone --- .env.example | 3 + requirements.txt | 1 + src/robot_interface/core/config.py | 11 +- src/robot_interface/endpoints/audio_sender.py | 168 ++++++++---------- src/robot_interface/utils/get_config.py | 11 +- src/robot_interface/utils/qi_utils.py | 19 +- test/unit/test_get_config.py | 55 ++++++ test/unit/test_qi_utils.py | 4 +- 8 files changed, 177 insertions(+), 95 deletions(-) diff --git a/.env.example b/.env.example index 173b63c..95723e0 100644 --- a/.env.example +++ b/.env.example @@ -6,6 +6,9 @@ # The hostname or IP address of the Control Backend. AGENT__CONTROL_BACKEND_HOST=localhost +# Whether to use Pepper's microphone when Pepper is connected. 
+AUDIO__USE_PEPPER_MICROPHONE=true + # Variables that are unlikely to be configured, you can probably ignore these: diff --git a/requirements.txt b/requirements.txt index 8a6ee6e..21f517b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ sphinx_rtd_theme pre-commit python-dotenv numpy<=1.16.6 +enum34 diff --git a/src/robot_interface/core/config.py b/src/robot_interface/core/config.py index a07e36e..3a19c48 100644 --- a/src/robot_interface/core/config.py +++ b/src/robot_interface/core/config.py @@ -78,6 +78,8 @@ class AudioConfig(object): """ Audio configuration constants. + :ivar use_pepper_microphone: Whether to use Pepper's microphone or not, defaults to True. + :vartype use_pepper_microphone: bool :ivar sample_rate: Audio sampling rate in Hz, defaults to 16000. :vartype sample_rate: int :ivar chunk_size: Size of audio chunks to capture/process, defaults to 512. @@ -85,7 +87,14 @@ class AudioConfig(object): :ivar channels: Number of audio channels, defaults to 1. :vartype channels: int """ - def __init__(self, sample_rate=None, chunk_size=None, channels=None): + def __init__( + self, + use_pepper_microphone=None, + sample_rate=None, + chunk_size=None, + channels=None, + ): + self.use_pepper_microphone = get_config(use_pepper_microphone, "AUDIO__USE_PEPPER_MICROPHONE", True, bool) self.sample_rate = get_config(sample_rate, "AUDIO__SAMPLE_RATE", 16000, int) self.chunk_size = get_config(chunk_size, "AUDIO__CHUNK_SIZE", 512, int) self.channels = get_config(channels, "AUDIO__CHANNELS", 1, int) diff --git a/src/robot_interface/endpoints/audio_sender.py b/src/robot_interface/endpoints/audio_sender.py index 547dbb9..d2e32df 100644 --- a/src/robot_interface/endpoints/audio_sender.py +++ b/src/robot_interface/endpoints/audio_sender.py @@ -16,6 +16,7 @@ import logging import Queue import numpy as np +import pyaudio import zmq from robot_interface.endpoints.socket_base import SocketBase @@ -27,6 +28,9 @@ logger = logging.getLogger(__name__) class 
AudioCapturer(object): + """ + Interface for audio capturers. + """ __metaclass__ = ABCMeta @abstractmethod @@ -62,8 +66,14 @@ class PepperMicrophone(enum.Enum): class QiAudioCapturer(AudioCapturer): + # Some of this class' methods have docstrings as binary strings. Keep them that way, otherwise + # ``qi.Session.registerService`` will give RuntimeErrors. def __init__(self, sample_rate=SampleRate.LOW, mic=PepperMicrophone.FRONT_RIGHT, deinterleaved=0): + """ + :raises RuntimeError: If there is no Qi session available. + :raises ValueError: If the given arguments are not compatible. + """ self.session = state.qi_session if not self.session: raise RuntimeError("Cannot capture from qi device, no qi session available.") @@ -87,13 +97,20 @@ class QiAudioCapturer(AudioCapturer): self._rate_state = None def setup(self): + b""" + :raises RuntimeError: If no Qi session is available or if the session is not compatible with audio streaming. + """ assert self.session is not None + logger.info("Listening with Pepper's microphone.") self.session.registerService(self.service_name, self) self.audio.setClientPreferences(self.service_name, self.sample_rate.value, self.mic.value, self.deinterleaved) self.audio.subscribe(self.service_name) def stop(self): + b""" + Stop the audio capturer. + """ try: self.audio.unsubscribe(self.service_name) except: @@ -134,8 +151,65 @@ class QiAudioCapturer(AudioCapturer): return frag.reshape(-1, channels).mean(axis=1, dtype=np.int32) -class StandaloneAudioCapturer: - pass +class StandaloneAudioCapturer(AudioCapturer): + """ + Audio capturer that uses a microphone from the local device, can be chosen with the + ``--microphone`` program argument. + + :ivar audio: PyAudio instance. + :vartype audio: pyaudio.PyAudio | None + + :ivar microphone: Selected microphone information. + :vartype microphone: dict | None + + :ivar stream: PyAudio stream instance. None until ``setup()`` is called, remaining None if setup + fails for any reason. 
+ :vartype stream: pyaudio.Stream | None + """ + def __init__(self): + self.stream = None + + try: + self.audio = pyaudio.PyAudio() + self.microphone = choose_mic(self.audio) + except IOError as e: + logger.warning("PyAudio is not available. Won't be able to send audio.", exc_info=True) + self.audio = None + self.microphone = None + + def setup(self): + """ + Setup audio stream. Will not if no microphone is available. + """ + if not self.microphone: + logger.info("Not listening: no microphone available.") + return + + logger.info("Listening with microphone \"{}\".".format(self.microphone["name"])) + self.stream = self.audio.open( + format=pyaudio.paFloat32, + channels=settings.audio_config.channels, + rate=settings.audio_config.sample_rate, + input=True, + input_device_index=self.microphone["index"], + frames_per_buffer=settings.audio_config.chunk_size, + ) + + def stop(self): + """ + Close the audio stream. + """ + if not self.stream: return + self.stream.stop_stream() + self.stream.close() + + def generate_chunk(self): + """ + :return: Audio frames from the microphone of size ``settings.audio_config.chunk_size``. + :rtype: bytes. + :raises IOError: If reading from the audio stream fails. + """ + return self.stream.read(settings.audio_config.chunk_size) class AudioSender(SocketBase): @@ -165,91 +239,7 @@ class AudioSender(SocketBase): self.socket.send(chunk) def choose_capturer(self): - return QiAudioCapturer() + if state.qi_session and settings.audio_config.use_pepper_microphone: + return QiAudioCapturer() - - - -# class AudioSender(SocketBase): -# """ -# Audio sender endpoint, responsible for sending microphone audio data. -# -# :param zmq_context: The ZeroMQ context to use. -# :type zmq_context: zmq.Context -# -# :param port: The port to use. -# :type port: int -# -# :ivar thread: Thread used for sending audio. -# :vartype thread: threading.Thread | None -# -# :ivar audio: PyAudio instance. 
-# :vartype audio: pyaudio.PyAudio | None -# -# :ivar microphone: Selected microphone information. -# :vartype microphone: dict | None -# """ -# def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port): -# super(AudioSender, self).__init__(str("audio")) # Convert future's unicode_literal to str -# self.create_socket(zmq_context, zmq.PUB, port) -# self.thread = None -# -# try: -# self.audio = pyaudio.PyAudio() -# self.microphone = choose_mic(self.audio) -# except IOError as e: -# logger.warning("PyAudio is not available.", exc_info=e) -# self.audio = None -# self.microphone = None -# -# def start(self): -# """ -# Start sending audio in a different thread. -# -# Will not start if no microphone is available. -# """ -# if not self.microphone: -# logger.info("Not listening: no microphone available.") -# return -# -# logger.info("Listening with microphone \"{}\".".format(self.microphone["name"])) -# self.thread = threading.Thread(target=self._stream) -# self.thread.start() -# -# def wait_until_done(self): -# """ -# Wait until the audio thread is done. -# -# Will block until `state.exit_event` is set. If the thread is not running, does nothing. -# """ -# if not self.thread: return -# self.thread.join() -# self.thread = None -# -# def _stream(self): -# """ -# Internal method to continuously read audio from the microphone and send it over the socket. 
-# """ -# audio_settings = settings.audio_config -# chunk = audio_settings.chunk_size # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD -# -# # Docs say this only raises an error if neither `input` nor `output` is True -# stream = self.audio.open( -# format=pyaudio.paFloat32, -# channels=audio_settings.channels, -# rate=audio_settings.sample_rate, -# input=True, -# input_device_index=self.microphone["index"], -# frames_per_buffer=chunk, -# ) -# -# try: -# while not state.exit_event.is_set(): -# data = stream.read(chunk) -# if (state.is_speaking): continue # Do not send audio while the robot is speaking -# self.socket.send(data) -# except IOError as e: -# logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e) -# finally: -# stream.stop_stream() -# stream.close() + return StandaloneAudioCapturer() diff --git a/src/robot_interface/utils/get_config.py b/src/robot_interface/utils/get_config.py index 64d5734..ae29cf7 100644 --- a/src/robot_interface/utils/get_config.py +++ b/src/robot_interface/utils/get_config.py @@ -16,6 +16,8 @@ def get_config(value, env, default, cast=None): Small utility to get a configuration value, returns `value` if it is not None, else it will try to get the environment variable cast with `cast`. If the environment variable is not set, it will return `default`. + Special handling for booleans, which are only true if the value of the variable is "true" or "yes", ignoring capitalization. + :param value: The value to check. :type value: Any :param env: The environment variable to check. 
@@ -33,7 +35,14 @@ def get_config(value, env, default, cast=None): env = os.environ.get(env, default) - if cast is None: + if cast is None or env is None: return env + if cast == bool: + if isinstance(env, bool): + return env + if not isinstance(default, bool): + raise ValueError("Default value must be a boolean if the cast type is a boolean.") + return env.lower() == "true" or env.lower() == "yes" + return cast(env) diff --git a/src/robot_interface/utils/qi_utils.py b/src/robot_interface/utils/qi_utils.py index c9f3a02..a3c9abf 100644 --- a/src/robot_interface/utils/qi_utils.py +++ b/src/robot_interface/utils/qi_utils.py @@ -14,6 +14,20 @@ except ImportError: qi = None +def _get_qi_url(): + """ + Get the Qi URL from the command line arguments, or None if not given. + """ + if "--qi-url" in sys.argv: + return sys.argv[sys.argv.index("--qi-url") + 1] + + for arg in sys.argv: + if arg.startswith("--qi-url="): + return arg[len("--qi-url="):] + + return None + + def get_qi_session(): """ Create and return a Qi session if available. @@ -25,12 +39,13 @@ def get_qi_session(): logging.info("Unable to import qi. Running in stand-alone mode.") return None - if "--qi-url" not in sys.argv: + qi_url = _get_qi_url() + if qi_url is None: logging.info("No Qi URL argument given. Running in stand-alone mode.") return None try: - app = qi.Application() + app = qi.Application(["--qi-url", qi_url, "--qi-listen-url", "tcp://0.0.0.0:0"]) app.start() return app.session except RuntimeError: diff --git a/test/unit/test_get_config.py b/test/unit/test_get_config.py index ce3d8d2..4312a0a 100644 --- a/test/unit/test_get_config.py +++ b/test/unit/test_get_config.py @@ -50,3 +50,58 @@ def test_get_config_casts_default_when_env_missing(monkeypatch): result = get_config(None, "GET_CONFIG_MISSING", "42", int) assert result == 42 + + +def test_get_config_unset_boolean_default(monkeypatch): + """ + When the env var is a boolean, and it's not set, ensure it uses the default value. 
+ """ + monkeypatch.delenv("SOME_BOOLEAN_VARIABLE", raising=False) + + result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool) + assert result == False + + result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool) + assert result == True + + +def test_get_config_true_boolean(monkeypatch): + """ + When the env var is a boolean, and its value is "true", "TRUE", "yes", etc., it should return true. + """ + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "TRUE") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool) + assert result == True + + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "true") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool) + assert result == True + + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "yes") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool) + assert result == True + + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "YES") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool) + assert result == True + + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "TrUE") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool) + assert result == True + + +def test_get_config_false_boolean(monkeypatch): + """ + When the env var is a boolean, and its value is not "true", "TRUE", "yes", etc., it should return False. 
+ """ + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "FALSE") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool) + assert result == False + + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "false") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool) + assert result == False + + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "anything, tbh") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool) + assert result == False diff --git a/test/unit/test_qi_utils.py b/test/unit/test_qi_utils.py index 6817be6..b18fa3c 100644 --- a/test/unit/test_qi_utils.py +++ b/test/unit/test_qi_utils.py @@ -62,7 +62,7 @@ def test_get_qi_session_runtime_error(monkeypatch): raise RuntimeError("boom") class FakeQi: - Application = lambda self=None: FakeApp() + Application = lambda *args, **kwargs: FakeApp() reload_qi_utils_with(FakeQi()) @@ -87,7 +87,7 @@ def test_get_qi_session_success(monkeypatch): return True class FakeQi: - Application = lambda self=None: FakeApp() + Application = lambda *args, **kwargs: FakeApp() reload_qi_utils_with(FakeQi()) -- 2.49.1 From ecf9d14a4ef550f23d2ef869b369293f8c8cd26f Mon Sep 17 00:00:00 2001 From: Twirre Meulenbelt <43213592+TwirreM@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:51:35 +0100 Subject: [PATCH 5/5] test: make audio sender tests pass --- test/unit/test_audio_sender.py | 51 ++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/test/unit/test_audio_sender.py b/test/unit/test_audio_sender.py index d9f9ac2..32f7d46 100644 --- a/test/unit/test_audio_sender.py +++ b/test/unit/test_audio_sender.py @@ -33,35 +33,33 @@ def test_no_microphone(zmq_context, mocker): mock_info_logger = mocker.patch("robot_interface.endpoints.audio_sender.logger.info") mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") mock_choose_mic.return_value = None + mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.qi_session = None 
sender = AudioSender(zmq_context) - assert sender.microphone is None + assert sender.capturer.microphone is None - sender.start() - assert sender.thread is None + sender.capturer.setup() mock_info_logger.assert_called() - sender.wait_until_done() # Should return early because we didn't start a thread - def test_unicode_mic_name(zmq_context, mocker): """ Tests the robustness of the `AudioSender` when handling microphone names that contain Unicode characters. """ - mocker.patch("robot_interface.endpoints.audio_sender.threading") mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") - mock_choose_mic.return_value = {"name": u"• Some Unicode name"} + mock_choose_mic.return_value = {"name": u"• Some Unicode name", "index": 0L} + mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.qi_session = None sender = AudioSender(zmq_context) - assert sender.microphone is not None + assert sender.capturer.microphone is not None + sender.capturer.audio.open = mock.Mock(return_value=mock.Mock()) - # `.start()` logs the name of the microphone. It should not give an error if it contains Unicode + # `.setup()` logs the name of the microphone. It should not give an error if it contains Unicode # symbols. 
- sender.start() - assert sender.thread is not None - - sender.wait_until_done() # Should return instantly because we didn't start a real thread + sender.capturer.setup() def _fake_read(num_frames): @@ -79,6 +77,7 @@ def test_sending_audio(mocker): mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.qi_session = None mock_state.exit_event.is_set.side_effect = [False, True] mock_zmq_context = mock.Mock() @@ -91,11 +90,11 @@ def test_sending_audio(mocker): sender = AudioSender(mock_zmq_context) sender.socket.send = send_socket - sender.audio.open = mock.Mock() - sender.audio.open.return_value = stream + sender.capturer.audio.open = mock.Mock() + sender.capturer.audio.open.return_value = stream sender.start() - sender.wait_until_done() + sender.thread.join() send_socket.assert_called() @@ -108,6 +107,7 @@ def test_no_sending_if_speaking(mocker): mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.qi_session = None mock_state.exit_event.is_set.side_effect = [False, True] mock_zmq_context = mock.Mock() @@ -121,11 +121,11 @@ def test_no_sending_if_speaking(mocker): sender = AudioSender(mock_zmq_context) sender.socket.send = send_socket - sender.audio.open = mock.Mock() - sender.audio.open.return_value = stream + sender.capturer.audio.open = mock.Mock() + sender.capturer.audio.open.return_value = stream sender.start() - sender.wait_until_done() + sender.thread.join() send_socket.assert_not_called() @@ -145,6 +145,7 @@ def test_break_microphone(mocker): mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.qi_session = None mock_state.exit_event.is_set.side_effect = [False, True] mock_zmq_context = mock.Mock() @@ -156,11 +157,11 @@ def test_break_microphone(mocker): sender = 
AudioSender(mock_zmq_context) sender.socket.send = send_socket - sender.audio.open = mock.Mock() - sender.audio.open.return_value = stream + sender.capturer.audio.open = mock.Mock() + sender.capturer.audio.open.return_value = stream sender.start() - sender.wait_until_done() + sender.thread.join() send_socket.assert_not_called() @@ -171,6 +172,8 @@ def test_pyaudio_init_failure(mocker, zmq_context): """ # Prevent binding the ZMQ socket mocker.patch("robot_interface.endpoints.audio_sender.AudioSender.create_socket") + mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.qi_session = None # Simulate PyAudio() failing mocker.patch( @@ -180,5 +183,5 @@ def test_pyaudio_init_failure(mocker, zmq_context): sender = AudioSender(zmq_context) - assert sender.audio is None - assert sender.microphone is None + assert sender.capturer.audio is None + assert sender.capturer.microphone is None -- 2.49.1