5 Commits

Author SHA1 Message Date
Twirre Meulenbelt
ecf9d14a4e test: make audio sender tests pass 2026-02-09 15:51:35 +01:00
Twirre Meulenbelt
0fe5fcf8f8 feat: choose between Qi microphone and local microphone 2026-02-09 15:45:17 +01:00
abd6988d1e feat: multi-channel qi audio possible 2026-02-09 13:40:47 +01:00
Twirre Meulenbelt
31c76ecf84 fix: make QI audio sender working 2026-02-09 09:39:48 +01:00
6e2bedcd32 feat: (almost) qi audio sender 2026-02-04 18:38:40 +01:00
9 changed files with 318 additions and 82 deletions

View File

@@ -6,6 +6,9 @@
# The hostname or IP address of the Control Backend. # The hostname or IP address of the Control Backend.
AGENT__CONTROL_BACKEND_HOST=localhost AGENT__CONTROL_BACKEND_HOST=localhost
# Whether to use Pepper's microphone when Pepper is connected.
AUDIO__USE_PEPPER_MICROPHONE=true
# Variables that are unlikely to be configured, you can probably ignore these: # Variables that are unlikely to be configured, you can probably ignore these:

View File

@@ -7,3 +7,5 @@ sphinx
sphinx_rtd_theme sphinx_rtd_theme
pre-commit pre-commit
python-dotenv python-dotenv
numpy<=1.16.6
enum34

View File

@@ -78,6 +78,8 @@ class AudioConfig(object):
""" """
Audio configuration constants. Audio configuration constants.
:ivar use_pepper_microphone: Whether to use Pepper's microphone or not, defaults to True.
:vartype use_pepper_microphone: bool
:ivar sample_rate: Audio sampling rate in Hz, defaults to 16000. :ivar sample_rate: Audio sampling rate in Hz, defaults to 16000.
:vartype sample_rate: int :vartype sample_rate: int
:ivar chunk_size: Size of audio chunks to capture/process, defaults to 512. :ivar chunk_size: Size of audio chunks to capture/process, defaults to 512.
@@ -85,7 +87,14 @@ class AudioConfig(object):
:ivar channels: Number of audio channels, defaults to 1. :ivar channels: Number of audio channels, defaults to 1.
:vartype channels: int :vartype channels: int
""" """
def __init__(self, sample_rate=None, chunk_size=None, channels=None): def __init__(
self,
use_pepper_microphone=None,
sample_rate=None,
chunk_size=None,
channels=None,
):
self.use_pepper_microphone = get_config(use_pepper_microphone, "AUDIO__USE_PEPPER_MICROPHONE", True, bool)
self.sample_rate = get_config(sample_rate, "AUDIO__SAMPLE_RATE", 16000, int) self.sample_rate = get_config(sample_rate, "AUDIO__SAMPLE_RATE", 16000, int)
self.chunk_size = get_config(chunk_size, "AUDIO__CHUNK_SIZE", 512, int) self.chunk_size = get_config(chunk_size, "AUDIO__CHUNK_SIZE", 512, int)
self.channels = get_config(channels, "AUDIO__CHANNELS", 1, int) self.channels = get_config(channels, "AUDIO__CHANNELS", 1, int)

View File

@@ -6,9 +6,16 @@ University within the Software Project course.
""" """
from __future__ import unicode_literals # So that `logging` can use Unicode characters in names from __future__ import unicode_literals # So that `logging` can use Unicode characters in names
import audioop
import enum
from abc import ABCMeta, abstractmethod
import threading import threading
import logging import logging
import Queue
import numpy as np
import pyaudio import pyaudio
import zmq import zmq
@@ -20,86 +27,219 @@ from robot_interface.core.config import settings
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class AudioSender(SocketBase): class AudioCapturer(object):
""" """
Audio sender endpoint, responsible for sending microphone audio data. Interface for audio capturers.
"""
__metaclass__ = ABCMeta
:param zmq_context: The ZeroMQ context to use. @abstractmethod
:type zmq_context: zmq.Context def setup(self):
raise NotImplementedError()
:param port: The port to use. @abstractmethod
:type port: int def stop(self):
raise NotImplementedError()
:ivar thread: Thread used for sending audio. @abstractmethod
:vartype thread: threading.Thread | None def generate_chunk(self):
raise NotImplementedError()
class SampleRate(enum.Enum):
"""
Sample rate to use in Hz.

``QiAudioCapturer`` accepts one of these as the rate to request from the audio device.
"""
# Also the rate that ``QiAudioCapturer.processRemote`` resamples its output to.
LOW = 16000
# ``QiAudioCapturer`` only accepts this rate together with ``PepperMicrophone.ALL``.
HIGH = 48000
class PepperMicrophone(enum.Enum):
"""
Which of Pepper's microphones to use. In our case, some of the mics were damaged/didn't work
well, so we choose to only use the front right. If you have a Pepper robot with all working mics,
you might wish to use all microphones, to improve overall audio quality.
"""
# The member values are passed as-is to ``ALAudioDevice.setClientPreferences`` by
# ``QiAudioCapturer.setup``.
# ``ALL`` is only accepted by ``QiAudioCapturer`` together with ``SampleRate.HIGH``.
ALL = 0
LEFT = 1
RIGHT = 2
FRONT_LEFT = 3
FRONT_RIGHT = 4
class QiAudioCapturer(AudioCapturer):
    # Some of this class' methods have docstrings as binary strings. Keep them that way, otherwise
    # ``qi.Session.registerService`` will give RuntimeErrors.
    # For the same reason, methods that had no docstring are documented with ``#`` comments only.
    #
    # Audio capturer that receives audio from the ``ALAudioDevice`` service of the Qi session.
    # The instance registers itself as a Qi service and subscribes to the audio device, after
    # which NAOqi invokes ``processRemote`` with raw audio. That callback converts the audio to
    # mono float32 at ``SampleRate.LOW`` and queues it in fixed-size chunks, which
    # ``generate_chunk`` hands out one at a time.

    def __init__(self, sample_rate=SampleRate.LOW, mic=PepperMicrophone.FRONT_RIGHT,
                 deinterleaved=0):
        """
        :param sample_rate: Sample rate to request from the audio device.
        :type sample_rate: SampleRate

        :param mic: Which microphone(s) to request from the audio device.
        :type mic: PepperMicrophone

        :param deinterleaved: Passed on unchanged to ``ALAudioDevice.setClientPreferences``.
        :type deinterleaved: int

        :raises RuntimeError: If there is no Qi session available, or if the given sample rate and
            microphone are not compatible.
        """
        self.session = state.qi_session
        if not self.session:
            raise RuntimeError("Cannot capture from qi device, no qi session available.")

        # 48000 Hz and "all microphones" are only accepted together.
        if sample_rate == SampleRate.HIGH and mic != PepperMicrophone.ALL:
            raise RuntimeError("For 48000 Hz, you must select all microphones.")
        if mic == PepperMicrophone.ALL and sample_rate != SampleRate.HIGH:
            raise RuntimeError("For using all microphones, 48000 Hz is required.")

        self.audio = self.session.service("ALAudioDevice")
        self.service_name = "ZmqAudioStreamer"
        self.sample_rate = sample_rate
        self.mic = mic
        self.deinterleaved = deinterleaved

        # Use the configured chunk size, like the local-microphone capturer does, so that both
        # capturers emit chunks of the same size.
        self.chunk_size = settings.audio_config.chunk_size

        # Samples left over from the previous callback that did not fill a whole chunk.
        self.overflow = np.empty(0, dtype=np.float32)
        # Hands finished chunks from the NAOqi callback over to ``generate_chunk``.
        self.q = Queue.Queue()
        # Resampler state carried between consecutive ``audioop.ratecv`` calls.
        self._rate_state = None

    def setup(self):
        b"""
        Register this object as a Qi service and subscribe it to the audio device.

        :raises RuntimeError: If no Qi session is available or if the session is not compatible with audio streaming.
        """
        # Explicit check instead of ``assert``: asserts are stripped when running with ``-O``.
        if self.session is None:
            raise RuntimeError("Cannot capture from qi device, no qi session available.")

        logger.info("Listening with Pepper's microphone.")
        self.session.registerService(self.service_name, self)
        self.audio.setClientPreferences(self.service_name, self.sample_rate.value, self.mic.value,
                                        self.deinterleaved)
        self.audio.subscribe(self.service_name)

    def stop(self):
        b"""
        Stop the audio capturer.
        """
        # NOTE(review): the service registered in ``setup`` is not unregistered here - confirm
        # whether that is needed.
        try:
            self.audio.unsubscribe(self.service_name)
        except Exception:
            # Stopping stays best-effort, but the failure is logged instead of silently dropped,
            # and KeyboardInterrupt/SystemExit are no longer swallowed.
            logger.warning("Failed to unsubscribe from Pepper's audio device.", exc_info=True)

    # Returns the next queued audio chunk as bytes, or None if no chunk became available within
    # 0.1 seconds.
    def generate_chunk(self):
        try:
            return self.q.get(True, 0.1)
        except Queue.Empty:
            return None

    # Callback invoked by NAOqi
    def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer):
        raw_pcm = bytes(inputBuffer)
        pcm_i16 = np.frombuffer(raw_pcm, dtype=np.int16)

        # Make mono channel (if it was 4 channels)
        pcm_i32_mono = self._make_mono(pcm_i16.astype(np.int32), nbOfChannels)

        # Resample (if it was 48k). The sample width is 4 bytes because the samples are int32.
        pcm_i32_mono_16k, self._rate_state = audioop.ratecv(pcm_i32_mono.tobytes(), 4, 1,
                                                            self.sample_rate.value,
                                                            SampleRate.LOW.value, self._rate_state)

        # The samples originate from int16 data, so dividing by 2**15 scales them to floats.
        pcm_f32_mono_16k = (np.frombuffer(pcm_i32_mono_16k, dtype=np.int32).astype(np.float32) /
                            32768.0)

        # Attach overflow
        pcm_f32_mono_16k = np.append(self.overflow, pcm_f32_mono_16k)

        # Queue every complete chunk; whatever is left is kept for the next callback.
        chunk_size = self.chunk_size
        usable = len(pcm_f32_mono_16k) // chunk_size * chunk_size
        for start in range(0, usable, chunk_size):
            self.q.put_nowait(pcm_f32_mono_16k[start:start + chunk_size].tobytes())

        self.overflow = pcm_f32_mono_16k[usable:]

    # Averages every group of ``channels`` consecutive samples into one int32 sample.
    def _make_mono(self, frag, channels):
        return frag.reshape(-1, channels).mean(axis=1, dtype=np.int32)
class StandaloneAudioCapturer(AudioCapturer):
"""
Audio capturer that uses a microphone from the local device, can be chosen with the
``--microphone`` program argument.
:ivar audio: PyAudio instance. :ivar audio: PyAudio instance.
:vartype audio: pyaudio.PyAudio | None :vartype audio: pyaudio.PyAudio | None
:ivar microphone: Selected microphone information. :ivar microphone: Selected microphone information.
:vartype microphone: dict | None :vartype microphone: dict | None
:ivar stream: PyAudio stream instance. None until ``setup()`` is called, remaining None if setup
fails for any reason.
:vartype stream: pyaudio.Stream | None
""" """
def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port): def __init__(self):
super(AudioSender, self).__init__(str("audio")) # Convert future's unicode_literal to str self.stream = None
self.create_socket(zmq_context, zmq.PUB, port)
self.thread = None
try: try:
self.audio = pyaudio.PyAudio() self.audio = pyaudio.PyAudio()
self.microphone = choose_mic(self.audio) self.microphone = choose_mic(self.audio)
except IOError as e: except IOError as e:
logger.warning("PyAudio is not available.", exc_info=e) logger.warning("PyAudio is not available. Won't be able to send audio.", exc_info=True)
self.audio = None self.audio = None
self.microphone = None self.microphone = None
def start(self): def setup(self):
""" """
Start sending audio in a different thread. Set up the audio stream. Does nothing if no microphone is available.
Will not start if no microphone is available.
""" """
if not self.microphone: if not self.microphone:
logger.info("Not listening: no microphone available.") logger.info("Not listening: no microphone available.")
return return
logger.info("Listening with microphone \"{}\".".format(self.microphone["name"])) logger.info("Listening with microphone \"{}\".".format(self.microphone["name"]))
self.thread = threading.Thread(target=self._stream) self.stream = self.audio.open(
self.thread.start()
def wait_until_done(self):
"""
Wait until the audio thread is done.
Will block until `state.exit_event` is set. If the thread is not running, does nothing.
"""
if not self.thread: return
self.thread.join()
self.thread = None
def _stream(self):
"""
Internal method to continuously read audio from the microphone and send it over the socket.
"""
audio_settings = settings.audio_config
chunk = audio_settings.chunk_size # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD
# Docs say this only raises an error if neither `input` nor `output` is True
stream = self.audio.open(
format=pyaudio.paFloat32, format=pyaudio.paFloat32,
channels=audio_settings.channels, channels=settings.audio_config.channels,
rate=audio_settings.sample_rate, rate=settings.audio_config.sample_rate,
input=True, input=True,
input_device_index=self.microphone["index"], input_device_index=self.microphone["index"],
frames_per_buffer=chunk, frames_per_buffer=settings.audio_config.chunk_size,
) )
try: def stop(self):
while not state.exit_event.is_set(): """
data = stream.read(chunk) Close the audio stream.
if (state.is_speaking): continue # Do not send audio while the robot is speaking """
self.socket.send(data) if not self.stream: return
except IOError as e: self.stream.stop_stream()
logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e) self.stream.close()
finally:
stream.stop_stream() def generate_chunk(self):
stream.close() """
:return: Audio frames from the microphone of size ``settings.audio_config.chunk_size``.
:rtype: bytes.
:raises IOError: If reading from the audio stream fails.
"""
return self.stream.read(settings.audio_config.chunk_size)
class AudioSender(SocketBase):
    """
    Audio sender endpoint, responsible for sending microphone audio data.

    :param zmq_context: The ZeroMQ context to use.
    :type zmq_context: zmq.Context

    :param port: The port to use.
    :type port: int

    :ivar thread: Thread used for sending audio. Created in the constructor, started by
        ``start()``.
    :vartype thread: threading.Thread

    :ivar capturer: The source of the audio chunks, as chosen by ``choose_capturer()``.
    :vartype capturer: AudioCapturer
    """
    def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port):
        super(AudioSender, self).__init__(str("audio"))  # Convert future's unicode_literal to str
        self.create_socket(zmq_context, zmq.PUB, port)
        self.thread = threading.Thread(target=self.stream)
        self.capturer = self.choose_capturer()

    def start(self):
        """
        Set up the audio capturer and start sending audio in a different thread.
        """
        self.capturer.setup()
        self.thread.start()

    def close(self):
        """
        Stop the audio capturer and close the socket.
        """
        self.capturer.stop()
        super(AudioSender, self).close()

    def stream(self):
        """
        Continuously get audio chunks from the capturer and send them over the socket, until
        ``state.exit_event`` is set or capturing fails.
        """
        while not state.exit_event.is_set():
            try:
                chunk = self.capturer.generate_chunk()
            except Exception:
                # This loop is the top level of the sender thread. A capture failure (such as the
                # IOError a broken microphone gives, or a capturer whose ``setup()`` did not
                # produce a stream) is logged and ends listening, instead of killing the thread
                # with an unlogged exception.
                logger.error(
                    "Stopped listening: failed to get audio from microphone.", exc_info=True
                )
                return

            # Do not send audio while the robot is speaking
            if chunk is None or state.is_speaking:
                continue
            self.socket.send(chunk)

    def choose_capturer(self):
        """
        Choose where to capture audio from.

        :return: A capturer for Pepper's microphone if a Qi session is available and
            ``settings.audio_config.use_pepper_microphone`` is set, else a capturer for a local
            microphone.
        :rtype: AudioCapturer
        """
        if state.qi_session and settings.audio_config.use_pepper_microphone:
            return QiAudioCapturer()
        return StandaloneAudioCapturer()

View File

@@ -16,6 +16,8 @@ def get_config(value, env, default, cast=None):
Small utility to get a configuration value, returns `value` if it is not None, else it will try to get the Small utility to get a configuration value, returns `value` if it is not None, else it will try to get the
environment variable cast with `cast`. If the environment variable is not set, it will return `default`. environment variable cast with `cast`. If the environment variable is not set, it will return `default`.
Special handling for booleans, which are only true if the value of the variable is "true" or "yes", ignoring capitalization.
:param value: The value to check. :param value: The value to check.
:type value: Any :type value: Any
:param env: The environment variable to check. :param env: The environment variable to check.
@@ -33,7 +35,14 @@ def get_config(value, env, default, cast=None):
env = os.environ.get(env, default) env = os.environ.get(env, default)
if cast is None: if cast is None or env is None:
return env return env
if cast == bool:
if isinstance(env, bool):
return env
if not isinstance(default, bool):
raise ValueError("Default value must be a boolean if the cast type is a boolean.")
return env.lower() == "true" or env.lower() == "yes"
return cast(env) return cast(env)

View File

@@ -14,6 +14,20 @@ except ImportError:
qi = None qi = None
def _get_qi_url():
    """
    Get the Qi URL from the command line arguments, or None if not given.

    Both ``--qi-url <url>`` and ``--qi-url=<url>`` are recognised. The space-separated form takes
    precedence when both are present. A trailing ``--qi-url`` without a value is ignored.

    :return: The Qi URL, or None if no URL was given.
    :rtype: str | None
    """
    args = sys.argv

    if "--qi-url" in args:
        value_index = args.index("--qi-url") + 1
        # Guard against ``--qi-url`` being the last argument, which would be an IndexError.
        if value_index < len(args):
            return args[value_index]
        logging.warning("Argument --qi-url was given without a value, ignoring it.")

    prefix = "--qi-url="
    for arg in args:
        if arg.startswith(prefix):
            return arg[len(prefix):]

    return None
def get_qi_session(): def get_qi_session():
""" """
Create and return a Qi session if available. Create and return a Qi session if available.
@@ -25,12 +39,13 @@ def get_qi_session():
logging.info("Unable to import qi. Running in stand-alone mode.") logging.info("Unable to import qi. Running in stand-alone mode.")
return None return None
if "--qi-url" not in sys.argv: qi_url = _get_qi_url()
if qi_url is None:
logging.info("No Qi URL argument given. Running in stand-alone mode.") logging.info("No Qi URL argument given. Running in stand-alone mode.")
return None return None
try: try:
app = qi.Application() app = qi.Application(["--qi-url", qi_url, "--qi-listen-url", "tcp://0.0.0.0:0"])
app.start() app.start()
return app.session return app.session
except RuntimeError: except RuntimeError:

View File

@@ -33,35 +33,33 @@ def test_no_microphone(zmq_context, mocker):
mock_info_logger = mocker.patch("robot_interface.endpoints.audio_sender.logger.info") mock_info_logger = mocker.patch("robot_interface.endpoints.audio_sender.logger.info")
mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic")
mock_choose_mic.return_value = None mock_choose_mic.return_value = None
mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
mock_state.qi_session = None
sender = AudioSender(zmq_context) sender = AudioSender(zmq_context)
assert sender.microphone is None assert sender.capturer.microphone is None
sender.start() sender.capturer.setup()
assert sender.thread is None
mock_info_logger.assert_called() mock_info_logger.assert_called()
sender.wait_until_done() # Should return early because we didn't start a thread
def test_unicode_mic_name(zmq_context, mocker): def test_unicode_mic_name(zmq_context, mocker):
""" """
Tests the robustness of the `AudioSender` when handling microphone names Tests the robustness of the `AudioSender` when handling microphone names
that contain Unicode characters. that contain Unicode characters.
""" """
mocker.patch("robot_interface.endpoints.audio_sender.threading")
mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic")
mock_choose_mic.return_value = {"name": u"• Some Unicode name"} mock_choose_mic.return_value = {"name": u"• Some Unicode name", "index": 0L}
mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
mock_state.qi_session = None
sender = AudioSender(zmq_context) sender = AudioSender(zmq_context)
assert sender.microphone is not None assert sender.capturer.microphone is not None
sender.capturer.audio.open = mock.Mock(return_value=mock.Mock())
# `.start()` logs the name of the microphone. It should not give an error if it contains Unicode # `.setup()` logs the name of the microphone. It should not give an error if it contains Unicode
# symbols. # symbols.
sender.start() sender.capturer.setup()
assert sender.thread is not None
sender.wait_until_done() # Should return instantly because we didn't start a real thread
def _fake_read(num_frames): def _fake_read(num_frames):
@@ -79,6 +77,7 @@ def test_sending_audio(mocker):
mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L}
mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
mock_state.qi_session = None
mock_state.exit_event.is_set.side_effect = [False, True] mock_state.exit_event.is_set.side_effect = [False, True]
mock_zmq_context = mock.Mock() mock_zmq_context = mock.Mock()
@@ -91,11 +90,11 @@ def test_sending_audio(mocker):
sender = AudioSender(mock_zmq_context) sender = AudioSender(mock_zmq_context)
sender.socket.send = send_socket sender.socket.send = send_socket
sender.audio.open = mock.Mock() sender.capturer.audio.open = mock.Mock()
sender.audio.open.return_value = stream sender.capturer.audio.open.return_value = stream
sender.start() sender.start()
sender.wait_until_done() sender.thread.join()
send_socket.assert_called() send_socket.assert_called()
@@ -108,6 +107,7 @@ def test_no_sending_if_speaking(mocker):
mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L}
mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
mock_state.qi_session = None
mock_state.exit_event.is_set.side_effect = [False, True] mock_state.exit_event.is_set.side_effect = [False, True]
mock_zmq_context = mock.Mock() mock_zmq_context = mock.Mock()
@@ -121,11 +121,11 @@ def test_no_sending_if_speaking(mocker):
sender = AudioSender(mock_zmq_context) sender = AudioSender(mock_zmq_context)
sender.socket.send = send_socket sender.socket.send = send_socket
sender.audio.open = mock.Mock() sender.capturer.audio.open = mock.Mock()
sender.audio.open.return_value = stream sender.capturer.audio.open.return_value = stream
sender.start() sender.start()
sender.wait_until_done() sender.thread.join()
send_socket.assert_not_called() send_socket.assert_not_called()
@@ -145,6 +145,7 @@ def test_break_microphone(mocker):
mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L}
mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
mock_state.qi_session = None
mock_state.exit_event.is_set.side_effect = [False, True] mock_state.exit_event.is_set.side_effect = [False, True]
mock_zmq_context = mock.Mock() mock_zmq_context = mock.Mock()
@@ -156,11 +157,11 @@ def test_break_microphone(mocker):
sender = AudioSender(mock_zmq_context) sender = AudioSender(mock_zmq_context)
sender.socket.send = send_socket sender.socket.send = send_socket
sender.audio.open = mock.Mock() sender.capturer.audio.open = mock.Mock()
sender.audio.open.return_value = stream sender.capturer.audio.open.return_value = stream
sender.start() sender.start()
sender.wait_until_done() sender.thread.join()
send_socket.assert_not_called() send_socket.assert_not_called()
@@ -171,6 +172,8 @@ def test_pyaudio_init_failure(mocker, zmq_context):
""" """
# Prevent binding the ZMQ socket # Prevent binding the ZMQ socket
mocker.patch("robot_interface.endpoints.audio_sender.AudioSender.create_socket") mocker.patch("robot_interface.endpoints.audio_sender.AudioSender.create_socket")
mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
mock_state.qi_session = None
# Simulate PyAudio() failing # Simulate PyAudio() failing
mocker.patch( mocker.patch(
@@ -180,5 +183,5 @@ def test_pyaudio_init_failure(mocker, zmq_context):
sender = AudioSender(zmq_context) sender = AudioSender(zmq_context)
assert sender.audio is None assert sender.capturer.audio is None
assert sender.microphone is None assert sender.capturer.microphone is None

View File

@@ -50,3 +50,58 @@ def test_get_config_casts_default_when_env_missing(monkeypatch):
result = get_config(None, "GET_CONFIG_MISSING", "42", int) result = get_config(None, "GET_CONFIG_MISSING", "42", int)
assert result == 42 assert result == 42
def test_get_config_unset_boolean_default(monkeypatch):
    """
    When the env var is a boolean, and it's not set, ensure it uses the default value.
    """
    monkeypatch.delenv("SOME_BOOLEAN_VARIABLE", raising=False)

    for default in (False, True):
        result = get_config(None, "SOME_BOOLEAN_VARIABLE", default, bool)
        # Identity check: ``== False`` would also accept 0, and ``== True`` would also accept 1.
        assert result is default, "default {!r} gave {!r}".format(default, result)
def test_get_config_true_boolean(monkeypatch):
    """
    When the env var is a boolean, and its value is "true", "TRUE", "yes", etc., it should return true.
    """
    for value in ("TRUE", "true", "yes", "YES", "TrUE"):
        monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", value)
        result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool)
        # Identity check: ``== True`` would also accept 1.
        assert result is True, "value {!r} gave {!r}".format(value, result)
def test_get_config_false_boolean(monkeypatch):
    """
    When the env var is a boolean, and its value is not "true", "TRUE", "yes", etc., it should return False.
    """
    for value in ("FALSE", "false", "anything, tbh"):
        monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", value)
        result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool)
        # Identity check: ``== False`` would also accept 0.
        assert result is False, "value {!r} gave {!r}".format(value, result)

View File

@@ -62,7 +62,7 @@ def test_get_qi_session_runtime_error(monkeypatch):
raise RuntimeError("boom") raise RuntimeError("boom")
class FakeQi: class FakeQi:
Application = lambda self=None: FakeApp() Application = lambda *args, **kwargs: FakeApp()
reload_qi_utils_with(FakeQi()) reload_qi_utils_with(FakeQi())
@@ -87,7 +87,7 @@ def test_get_qi_session_success(monkeypatch):
return True return True
class FakeQi: class FakeQi:
Application = lambda self=None: FakeApp() Application = lambda *args, **kwargs: FakeApp()
reload_qi_utils_with(FakeQi()) reload_qi_utils_with(FakeQi())