6 Commits

Author SHA1 Message Date
b4814d431f feat: ignore own speech
When we detect that Pepper is talking, we stop sending audio.

ref: N25B-214
2025-10-27 13:08:57 +01:00
Twirre Meulenbelt
230ab5d5cc test: add case for microphone failure
When the microphone fails, it raises an IOError during `read`. A new test simulates this.

ref: N25B-119
2025-10-22 15:38:30 +02:00
Twirre Meulenbelt
0499cd8a24 feat: send audio
AudioSender runs in a separate thread to send audio from the microphone.

ref: N25B-119
2025-10-22 15:10:27 +02:00
Twirre Meulenbelt
f8db719bfa test: unit test mock PyAudio, integration test use real
Make unit tests use a mock version of PyAudio, while integration tests use the real version. If no real microphone is available, these integration tests are skipped.

ref: N25B-119
2025-10-22 13:27:35 +02:00
Twirre Meulenbelt
1e3e077029 fix: disallow selecting non-microphone audio device
Previously, any audio device could be selected as the microphone. Now only devices with at least one input channel can be selected.

ref: N25B-119
2025-10-22 13:24:46 +02:00
Twirre Meulenbelt
0f60f67ab9 feat: add microphone selection utils
Provides two functions: one to choose the default microphone, the other to choose a microphone interactively. Tests included.

ref: N25B-119
2025-10-22 11:44:51 +02:00
15 changed files with 482 additions and 96 deletions

View File

@@ -1,6 +1,4 @@
## Development environment
### Linux (or WSL)
Start off by installing [Pyenv](https://github.com/pyenv/pyenv?tab=readme-ov-file#installation) and walk through the steps outlined there (be sure to also add it to PATH). Also install the [Python build requirements](https://github.com/pyenv/pyenv/wiki#suggested-build-environment). Afterwards, install Python 2.7 and activate it for your current shell:
# PepperPlus-RI
The robot interface is a high-level API for controlling the robot. It implements the API as designed: https://utrechtuniversity.youtrack.cloud/articles/N25B-A-14/RI-CB-Communication.

Binary file not shown.

View File

@@ -1,93 +0,0 @@
import threading
import pyaudio
import zmq
from state import state
def choose_mic_interactive(audio):
"""Choose a microphone to use. The `audio` parameter is an instance of PyAudio. Returns a dict."""
device_count = audio.get_device_count()
print("Found {} audio devices:".format(device_count))
for i in range(device_count):
print("- {}: {}".format(i, audio.get_device_info_by_index(i)["name"]))
microphone_index = None
while microphone_index is None:
chosen = input("Which device would you like to use?\n> ")
try:
chosen = int(chosen)
if chosen < 0 or chosen > device_count: raise ValueError()
microphone_index = chosen
except ValueError:
print("Please enter a number between 0 and {}".format(device_count))
chosen_microphone = audio.get_device_info_by_index(microphone_index)
print("Chose microphone \"{}\"".format(chosen_microphone["name"]))
return chosen_microphone
def choose_mic_default(audio):
"""Choose a microphone to use based on defaults. The `audio` parameter is a PyAudio. Returns a dict."""
default_device = audio.get_default_input_device_info()
return default_device
class AudioStreaming:
def __init__(self, port=5557):
self.port = port
self.audio = pyaudio.PyAudio()
self.microphone = choose_mic_default(self.audio)
self.thread = None
def run(self):
self.thread = threading.Thread(target=self._stream)
self.thread.start()
def wait_until_done(self):
if not self.thread: return
self.thread.join()
def _stream(self):
context = zmq.Context()
socket = context.socket(zmq.PUB)
socket.bind("tcp://*:{}".format(self.port))
chunk = 512 # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD
stream = self.audio.open(
format=pyaudio.paFloat32,
channels=1,
rate=16000,
input=True,
input_device_index=self.microphone["index"],
frames_per_buffer=chunk,
)
try:
while not state.exit_event.is_set():
data = stream.read(chunk)
socket.send(data)
finally:
stream.stop_stream()
stream.close()
if __name__ == "__main__":
state.initialize()
try:
audio = AudioStreaming()
print("Starting audio streaming...")
audio.run()
import time
end = time.time() + 10
while not state.exit_event.is_set() and time.time() < end:
print "\rExiting in {:.2f} seconds".format(end - time.time()),
time.sleep(0.05)
state.exit_event.set()
audio.wait_until_done()
finally:
state.deinitialize()

Binary file not shown.

View File

@@ -21,6 +21,7 @@ class ActuationReceiver(ReceiverBase):
self.create_socket(zmq_context, zmq.SUB, port)
self.socket.setsockopt_string(zmq.SUBSCRIBE, u"") # Causes block if given in options
self._tts_service = None
self._al_memory = None
def _handle_speech(self, message):
text = message.get("data")
@@ -40,10 +41,26 @@ class ActuationReceiver(ReceiverBase):
if not self._tts_service:
self._tts_service = state.qi_session.service("ALTextToSpeech")
if not self._al_memory:
self._al_memory = state.qi_session.service("ALMemory")
# Subscribe to speech end event
self.status_subscriber = self._al_memory.subscriber("ALTextToSpeech/Status") # Keep the subscriber on self so it is not garbage collected
self.status_subscriber.signal.connect(self._on_status_changed)
# Returns instantly. Messages received while speaking will be queued.
qi.async(self._tts_service.say, text)
@staticmethod
def _on_status_changed(value): # value will contain either 'enqueued', 'started' or 'done' depending on the status
"""Callback function for when the speaking status changes. Will change the is_speaking value of the state."""
if "started" in value:
logging.debug("Started speaking.")
state.is_speaking = True
if "done" in value:
logging.debug("Done speaking.")
state.is_speaking = False
def handle_message(self, message):
if message["endpoint"] == "actuate/speech":
self._handle_speech(message)
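
Note: the hunk only shows the callback wiring. Below is a minimal sketch of how the handler behaves; the list-shaped [task_id, status] payload and the robot_interface.endpoints.actuation_receiver import path are assumptions for illustration, not taken from this diff.

# Sketch only: exercises _on_status_changed directly. The payload shape and
# the import path are assumptions, not confirmed by the hunk above.
from robot_interface.endpoints.actuation_receiver import ActuationReceiver
from robot_interface.state import state

ActuationReceiver._on_status_changed(["task-1", "started"])
assert state.is_speaking        # AudioSender will now stop reading the microphone

ActuationReceiver._on_status_changed(["task-1", "done"])
assert not state.is_speaking    # Audio streaming resumes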

View File

@@ -0,0 +1,73 @@
from __future__ import unicode_literals # So that `logging` can use Unicode characters in names
import threading
import logging
import pyaudio
import zmq
from robot_interface.endpoints.socket_base import SocketBase
from robot_interface.state import state
from robot_interface.utils.microphone import choose_mic_default
logger = logging.getLogger(__name__)
class AudioSender(SocketBase):
def __init__(self, zmq_context, port=5558):
super(AudioSender, self).__init__(str("audio")) # Convert future's unicode_literal to str
self.create_socket(zmq_context, zmq.PUB, port)
self.audio = pyaudio.PyAudio()
self.microphone = choose_mic_default(self.audio)
self.thread = None
def start(self):
"""
Start sending audio in a different thread.
"""
if not self.microphone:
logger.info("Not listening: no microphone available.")
return
logger.info("Listening with microphone \"{}\".".format(self.microphone["name"]))
self.thread = threading.Thread(target=self._stream)
self.thread.start()
def wait_until_done(self):
"""
Wait until the audio thread is done. Will only be done if `state.exit_event` is set, so
make sure to set that before calling this method or it will block.
"""
if not self.thread: return
self.thread.join()
self.thread = None
def _stream(self):
chunk = 512 # 320 frames at 16000 Hz is 20 ms; Silero-VAD requires 512-frame chunks (32 ms)
# Docs say this only raises an error if neither `input` nor `output` is True
stream = self.audio.open(
format=pyaudio.paFloat32,
channels=1,
rate=16000,
input=True,
input_device_index=self.microphone["index"],
frames_per_buffer=chunk,
)
try:
while not state.exit_event.is_set():
# Don't send audio if Pepper is speaking
if state.is_speaking:
if stream.is_active(): stream.stop_stream()
continue
if stream.is_stopped(): stream.start_stream()
data = stream.read(chunk)
self.socket.send(data)
except IOError as e:
logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e)
finally:
stream.stop_stream()
stream.close()
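
The PUB socket publishes raw float32 audio (native byte order), one 512-sample chunk per message at 16 kHz. A minimal consumer-side sketch, assuming `create_socket` binds the PUB socket on the given port (as the removed standalone script did) and that numpy is available on the consumer:

# Consumer-side sketch, not part of this change. Assumes the PUB socket is
# bound on port 5558 and reachable at localhost; adjust the address for the
# robot's actual host. numpy is only used here to decode the float32 bytes.
import numpy as np
import zmq

context = zmq.Context()
subscriber = context.socket(zmq.SUB)
subscriber.connect("tcp://localhost:5558")
subscriber.setsockopt_string(zmq.SUBSCRIBE, u"")    # Receive every audio message

while True:
    raw = subscriber.recv()                         # One chunk: 512 float32 samples (paFloat32, mono, 16 kHz)
    samples = np.frombuffer(raw, dtype=np.float32)  # Shape (512,), nominally in [-1.0, 1.0]
    # ...feed `samples` into VAD / speech recognition here...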

View File

@@ -1,4 +1,7 @@
import logging
from robot_interface.endpoints.audio_sender import AudioSender
logging.basicConfig(level=logging.DEBUG)
import zmq
@@ -25,8 +28,11 @@ def main_loop(context):
video_sender = VideoSender(context)
state.sockets.append(video_sender)
audio_sender = AudioSender(context)
state.sockets.append(audio_sender)
video_sender.start_video_rcv()
audio_sender.start()
# Sockets that can run on the main thread. These sockets' endpoints should not block for long (say 50 ms at most).
receivers = [main_receiver, actuation_receiver]
@@ -37,8 +43,19 @@ def main_loop(context):
logging.debug("Starting main loop.")
import schedule
test_speaking_message = {"data": "Hi, my name is Pepper, and this is quite a long message."}
def test_speak():
logging.debug("Testing speech.")
actuation_receiver._handle_speech(test_speaking_message)
schedule.every(10).seconds.do(test_speak)
while True:
if state.exit_event.is_set(): break
schedule.run_pending()
socks = dict(poller.poll(100))
for receiver in receivers:
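
The hunk ends before the shutdown path, which is not shown here. Per `wait_until_done`'s docstring, teardown has to set the exit event before joining the audio thread, roughly as in this sketch:

# Shutdown sketch (the actual teardown is not part of the shown hunk).
state.exit_event.set()          # Ask AudioSender._stream to leave its loop first
audio_sender.wait_until_done()  # Then join the thread; joining before setting the event would block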

View File

@@ -18,6 +18,7 @@ class State(object):
self.exit_event = None
self.sockets = [] # type: List[SocketBase]
self.qi_session = None # type: None | ssl.SSLSession
self.is_speaking = False # type: bool
def initialize(self):
if self.is_initialized:

View File

@@ -0,0 +1,69 @@
from __future__ import unicode_literals # So that `print` can print Unicode characters in names
import logging
logger = logging.getLogger(__name__)
def get_microphones(audio):
"""
Get audio devices which have input channels.
:param audio: An instance of PyAudio to use.
:type audio: pyaudio.PyAudio
:return: An iterator of PyAudio device-info dicts describing the available microphones.
:rtype: Iterator[dict]
"""
for i in range(audio.get_device_count()):
device = audio.get_device_info_by_index(i)
if device["maxInputChannels"] > 0:
yield device
def choose_mic_interactive(audio):
"""
Choose a microphone to use, interactively in the CLI.
:param audio: An instance of PyAudio to use.
:type audio: pyaudio.PyAudio
:return: A dictionary from PyAudio containing information about the microphone to use, or None
if there is no microphone.
:rtype: dict | None
"""
microphones = list(get_microphones(audio))
if len(microphones) == 0: return None
print("Found {} microphones:".format(len(microphones)))
for i, mic in enumerate(microphones):
print("- {}: {}".format(i, mic["name"]))
chosen_microphone = None
while chosen_microphone is None:
chosen = raw_input("Which device would you like to use?\n> ")
try:
chosen = int(chosen)
if chosen < 0 or chosen >= len(microphones): raise ValueError()
chosen_microphone = microphones[chosen]
except ValueError:
print("Please enter a number between 0 and {}".format(len(microphones)-1))
logger.info("Chose microphone \"{}\"".format(chosen_microphone["name"]))
return chosen_microphone
def choose_mic_default(audio):
"""
Get the system's default microphone to use.
:param audio: An instance of PyAudio to use.
:type audio: pyaudio.PyAudio
:return: A dictionary from PyAudio containing information about the microphone to use, or None
if there is no microphone.
:rtype: dict | None
"""
try:
return audio.get_default_input_device_info()
except IOError:
return None
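
A short usage sketch of the two helpers. The fall-back-to-interactive flow is illustrative only; AudioSender itself only calls `choose_mic_default`:

# Usage sketch, not part of this change. Falls back to the interactive chooser
# when no default input device is configured.
import pyaudio
from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive

audio = pyaudio.PyAudio()
try:
    microphone = choose_mic_default(audio) or choose_mic_interactive(audio)
    if microphone is None:
        print("No microphone available.")
    else:
        print("Using microphone \"{}\" (index {}).".format(microphone["name"], microphone["index"]))
finally:
    audio.terminate()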

View File

@@ -0,0 +1,95 @@
import random
import sys
from StringIO import StringIO
from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive, get_microphones
class MicrophoneUtils(object):
"""Shared tests for any PyAudio-like implementation, e.g. mock and real."""
def test_choose_mic_default(self, pyaudio_instance):
"""
The result must contain at least "index", as this is used to identify the microphone.
The "name" is used for logging, so it should also exist.
It must have one or more input channels.
Lastly, it must support a sample rate of at least 16000 Hz.
"""
result = choose_mic_default(pyaudio_instance)
assert "index" in result
assert isinstance(result["index"], (int, long))
assert "name" in result
assert isinstance(result["name"], (str, unicode))
assert "maxInputChannels" in result
assert isinstance(result["maxInputChannels"], (int, long))
assert result["maxInputChannels"] > 0
assert "defaultSampleRate" in result
assert isinstance(result["defaultSampleRate"], float)
assert result["defaultSampleRate"] >= 16000
def test_choose_mic_interactive_input_not_int(self, pyaudio_instance, mocker):
"""
First mock an input that's not an integer, then a valid integer. There should be no errors.
"""
mock_input = mocker.patch("__builtin__.raw_input", side_effect=["not an integer", "0"])
fake_out = StringIO()
mocker.patch.object(sys, "stdout", fake_out)
result = choose_mic_interactive(pyaudio_instance)
assert "index" in result
assert isinstance(result["index"], (int, long))
assert result["index"] == 0
assert mock_input.called
assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines())
def test_choose_mic_interactive_negative_index(self, pyaudio_instance, mocker):
"""
Make sure that the interactive method does not allow negative integers as input.
"""
mock_input = mocker.patch("__builtin__.raw_input", side_effect=["-1", "0"])
fake_out = StringIO()
mocker.patch.object(sys, "stdout", fake_out)
result = choose_mic_interactive(pyaudio_instance)
assert "index" in result
assert isinstance(result["index"], (int, long))
assert result["index"] == 0
assert mock_input.called
assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines())
def test_choose_mic_interactive_index_too_high(self, pyaudio_instance, mocker):
"""
Make sure that the interactive method does not allow indices higher than the highest mic index.
"""
real_count = len(list(get_microphones(pyaudio_instance)))
mock_input = mocker.patch("__builtin__.raw_input", side_effect=[str(real_count), "0"])
fake_out = StringIO()
mocker.patch.object(sys, "stdout", fake_out)
result = choose_mic_interactive(pyaudio_instance)
assert "index" in result
assert isinstance(result["index"], (int, long))
assert mock_input.called
assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines())
def test_choose_mic_interactive_random_index(self, pyaudio_instance, mocker):
"""
Get a random index from the list of available mics, make sure it's correct.
"""
microphones = list(get_microphones(pyaudio_instance))
random_index = random.randrange(len(microphones))
mocker.patch("__builtin__.raw_input", side_effect=[str(random_index)])
result = choose_mic_interactive(pyaudio_instance)
assert "index" in result
assert isinstance(result["index"], (int, long))
assert result["index"] == microphones[random_index]["index"]

View File

View File

@@ -0,0 +1,20 @@
import pyaudio
import pytest
from common.microphone_utils import MicrophoneUtils
@pytest.fixture
def pyaudio_instance():
audio = pyaudio.PyAudio()
try:
audio.get_default_input_device_info()
return audio
except IOError:
pytest.skip("No microphone available to test with.")
class TestAudioIntegration(MicrophoneUtils):
"""Run shared audio behavior tests with the mock implementation."""
pass

View File

@@ -0,0 +1,104 @@
# coding=utf-8
import os
import time
import mock
import pytest
import zmq
from robot_interface.endpoints.audio_sender import AudioSender
@pytest.fixture
def zmq_context():
context = zmq.Context()
yield context
def test_no_microphone(zmq_context, mocker):
mock_info_logger = mocker.patch("robot_interface.endpoints.audio_sender.logger.info")
mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
mock_choose_mic.return_value = None
sender = AudioSender(zmq_context)
assert sender.microphone is None
sender.start()
assert sender.thread is None
mock_info_logger.assert_called()
sender.wait_until_done() # Should return early because we didn't start a thread
def test_unicode_mic_name(zmq_context, mocker):
mocker.patch("robot_interface.endpoints.audio_sender.threading")
mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
mock_choose_mic.return_value = {"name": u"• Some Unicode name"}
sender = AudioSender(zmq_context)
assert sender.microphone is not None
# `.start()` logs the name of the microphone. It should not give an error if it contains Unicode
# symbols.
sender.start()
assert sender.thread is not None
sender.wait_until_done() # Should return instantly because we didn't start a real thread
def _fake_read(num_frames):
return os.urandom(num_frames * 4)
def test_sending_audio(mocker):
mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L}
mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
mock_state.exit_event.is_set.side_effect = [False, True]
mock_state.is_speaking = False  # A Mock attribute is truthy, which would make the sender skip reading and never send
mock_zmq_context = mock.Mock()
send_socket = mock.Mock()
# A healthy stream returns raw audio bytes from `read`; `_fake_read` fakes one chunk.
stream = mock.Mock()
stream.read = _fake_read
sender = AudioSender(mock_zmq_context)
sender.socket.send = send_socket
sender.audio.open = mock.Mock()
sender.audio.open.return_value = stream
sender.start()
sender.wait_until_done()
send_socket.assert_called()
def _fake_read_error(num_frames):
raise IOError()
def test_break_microphone(mocker):
mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L}
mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
mock_state.exit_event.is_set.side_effect = [False, True]
mock_state.is_speaking = False  # A Mock attribute is truthy, which would make the sender skip reading entirely
mock_zmq_context = mock.Mock()
send_socket = mock.Mock()
# If there's something wrong with the microphone, it will raise an IOError when `read`ing.
stream = mock.Mock()
stream.read = _fake_read_error
sender = AudioSender(mock_zmq_context)
sender.socket.send = send_socket
sender.audio.open = mock.Mock()
sender.audio.open.return_value = stream
sender.start()
sender.wait_until_done()
send_socket.assert_not_called()

View File

@@ -0,0 +1,85 @@
# coding=utf-8
import mock
import pytest
from common.microphone_utils import MicrophoneUtils
from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive
class MockPyAudio(object):
def __init__(self):
# You can predefine fake device info here
self.devices = [
{
"index": 0,
"name": u"Someones Microphone", # Using a Unicode character
"maxInputChannels": 2,
"maxOutputChannels": 0,
"defaultSampleRate": 44100.0,
"defaultLowInputLatency": 0.01,
"defaultLowOutputLatency": 0.01,
"defaultHighInputLatency": 0.1,
"defaultHighOutputLatency": 0.1,
"hostApi": 0,
},
{
"index": 1,
"name": u"Mock Speaker 1",
"maxInputChannels": 0,
"maxOutputChannels": 2,
"defaultSampleRate": 48000.0,
"defaultLowInputLatency": 0.01,
"defaultLowOutputLatency": 0.01,
"defaultHighInputLatency": 0.1,
"defaultHighOutputLatency": 0.1,
"hostApi": 0,
},
]
def get_device_count(self):
"""Return the number of available mock devices."""
return len(self.devices)
def get_device_info_by_index(self, index):
"""Return information for a given mock device index."""
if 0 <= index < len(self.devices):
return self.devices[index]
else:
raise IOError("Invalid device index: {}".format(index))
def get_default_input_device_info(self):
"""Return info for a default mock input device."""
for device in self.devices:
if device.get("maxInputChannels", 0) > 0:
return device
raise IOError("No default input device found")
@pytest.fixture
def pyaudio_instance():
return MockPyAudio()
def _raise_io_error():
raise IOError()
class TestAudioUnit(MicrophoneUtils):
"""Run shared audio behavior tests with the mock implementation."""
def test_choose_mic_default_no_mic(self):
mock_pyaudio = mock.Mock()
mock_pyaudio.get_device_count = mock.Mock(return_value=0L)
mock_pyaudio.get_default_input_device_info = _raise_io_error
result = choose_mic_default(mock_pyaudio)
assert result is None
def test_choose_mic_interactive_no_mic(self):
mock_pyaudio = mock.Mock()
mock_pyaudio.get_device_count = mock.Mock(return_value=0L)
mock_pyaudio.get_default_input_device_info = _raise_io_error
result = choose_mic_interactive(mock_pyaudio)
assert result is None