12 Commits

Author SHA1 Message Date
Björn Otgaar
4688a8fe17 chore: cleanup for merging/switching to later branches 2025-10-30 17:07:20 +01:00
Björn Otgaar
8990af88fb Merge remote-tracking branch 'origin/dev' into feat/ri2cb-robot-connections 2025-10-22 11:48:21 +02:00
Björn Otgaar
da14a67791 feat: made ping system for automatic disconnection
events, and fixed some issues.

ref: N25B-150
2025-10-08 14:30:55 +02:00
Björn Otgaar
ae60105b4d feat: send connection message when starting up
issue: N25B-150
2025-10-08 12:11:24 +02:00
Twirre Meulenbelt
c634e4b516 chore: replace print with logging and make robot conditional
All print statements in the main program, and components used by the main program, have been replaced with appropriate logging statements. The connection to the robot now only gets made when it's possible, otherwise only the microphone will be run.

ref: N25B-119
2025-10-02 16:13:39 +02:00
Twirre Meulenbelt
2132a74321 fix: allow access to state's exit_event while exiting
When exiting, the state's `is_initialized` flag is unset. Noticeable on Windows, when a thread tried to access the state's `exit_event` property to check whether it had been set, it would complain that the state was no longer initialized. Now, even when no longer initialized, if the `exit_event` is set, it will not raise an error when accessing this attribute.

ref: N25B-119
2025-10-01 17:34:51 +02:00
Twirre Meulenbelt
d21c7fa423 fix: always use 1 audio channel
Before, I chose the number of audio channels that the microphone supports. Should be 1.

ref: N25B-119
2025-10-01 13:41:53 +02:00
Twirre Meulenbelt
afae6fc331 feat: stream audio to CB
Uses PyAudio and ZeroMQ to publish audio chunks.

ref: N25B-119
2025-10-01 10:50:53 +02:00
da99b5cd62 chore: update README 2025-09-30 13:26:42 +02:00
d48ea930a1 chore: complete installation instructions
ref: N25B-115
2025-09-30 13:16:33 +02:00
9e001da685 chore: update README and gitignore
Add installation instructions for the development environment.

ref: N25B-115
2025-09-30 13:14:52 +02:00
a41552f7c6 feat: basic implementation of CB2RI
Nothing fancy yet. When we receive a message through ZeroMQ's PUB/SUB
architecture, we tell the robot to say it out loud.
2025-09-27 20:41:03 +02:00
15 changed files with 96 additions and 482 deletions

View File

@@ -1,4 +1,6 @@
# PepperPlus-RI
## Development environment
### Linux (or WSL)
Start off by installing [Pyenv](https://github.com/pyenv/pyenv?tab=readme-ov-file#installation) and walk through the steps outlined there (be sure to also add it to PATH). Also install the [Python build requirements](https://github.com/pyenv/pyenv/wiki#suggested-build-environment). Afterwards, install Python 2.7 and activate it for your current shell:
The robot interface is a high-level API for controlling the robot. It implements the API as designed: https://utrechtuniversity.youtrack.cloud/articles/N25B-A-14/RI-CB-Communication.

BIN
src/__init__.pyc Normal file

Binary file not shown.

93
src/audio_streaming.py Normal file
View File

@@ -0,0 +1,93 @@
import threading
import pyaudio
import zmq
from state import state
def choose_mic_interactive(audio):
    """
    Ask the user on the command line which audio device to use.

    Every device reported by PyAudio is listed with its index, and the prompt is repeated until a
    valid index has been entered.

    :param audio: An instance of PyAudio to query for devices.
    :return: The PyAudio device-info dict of the chosen device, or None if PyAudio reports no
        devices at all (there would be nothing valid to choose).
    """
    # Python 2's `input` evaluates whatever the user types; `raw_input` returns it as plain text.
    # Fall back to `input` where `raw_input` does not exist (Python 3).
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    device_count = audio.get_device_count()
    if device_count == 0:
        return None

    print("Found {} audio devices:".format(device_count))
    for i in range(device_count):
        print("- {}: {}".format(i, audio.get_device_info_by_index(i)["name"]))

    microphone_index = None
    while microphone_index is None:
        chosen = read_line("Which device would you like to use?\n> ")
        try:
            chosen = int(chosen)
            # Valid indices run from 0 up to and including device_count - 1.
            if chosen < 0 or chosen >= device_count:
                raise ValueError()
            microphone_index = chosen
        except ValueError:
            print("Please enter a number between 0 and {}".format(device_count - 1))

    chosen_microphone = audio.get_device_info_by_index(microphone_index)
    print("Chose microphone \"{}\"".format(chosen_microphone["name"]))
    return chosen_microphone
def choose_mic_default(audio):
    """
    Pick the microphone that PyAudio reports as the default input device.

    :param audio: An instance of PyAudio.
    :return: The device-info dict of the default input device.
    """
    return audio.get_default_input_device_info()
class AudioStreaming:
    """
    Publishes raw microphone audio on a ZeroMQ PUB socket from a background thread.

    The thread keeps reading chunks from the default microphone and publishing them until
    `state.exit_event` is set.
    """

    def __init__(self, port=5557):
        """
        :param port: TCP port the PUB socket is bound to once streaming starts.
        """
        self.port = port
        self.audio = pyaudio.PyAudio()
        self.microphone = choose_mic_default(self.audio)
        self.thread = None

    def run(self):
        """Start streaming in a separate thread and return immediately."""
        self.thread = threading.Thread(target=self._stream)
        self.thread.start()

    def wait_until_done(self):
        """Block until the streaming thread has finished; a no-op if it was never started."""
        if not self.thread:
            return
        self.thread.join()

    def _stream(self):
        """
        Thread body: bind the PUB socket, open the microphone and publish chunks until exit.

        The audio stream, the socket and the ZeroMQ context are all released when the loop ends,
        whether it ends normally or through an exception.
        """
        context = zmq.Context()
        socket = context.socket(zmq.PUB)
        try:
            socket.bind("tcp://*:{}".format(self.port))

            chunk = 512  # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD
            stream = self.audio.open(
                format=pyaudio.paFloat32,
                channels=1,
                rate=16000,
                input=True,
                input_device_index=self.microphone["index"],
                frames_per_buffer=chunk,
            )
            try:
                while not state.exit_event.is_set():
                    data = stream.read(chunk)
                    socket.send(data)
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            # Drop any unsent audio rather than letting `term` wait for it to be delivered.
            socket.close(linger=0)
            context.term()
if __name__ == "__main__":
    # Manual demo: stream audio for ten seconds (or until an exit is requested), then shut down.
    import sys
    import time

    state.initialize()
    try:
        audio = AudioStreaming()
        print("Starting audio streaming...")
        audio.run()

        end = time.time() + 10
        while not state.exit_event.is_set() and time.time() < end:
            # The countdown rewrites one terminal line and never emits a newline, so it has to be
            # flushed explicitly to become visible.
            sys.stdout.write("\rExiting in {:.2f} seconds".format(end - time.time()))
            sys.stdout.flush()
            time.sleep(0.05)

        # Ask the streaming thread to stop, then wait for it to finish.
        state.exit_event.set()
        audio.wait_until_done()
    finally:
        state.deinitialize()

BIN
src/audio_streaming.pyc Normal file

Binary file not shown.

View File

@@ -21,7 +21,6 @@ class ActuationReceiver(ReceiverBase):
self.create_socket(zmq_context, zmq.SUB, port)
self.socket.setsockopt_string(zmq.SUBSCRIBE, u"") # Causes block if given in options
self._tts_service = None
self._al_memory = None
def _handle_speech(self, message):
text = message.get("data")
@@ -41,26 +40,10 @@ class ActuationReceiver(ReceiverBase):
if not self._tts_service:
self._tts_service = state.qi_session.service("ALTextToSpeech")
if not self._al_memory:
self._al_memory = state.qi_session.service("ALMemory")
# Subscribe to speech end event
self.status_subscriber = self._al_memory.subscriber("ALTextToSpeech/Status") # self because garbage collect
self.status_subscriber.signal.connect(self._on_status_changed)
# Returns instantly. Messages received while speaking will be queued.
qi.async(self._tts_service.say, text)
@staticmethod
def _on_status_changed(value):
    """
    Callback for speaking-status changes; keeps `state.is_speaking` in sync.

    :param value: Status payload. According to the original note it contains either 'enqueued',
        'started' or 'done' depending on the status.
    """
    transitions = (
        ("started", "Started speaking.", True),
        ("done", "Done speaking.", False),
    )
    for marker, log_message, speaking in transitions:
        if marker in value:
            logging.debug(log_message)
            state.is_speaking = speaking
def handle_message(self, message):
if message["endpoint"] == "actuate/speech":
self._handle_speech(message)

View File

@@ -1,73 +0,0 @@
from __future__ import unicode_literals # So that `logging` can use Unicode characters in names
import threading
import logging
import pyaudio
import zmq
from robot_interface.endpoints.socket_base import SocketBase
from robot_interface.state import state
from robot_interface.utils.microphone import choose_mic_default
logger = logging.getLogger(__name__)
class AudioSender(SocketBase):
    """
    Publishes microphone audio on a ZeroMQ PUB socket from a background thread.

    Audio is not captured or sent while `state.is_speaking` is set.
    """

    def __init__(self, zmq_context, port=5558):
        """
        :param zmq_context: The ZeroMQ context used to create the PUB socket.
        :param port: The port passed to `create_socket` for the PUB socket.
        """
        super(AudioSender, self).__init__(str("audio"))  # Convert future's unicode_literal to str
        self.create_socket(zmq_context, zmq.PUB, port)
        self.audio = pyaudio.PyAudio()
        self.microphone = choose_mic_default(self.audio)
        self.thread = None

    def start(self):
        """
        Start sending audio in a different thread.

        Does nothing apart from logging when no microphone is available.
        """
        if not self.microphone:
            logger.info("Not listening: no microphone available.")
            return

        logger.info("Listening with microphone \"{}\".".format(self.microphone["name"]))
        self.thread = threading.Thread(target=self._stream)
        self.thread.start()

    def wait_until_done(self):
        """
        Wait until the audio thread is done. Will only be done if `state.exit_event` is set, so
        make sure to set that before calling this method or it will block.
        """
        if not self.thread:
            return
        self.thread.join()
        self.thread = None

    def _stream(self):
        """
        Thread body: read chunks from the microphone and publish them until exit is requested.

        A failing microphone (IOError while reading) is logged and ends the loop; the stream is
        stopped and closed in every case.
        """
        chunk = 512  # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD

        # Docs say this only raises an error if neither `input` nor `output` is True
        stream = self.audio.open(
            format=pyaudio.paFloat32,
            channels=1,
            rate=16000,
            input=True,
            input_device_index=self.microphone["index"],
            frames_per_buffer=chunk,
        )

        try:
            while not state.exit_event.is_set():
                # Don't send audio if Pepper is speaking
                if state.is_speaking:
                    if stream.is_active():
                        stream.stop_stream()
                    # Pause briefly instead of spinning at full speed while speech is ongoing.
                    # Waiting on the exit event still lets the loop react to an exit quickly.
                    state.exit_event.wait(0.01)
                    continue

                if stream.is_stopped():
                    stream.start_stream()

                data = stream.read(chunk)
                self.socket.send(data)
        except IOError as e:
            logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e)
        finally:
            stream.stop_stream()
            stream.close()

View File

@@ -1,7 +1,4 @@
import logging
from robot_interface.endpoints.audio_sender import AudioSender
logging.basicConfig(level=logging.DEBUG)
import zmq
@@ -28,11 +25,8 @@ def main_loop(context):
video_sender = VideoSender(context)
state.sockets.append(video_sender)
audio_sender = AudioSender(context)
state.sockets.append(audio_sender)
video_sender.start_video_rcv()
audio_sender.start()
# Sockets that can run on the main thread. These sockets' endpoints should not block for long (say 50 ms at most).
receivers = [main_receiver, actuation_receiver]
@@ -43,19 +37,8 @@ def main_loop(context):
logging.debug("Starting main loop.")
import schedule
test_speaking_message = {"data": "Hi, my name is Pepper, and this is quite a long message."}
def test_speak():
logging.debug("Testing speech.")
actuation_receiver._handle_speech(test_speaking_message)
schedule.every(10).seconds.do(test_speak)
while True:
if state.exit_event.is_set(): break
schedule.run_pending()
socks = dict(poller.poll(100))
for receiver in receivers:

View File

@@ -18,7 +18,6 @@ class State(object):
self.exit_event = None
self.sockets = [] # type: List[SocketBase]
self.qi_session = None # type: None | ssl.SSLSession
self.is_speaking = False # type: Boolean
def initialize(self):
if self.is_initialized:

View File

@@ -1,69 +0,0 @@
from __future__ import unicode_literals # So that `print` can print Unicode characters in names
import logging
logger = logging.getLogger(__name__)
def get_microphones(audio):
    """
    Yield the audio devices that have at least one input channel.

    :param audio: An instance of PyAudio to use.
    :type audio: pyaudio.PyAudio

    :return: An iterator of PyAudio dicts containing information about the microphone devices.
    :rtype: Iterator[dict]
    """
    device_total = audio.get_device_count()
    candidates = (audio.get_device_info_by_index(idx) for idx in range(device_total))
    for info in candidates:
        if info["maxInputChannels"] > 0:
            yield info
def choose_mic_interactive(audio):
    """
    Let the user pick a microphone through prompts on the command line.

    :param audio: An instance of PyAudio to use.
    :type audio: pyaudio.PyAudio

    :return: The PyAudio dictionary describing the chosen microphone, or None when there is no
        microphone to choose from.
    :rtype: dict | None
    """
    microphones = list(get_microphones(audio))
    if not microphones:
        return None

    print("Found {} microphones:".format(len(microphones)))
    for position, mic in enumerate(microphones):
        print("- {}: {}".format(position, mic["name"]))

    # Keep asking until the answer is an integer that indexes into `microphones`.
    while True:
        answer = raw_input("Which device would you like to use?\n> ")
        try:
            position = int(answer)
        except ValueError:
            position = -1  # Not a number: handled like any other out-of-range answer
        if 0 <= position < len(microphones):
            chosen_microphone = microphones[position]
            break
        print("Please enter a number between 0 and {}".format(len(microphones)-1))

    logger.info("Chose microphone \"{}\"".format(chosen_microphone["name"]))
    return chosen_microphone
def choose_mic_default(audio):
    """
    Look up the default input device of the system.

    :param audio: An instance of PyAudio to use.
    :type audio: pyaudio.PyAudio

    :return: The PyAudio dictionary describing the default microphone, or None when asking for it
        raises an IOError.
    :rtype: dict | None
    """
    try:
        default_device = audio.get_default_input_device_info()
    except IOError:
        default_device = None
    return default_device

View File

@@ -1,95 +0,0 @@
import random
import sys
from StringIO import StringIO
from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive, get_microphones
class MicrophoneUtils(object):
    """Shared tests for any PyAudio-like implementation, e.g. mock and real."""

    @staticmethod
    def _patch_prompt(mocker, answers):
        """Feed `answers` to `raw_input` and capture stdout; returns (input mock, output buffer)."""
        mock_input = mocker.patch("__builtin__.raw_input", side_effect=answers)
        fake_out = StringIO()
        mocker.patch.object(sys, "stdout", fake_out)
        return mock_input, fake_out

    @staticmethod
    def _assert_integer_index(result):
        """The chosen microphone must carry an integer "index"."""
        assert "index" in result
        assert isinstance(result["index"], (int, long))

    @staticmethod
    def _assert_asked_again(mock_input, fake_out):
        """The prompt must have been used and the retry hint must have been printed."""
        assert mock_input.called
        assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines())

    def test_choose_mic_default(self, pyaudio_instance):
        """
        The default microphone must expose an "index" (used to identify it), a "name" (used for
        logging), one or more input channels, and a default sample rate of at least 16000.
        """
        result = choose_mic_default(pyaudio_instance)

        expected_types = (
            ("index", (int, long)),
            ("name", (str, unicode)),
            ("maxInputChannels", (int, long)),
            ("defaultSampleRate", float),
        )
        for key, types in expected_types:
            assert key in result
            assert isinstance(result[key], types)

        assert result["maxInputChannels"] > 0
        assert result["defaultSampleRate"] >= 16000

    def test_choose_mic_interactive_input_not_int(self, pyaudio_instance, mocker):
        """
        Answer with something that is not an integer first, then with a valid integer. No error
        may be raised, and the user must be asked again.
        """
        mock_input, fake_out = self._patch_prompt(mocker, ["not an integer", "0"])

        result = choose_mic_interactive(pyaudio_instance)

        self._assert_integer_index(result)
        assert result["index"] == 0
        self._assert_asked_again(mock_input, fake_out)

    def test_choose_mic_interactive_negative_index(self, pyaudio_instance, mocker):
        """
        Negative integers must be rejected by the interactive method.
        """
        mock_input, fake_out = self._patch_prompt(mocker, ["-1", "0"])

        result = choose_mic_interactive(pyaudio_instance)

        self._assert_integer_index(result)
        assert result["index"] == 0
        self._assert_asked_again(mock_input, fake_out)

    def test_choose_mic_interactive_index_too_high(self, pyaudio_instance, mocker):
        """
        Indices above the highest microphone index must be rejected by the interactive method.
        """
        real_count = len(list(get_microphones(pyaudio_instance)))
        mock_input, fake_out = self._patch_prompt(mocker, [str(real_count), "0"])

        result = choose_mic_interactive(pyaudio_instance)

        self._assert_integer_index(result)
        self._assert_asked_again(mock_input, fake_out)

    def test_choose_mic_interactive_random_index(self, pyaudio_instance, mocker):
        """
        Answer with a random valid position and check that the matching microphone is returned.
        """
        microphones = list(get_microphones(pyaudio_instance))
        random_index = random.randrange(len(microphones))
        mocker.patch("__builtin__.raw_input", side_effect=[str(random_index)])

        result = choose_mic_interactive(pyaudio_instance)

        self._assert_integer_index(result)
        assert result["index"] == microphones[random_index]["index"]

View File

@@ -1,20 +0,0 @@
import pyaudio
import pytest
from common.microphone_utils import MicrophoneUtils
@pytest.fixture
def pyaudio_instance():
    """Provide a real PyAudio instance; skip the test when there is no default input device."""
    audio = pyaudio.PyAudio()
    try:
        audio.get_default_input_device_info()
    except IOError:
        pytest.skip("No microphone available to test with.")
    return audio
class TestAudioIntegration(MicrophoneUtils):
    """Run shared audio behavior tests with the real PyAudio implementation."""
    pass

View File

@@ -1,104 +0,0 @@
# coding=utf-8
import os
import time
import mock
import pytest
import zmq
from robot_interface.endpoints.audio_sender import AudioSender
@pytest.fixture
def zmq_context():
    """Provide a fresh ZeroMQ context to the test."""
    yield zmq.Context()
def test_no_microphone(zmq_context, mocker):
    """Without a microphone, `start` must log something at info level and not create a thread."""
    info_log = mocker.patch("robot_interface.endpoints.audio_sender.logger.info")
    mocker.patch(
        "robot_interface.endpoints.audio_sender.choose_mic_default",
        return_value=None,
    )

    sender = AudioSender(zmq_context)
    assert sender.microphone is None

    sender.start()
    assert sender.thread is None
    info_log.assert_called()

    # No thread was started, so this should return early.
    sender.wait_until_done()
def test_unicode_mic_name(zmq_context, mocker):
    """A microphone name containing Unicode symbols must not make `start` fail when logging it."""
    # Replace `threading` so that no real thread is started.
    mocker.patch("robot_interface.endpoints.audio_sender.threading")
    mocker.patch(
        "robot_interface.endpoints.audio_sender.choose_mic_default",
        return_value={"name": u"• Some Unicode name"},
    )

    sender = AudioSender(zmq_context)
    assert sender.microphone is not None

    # `.start()` logs the name of the microphone, which is where a Unicode problem would show up.
    sender.start()
    assert sender.thread is not None

    # Should return instantly because the thread is not a real one.
    sender.wait_until_done()
def _fake_read(num_frames):
return os.urandom(num_frames * 4)
def test_sending_audio(mocker):
mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L}
mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
mock_state.exit_event.is_set.side_effect = [False, True]
mock_zmq_context = mock.Mock()
send_socket = mock.Mock()
# If there's something wrong with the microphone, it will raise an IOError when `read`ing.
stream = mock.Mock()
stream.read = _fake_read
sender = AudioSender(mock_zmq_context)
sender.socket.send = send_socket
sender.audio.open = mock.Mock()
sender.audio.open.return_value = stream
sender.start()
sender.wait_until_done()
send_socket.assert_called()
def _fake_read_error(num_frames):
raise IOError()
def test_break_microphone(mocker):
mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L}
mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
mock_state.exit_event.is_set.side_effect = [False, True]
mock_zmq_context = mock.Mock()
send_socket = mock.Mock()
# If there's something wrong with the microphone, it will raise an IOError when `read`ing.
stream = mock.Mock()
stream.read = _fake_read_error
sender = AudioSender(mock_zmq_context)
sender.socket.send = send_socket
sender.audio.open = mock.Mock()
sender.audio.open.return_value = stream
sender.start()
sender.wait_until_done()
send_socket.assert_not_called()

View File

@@ -1,85 +0,0 @@
# coding=utf-8
import mock
import pytest
from common.microphone_utils import MicrophoneUtils
from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive
class MockPyAudio:
    """Minimal stand-in for PyAudio's device queries, backed by a fixed list of fake devices."""

    def __init__(self):
        # Fields that are identical for every fake device.
        shared = {
            "defaultLowInputLatency": 0.01,
            "defaultLowOutputLatency": 0.01,
            "defaultHighInputLatency": 0.1,
            "defaultHighOutputLatency": 0.1,
            "hostApi": 0,
        }
        # One input-only device and one output-only device; edit this list to change the fakes.
        microphone = dict(
            shared,
            index=0,
            name=u"Someones Microphone",
            maxInputChannels=2,
            maxOutputChannels=0,
            defaultSampleRate=44100.0,
        )
        speaker = dict(
            shared,
            index=1,
            name=u"Mock Speaker 1",
            maxInputChannels=0,
            maxOutputChannels=2,
            defaultSampleRate=48000.0,
        )
        self.devices = [microphone, speaker]

    def get_device_count(self):
        """Return the number of available mock devices."""
        return len(self.devices)

    def get_device_info_by_index(self, index):
        """Return the mock device at `index`; raise IOError when the index is out of range."""
        if not 0 <= index < len(self.devices):
            raise IOError("Invalid device index: {}".format(index))
        return self.devices[index]

    def get_default_input_device_info(self):
        """Return the first mock device with input channels; raise IOError if there is none."""
        with_input = [d for d in self.devices if d.get("maxInputChannels", 0) > 0]
        if not with_input:
            raise IOError("No default input device found")
        return with_input[0]
@pytest.fixture
def pyaudio_instance():
    """Provide the mock PyAudio implementation to the shared microphone tests."""
    mock_audio = MockPyAudio()
    return mock_audio
def _raise_io_error():
raise IOError()
class TestAudioUnit(MicrophoneUtils):
"""Run shared audio behavior tests with the mock implementation."""
def test_choose_mic_default_no_mic(self):
mock_pyaudio = mock.Mock()
mock_pyaudio.get_device_count = mock.Mock(return_value=0L)
mock_pyaudio.get_default_input_device_info = _raise_io_error
result = choose_mic_default(mock_pyaudio)
assert result is None
def test_choose_mic_interactive_no_mic(self):
mock_pyaudio = mock.Mock()
mock_pyaudio.get_device_count = mock.Mock(return_value=0L)
mock_pyaudio.get_default_input_device_info = _raise_io_error
result = choose_mic_interactive(mock_pyaudio)
assert result is None