Merge branch 'feat/stream-audio' into 'dev'

Implement audio streaming See merge request ics/sp/2025/n25b/pepperplus-ri!8
2025-11-05 12:08:28 +00:00
parent 4da83a0a7e 8a095323ec
commit c037eb7ec2
12 changed files with 574 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -34,6 +34,18 @@ python -m virtualenv .venv
 source .venv/bin/activate
 ```
 We depend on PortAudio for the `pyaudio` package, so install it with:
 ```bash
 sudo apt install -y portaudio19-dev
 ```
 On WSL, also install:
 ```bash
 sudo apt install -y libasound2-plugins
 ```
 Install the required packages with
 ```bash
@@ -98,6 +110,8 @@ $env:PYTHONPATH="src"; python -m robot_interface.main
 With both, if you want to connect to the actual robot (or simulator), pass the `--qi-url` argument.
 There's also a `--microphone` argument for choosing which microphone to use. If it is not given, the program tries the system's default microphone. If you don't know the name of your microphone, pass the argument with any value: the program will log the names of the available microphones and fall back to the default one.
 ## Testing
--- a/src/robot_interface/endpoints/actuation_receiver.py
+++ b/src/robot_interface/endpoints/actuation_receiver.py
@@ -1,3 +1,4 @@
 from __future__ import unicode_literals  # So that we can log texts with Unicode characters
 import logging
 import zmq
--- a/src/robot_interface/endpoints/audio_sender.py
+++ b/src/robot_interface/endpoints/audio_sender.py
@@ -0,0 +1,72 @@
 from __future__ import unicode_literals  # So that `logging` can use Unicode characters in names
 import threading
 import logging
 import pyaudio
 import zmq
 from robot_interface.endpoints.socket_base import SocketBase
 from robot_interface.state import state
 from robot_interface.utils.microphone import choose_mic
 logger = logging.getLogger(__name__)
 class AudioSender(SocketBase):
    """
    Endpoint that captures audio from a microphone and sends it over a ZeroMQ PUB socket.

    The microphone is picked with `choose_mic` when the sender is constructed. Capturing happens
    on a separate thread, started with `start()`, which keeps reading chunks of audio until
    `state.exit_event` is set or the microphone fails.
    """
    def __init__(self, zmq_context, port=5558):
        """
        :param zmq_context: The ZeroMQ context that is passed on to `create_socket`.
        :type zmq_context: zmq.Context

        :param port: The port that is passed on to `create_socket`.
        :type port: int
        """
        super(AudioSender, self).__init__(str("audio"))  # Convert future's unicode_literal to str
        self.create_socket(zmq_context, zmq.PUB, port)
        self.thread = None  # The streaming thread; stays None until `start()` succeeds
        try:
            self.audio = pyaudio.PyAudio()
            self.microphone = choose_mic(self.audio)
        except IOError as e:
            # Keep running without audio instead of crashing the whole program
            logger.warning("PyAudio is not available.", exc_info=e)
            self.audio = None
            self.microphone = None

    def start(self):
        """
        Start sending audio in a different thread. Does nothing (apart from logging) if there is
        no microphone available.
        """
        if not self.microphone:
            logger.info("Not listening: no microphone available.")
            return

        logger.info("Listening with microphone \"{}\".".format(self.microphone["name"]))
        self.thread = threading.Thread(target=self._stream)
        self.thread.start()

    def wait_until_done(self):
        """
        Wait until the audio thread is done. Will only be done if `state.exit_event` is set, so
        make sure to set that before calling this method or it will block.
        """
        if not self.thread:
            return
        self.thread.join()
        self.thread = None

    def _stream(self):
        """
        Read audio from the microphone and send every chunk over the socket, until
        `state.exit_event` is set. Meant to be run on the thread created by `start()`.

        Audio is read as mono 32-bit floats at 16000 Hz, in chunks of 512 frames. Failing to open
        or read from the microphone is logged and ends the streaming; it is not raised.
        """
        chunk = 512  # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD

        # Opening can fail with an IOError too, e.g. when the device does not support the requested
        # format or is in use. An uncaught exception here would silently kill this thread.
        try:
            stream = self.audio.open(
                format=pyaudio.paFloat32,
                channels=1,
                rate=16000,
                input=True,
                input_device_index=self.microphone["index"],
                frames_per_buffer=chunk,
            )
        except IOError as e:
            logger.error("Not listening: failed to open an audio stream for the microphone.",
                         exc_info=e)
            return

        try:
            while not state.exit_event.is_set():
                data = stream.read(chunk)
                self.socket.send(data)
        except IOError as e:
            logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e)
        finally:
            # Always release the stream, also when reading failed
            stream.stop_stream()
            stream.close()
--- a/src/robot_interface/endpoints/video_sender.py
+++ b/src/robot_interface/endpoints/video_sender.py
@@ -1,6 +1,5 @@
 import zmq
 import threading
 import qi
 import logging
 from robot_interface.endpoints.socket_base import SocketBase
--- a/src/robot_interface/main.py
+++ b/src/robot_interface/main.py
@@ -1,4 +1,7 @@
 import logging
 from robot_interface.endpoints.audio_sender import AudioSender
 logging.basicConfig(level=logging.DEBUG)
 import zmq
@@ -25,8 +28,11 @@ def main_loop(context):
    video_sender = VideoSender(context)
    state.sockets.append(video_sender)
    audio_sender = AudioSender(context)
    state.sockets.append(audio_sender)
    video_sender.start_video_rcv()
    audio_sender.start()
    # Sockets that can run on the main thread. These sockets' endpoints should not block for long (say 50 ms at most).
    receivers = [main_receiver, actuation_receiver]
--- a/src/robot_interface/utils/microphone.py
+++ b/src/robot_interface/utils/microphone.py
@@ -0,0 +1,120 @@
 from __future__ import unicode_literals  # So that `print` can print Unicode characters in names
 import logging
 import sys
 logger = logging.getLogger(__name__)
 def get_microphones(audio):
    """
    Iterate over the audio devices that have at least one input channel.

    :param audio: An instance of PyAudio to use.
    :type audio: pyaudio.PyAudio

    :return: An iterator of PyAudio dicts containing information about the microphone devices.
    :rtype: Iterator[dict]
    """
    device_total = audio.get_device_count()
    # Lazy: the info of a device is only requested once the iteration reaches it
    device_infos = (audio.get_device_info_by_index(number) for number in range(device_total))
    for info in device_infos:
        if info["maxInputChannels"] > 0:
            yield info
 def choose_mic_interactive(audio):
    """
    Let the user pick a microphone in the CLI. The available microphones are printed with a
    number, and the user is asked for a number until a valid one is entered.

    :param audio: An instance of PyAudio to use.
    :type audio: pyaudio.PyAudio

    :return: A dictionary from PyAudio containing information about the microphone to use, or None
    if there is no microphone.
    :rtype: dict | None
    """
    candidates = list(get_microphones(audio))
    if not candidates:
        return None

    candidate_count = len(candidates)
    print("Found {} microphones:".format(candidate_count))
    for number, candidate in enumerate(candidates):
        print("- {}: {}".format(number, candidate["name"]))

    while True:
        answer = raw_input("Which device would you like to use?\n> ")
        try:
            number = int(answer)
        except ValueError:
            number = None  # Not a number at all, handled like an out-of-range number

        if number is not None and 0 <= number < candidate_count:
            selected = candidates[number]
            break
        print("Please enter a number between 0 and {}".format(candidate_count - 1))

    logger.info("Chose microphone \"{}\"".format(selected["name"]))
    return selected
 def choose_mic_default(audio):
    """
    Look up the default input device of the system.

    :param audio: An instance of PyAudio to use.
    :type audio: pyaudio.PyAudio

    :return: A dictionary from PyAudio containing information about the microphone to use, or None
    if asking for the default input device raises an IOError.
    :rtype: dict | None
    """
    try:
        default_device = audio.get_default_input_device_info()
    except IOError:
        default_device = None
    return default_device
 def choose_mic_arguments(audio):
    """
    Get a microphone to use from command line arguments. Both `--microphone NAME` and
    `--microphone=NAME` are understood; if the argument is given multiple times, the last one
    wins. The name has to match the name of an available microphone exactly (ignoring leading
    and trailing whitespace in the argument).

    :param audio: An instance of PyAudio to use.
    :type audio: pyaudio.PyAudio

    :return: A dictionary from PyAudio containing information about the microphone to use, or None
    if there is no microphone satisfied by the arguments.
    :rtype: dict | None
    """
    flag = "--microphone"
    flag_with_value = flag + "="

    # Command line arguments can be byte strings (they always are on Python 2), while the strings
    # in this module and the device names can be Unicode. Mixing the two fails as soon as there
    # is a non-ASCII character involved, so decode the arguments before looking at them.
    encoding = sys.getfilesystemencoding() or "utf-8"
    arguments = [
        arg.decode(encoding, "replace") if isinstance(arg, bytes) else arg
        for arg in sys.argv
    ]

    microphone_name = None
    for i, arg in enumerate(arguments):
        if arg == flag and len(arguments) > i+1:
            microphone_name = arguments[i+1].strip()
        elif arg.startswith(flag_with_value):
            microphone_name = arg[len(flag_with_value):].strip()

    # Also covers an empty name, e.g. from `--microphone=`
    if not microphone_name:
        return None

    available_mics = list(get_microphones(audio))
    for mic in available_mics:
        if mic["name"] == microphone_name:
            return mic

    available_mic_names = [mic["name"] for mic in available_mics]
    logger.warning("Microphone \"{}\" not found. Choose one of {}"
                   .format(microphone_name, available_mic_names))

    return None
 def choose_mic(audio):
    """
    Get a microphone to use. A microphone specified with the application arguments takes
    precedence; if the arguments do not give a microphone, the default microphone is used.

    :param audio: An instance of PyAudio to use.
    :type audio: pyaudio.PyAudio

    :return: A dictionary from PyAudio containing information about the microphone to use, or None
    if there is no microphone.
    :rtype: dict | None
    """
    # `or` only asks for the default microphone when the arguments did not give one
    return choose_mic_arguments(audio) or choose_mic_default(audio)
--- a/test/common/init.py
+++ b/test/common/init.py
--- a/test/common/microphone_utils.py
+++ b/test/common/microphone_utils.py
@@ -0,0 +1,152 @@
 from __future__ import unicode_literals  # So that we can format strings with Unicode characters
 import random
 import sys
 from StringIO import StringIO
 from robot_interface.utils.microphone import (
    choose_mic_default,
    choose_mic_interactive,
    choose_mic_arguments,
    choose_mic,
    get_microphones,
 )
 class MicrophoneUtils(object):
    """
    Shared tests for any PyAudio-like implementation, e.g. mock and real.

    Every test takes a `pyaudio_instance` fixture, which has to be provided by the test module
    that subclasses this class.
    """

    def test_choose_mic_default(self, pyaudio_instance):
        """
        The result must contain at least "index", as this is used to identify the microphone.
        The "name" is used for logging, so it should also exist.
        It must have one or more channels.
        Lastly it must be capable of sending at least 16000 samples per second.
        """
        result = choose_mic_default(pyaudio_instance)
        assert "index" in result
        assert isinstance(result["index"], (int, long))

        assert "name" in result
        assert isinstance(result["name"], (str, unicode))

        assert "maxInputChannels" in result
        assert isinstance(result["maxInputChannels"], (int, long))
        assert result["maxInputChannels"] > 0

        assert "defaultSampleRate" in result
        assert isinstance(result["defaultSampleRate"], float)
        assert result["defaultSampleRate"] >= 16000

    def test_choose_mic_interactive_input_not_int(self, pyaudio_instance, mocker):
        """
        First mock an input that's not an integer, then a valid integer. There should be no errors.
        """
        # Entering "0" picks the first listed microphone. Its device index is not necessarily 0,
        # as devices without input channels are not listed.
        expected_index = next(get_microphones(pyaudio_instance))["index"]

        mock_input = mocker.patch("__builtin__.raw_input", side_effect=["not an integer", "0"])
        fake_out = StringIO()
        mocker.patch.object(sys, "stdout", fake_out)

        result = choose_mic_interactive(pyaudio_instance)
        assert "index" in result
        assert isinstance(result["index"], (int, long))
        assert result["index"] == expected_index

        assert mock_input.called

        assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines())

    def test_choose_mic_interactive_negative_index(self, pyaudio_instance, mocker):
        """
        Make sure that the interactive method does not allow negative integers as input.
        """
        # Entering "0" picks the first listed microphone. Its device index is not necessarily 0,
        # as devices without input channels are not listed.
        expected_index = next(get_microphones(pyaudio_instance))["index"]

        mock_input = mocker.patch("__builtin__.raw_input", side_effect=["-1", "0"])
        fake_out = StringIO()
        mocker.patch.object(sys, "stdout", fake_out)

        result = choose_mic_interactive(pyaudio_instance)
        assert "index" in result
        assert isinstance(result["index"], (int, long))
        assert result["index"] == expected_index

        assert mock_input.called

        assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines())

    def test_choose_mic_interactive_index_too_high(self, pyaudio_instance, mocker):
        """
        Make sure that the interactive method does not allow indices higher than the highest mic index.
        """
        microphones = list(get_microphones(pyaudio_instance))
        real_count = len(microphones)
        # After the rejected input, "0" picks the first listed microphone
        expected_index = microphones[0]["index"]

        mock_input = mocker.patch("__builtin__.raw_input", side_effect=[str(real_count), "0"])
        fake_out = StringIO()
        mocker.patch.object(sys, "stdout", fake_out)

        result = choose_mic_interactive(pyaudio_instance)
        assert "index" in result
        assert isinstance(result["index"], (int, long))
        assert result["index"] == expected_index

        assert mock_input.called

        assert any(p.startswith("Please enter a number") for p in fake_out.getvalue().splitlines())

    def test_choose_mic_interactive_random_index(self, pyaudio_instance, mocker):
        """
        Get a random index from the list of available mics, make sure it's correct.
        """
        microphones = list(get_microphones(pyaudio_instance))
        random_index = random.randrange(len(microphones))
        mocker.patch("__builtin__.raw_input", side_effect=[str(random_index)])

        result = choose_mic_interactive(pyaudio_instance)
        assert "index" in result
        assert isinstance(result["index"], (int, long))
        assert result["index"] == microphones[random_index]["index"]

    def test_choose_mic_no_arguments(self, pyaudio_instance, mocker):
        """
        Without any command line arguments, no microphone is chosen from the arguments.
        """
        mocker.patch.object(sys, "argv", [])

        result = choose_mic_arguments(pyaudio_instance)

        assert result is None

    def test_choose_mic_arguments(self, pyaudio_instance, mocker):
        """
        Every available microphone can be chosen with `--microphone NAME`.
        """
        for mic in get_microphones(pyaudio_instance):
            mocker.patch.object(sys, "argv", ["--microphone", mic["name"]])

            result = choose_mic_arguments(pyaudio_instance)

            assert result is not None
            assert result == mic

    def test_choose_mic_arguments_eq(self, pyaudio_instance, mocker):
        """
        Every available microphone can be chosen with `--microphone=NAME`.
        """
        for mic in get_microphones(pyaudio_instance):
            mocker.patch.object(sys, "argv", ["--microphone={}".format(mic["name"])])

            result = choose_mic_arguments(pyaudio_instance)

            assert result is not None
            assert result == mic

    def test_choose_mic_arguments_not_exits(self, pyaudio_instance, mocker):
        """
        Passing the name of a microphone that does not exist gives no microphone.
        """
        mocker.patch.object(sys, "argv", ["--microphone", "Surely this microphone doesn't exist"])

        result = choose_mic_arguments(pyaudio_instance)

        assert result is None

    def test_choose_mic_with_argument(self, pyaudio_instance, mocker):
        """
        `choose_mic` returns the microphone named in the arguments when there is one.
        """
        mic = next(get_microphones(pyaudio_instance))
        mocker.patch.object(sys, "argv", ["--microphone", mic["name"]])

        result = choose_mic(pyaudio_instance)

        assert result is not None
        assert result == mic

    def test_choose_mic_no_argument(self, pyaudio_instance, mocker):
        """
        `choose_mic` returns the default microphone when the arguments do not name one.
        """
        default_mic = choose_mic_default(pyaudio_instance)
        mocker.patch.object(sys, "argv", [])

        result = choose_mic(pyaudio_instance)

        assert result is not None
        assert result == default_mic
--- a/test/integration/init.py
+++ b/test/integration/init.py
--- a/test/integration/test_microphone_utils.py
+++ b/test/integration/test_microphone_utils.py
@@ -0,0 +1,20 @@
 import pyaudio
 import pytest
 from common.microphone_utils import MicrophoneUtils
@pytest.fixture
def pyaudio_instance():
    """
    Provide a real PyAudio instance. Tests using this fixture are skipped when the system has no
    default input device. The instance is terminated afterwards, also when the test is skipped,
    so that it does not stay open for the rest of the test run.
    """
    audio = pyaudio.PyAudio()
    try:
        try:
            audio.get_default_input_device_info()
        except IOError:
            pytest.skip("No microphone available to test with.")
        yield audio
    finally:
        audio.terminate()
 class TestAudioIntegration(MicrophoneUtils):
    """
    Run the shared audio behavior tests with the real PyAudio implementation, using the
    `pyaudio_instance` fixture of this module. Adds no tests of its own.
    """
--- a/test/unit/test_audio_sender.py
+++ b/test/unit/test_audio_sender.py
@@ -0,0 +1,104 @@
 # coding=utf-8
 import os
 import time
 import mock
 import pytest
 import zmq
 from robot_interface.endpoints.audio_sender import AudioSender
@pytest.fixture
def zmq_context():
    """
    Provide a fresh ZeroMQ context, and clean it up after the test.

    The tests do not close the sockets they create on the context, so the teardown uses
    `destroy`, which closes those sockets before terminating the context. `linger=0` makes sure
    that unsent messages cannot keep the teardown waiting.
    """
    context = zmq.Context()
    yield context
    context.destroy(linger=0)
 def test_no_microphone(zmq_context, mocker):
    """
    Without a microphone, starting the sender should only log that fact and not start a thread.
    """
    info_log = mocker.patch("robot_interface.endpoints.audio_sender.logger.info")
    mocker.patch("robot_interface.endpoints.audio_sender.choose_mic", return_value=None)

    sender = AudioSender(zmq_context)
    assert sender.microphone is None

    sender.start()
    assert sender.thread is None
    info_log.assert_called()

    sender.wait_until_done()  # Should return early because we didn't start a thread
 def test_unicode_mic_name(zmq_context, mocker):
    """
    A microphone name with Unicode symbols in it should not break starting the sender.
    """
    mocker.patch("robot_interface.endpoints.audio_sender.threading")
    mocker.patch(
        "robot_interface.endpoints.audio_sender.choose_mic",
        return_value={"name": u"• Some Unicode name"},
    )

    sender = AudioSender(zmq_context)
    assert sender.microphone is not None

    # `.start()` logs the name of the microphone. It should not give an error if it contains Unicode
    #  symbols.
    sender.start()
    assert sender.thread is not None

    sender.wait_until_done()  # Should return instantly because we didn't start a real thread
 def _fake_read(num_frames):
    return os.urandom(num_frames * 4)
 def test_sending_audio(mocker):
    mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic")
    mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L}
    mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
    mock_state.exit_event.is_set.side_effect = [False, True]
    mock_zmq_context = mock.Mock()
    send_socket = mock.Mock()
    # If there's something wrong with the microphone, it will raise an IOError when `read`ing.
    stream = mock.Mock()
    stream.read = _fake_read
    sender = AudioSender(mock_zmq_context)
    sender.socket.send = send_socket
    sender.audio.open = mock.Mock()
    sender.audio.open.return_value = stream
    sender.start()
    sender.wait_until_done()
    send_socket.assert_called()
 def _fake_read_error(num_frames):
    raise IOError()
 def test_break_microphone(mocker):
    mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic")
    mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L}
    mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
    mock_state.exit_event.is_set.side_effect = [False, True]
    mock_zmq_context = mock.Mock()
    send_socket = mock.Mock()
    # If there's something wrong with the microphone, it will raise an IOError when `read`ing.
    stream = mock.Mock()
    stream.read = _fake_read_error
    sender = AudioSender(mock_zmq_context)
    sender.socket.send = send_socket
    sender.audio.open = mock.Mock()
    sender.audio.open.return_value = stream
    sender.start()
    sender.wait_until_done()
    send_socket.assert_not_called()
--- a/test/unit/test_microphone_utils.py
+++ b/test/unit/test_microphone_utils.py
@@ -0,0 +1,85 @@
 # coding=utf-8
 import mock
 import pytest
 from common.microphone_utils import MicrophoneUtils
 from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive
 class MockPyAudio:
    """
    Minimal stand-in for `pyaudio.PyAudio`, with a fixed list of fake devices: one microphone and
    one speaker. Only implements the device-information methods.
    """
    def __init__(self):
        # Properties that are the same for all fake devices
        shared = {
            "defaultLowInputLatency": 0.01,
            "defaultLowOutputLatency": 0.01,
            "defaultHighInputLatency": 0.1,
            "defaultHighOutputLatency": 0.1,
            "hostApi": 0,
        }
        microphone = dict(
            shared,
            index=0,
            name=u"Someone’s Microphone",  # Using a Unicode ’ character
            maxInputChannels=2,
            maxOutputChannels=0,
            defaultSampleRate=44100.0,
        )
        speaker = dict(
            shared,
            index=1,
            name=u"Mock Speaker 1",
            maxInputChannels=0,
            maxOutputChannels=2,
            defaultSampleRate=48000.0,
        )
        # You can predefine fake device info here
        self.devices = [microphone, speaker]

    def get_device_count(self):
        """Return the number of available mock devices."""
        return len(self.devices)

    def get_device_info_by_index(self, index):
        """Return information for a given mock device index, or raise an IOError if invalid."""
        if not 0 <= index < len(self.devices):
            raise IOError("Invalid device index: {}".format(index))
        return self.devices[index]

    def get_default_input_device_info(self):
        """Return info for the first mock device with input channels, or raise an IOError."""
        input_devices = [
            device for device in self.devices if device.get("maxInputChannels", 0) > 0
        ]
        if input_devices:
            return input_devices[0]
        raise IOError("No default input device found")
@pytest.fixture
def pyaudio_instance():
    """Provide a new `MockPyAudio` to act as the PyAudio instance in the tests."""
    fake_audio = MockPyAudio()
    return fake_audio
 def _raise_io_error():
    raise IOError()
 class TestAudioUnit(MicrophoneUtils):
    """Run shared audio behavior tests with the mock implementation."""
    def test_choose_mic_default_no_mic(self):
        mock_pyaudio = mock.Mock()
        mock_pyaudio.get_device_count = mock.Mock(return_value=0L)
        mock_pyaudio.get_default_input_device_info = _raise_io_error
        result = choose_mic_default(mock_pyaudio)
        assert result is None
    def test_choose_mic_interactive_no_mic(self):
        mock_pyaudio = mock.Mock()
        mock_pyaudio.get_device_count = mock.Mock(return_value=0L)
        mock_pyaudio.get_default_input_device_info = _raise_io_error
        result = choose_mic_interactive(mock_pyaudio)
        assert result is None