feat: choose between Qi microphone and local microphone

2026-02-09 15:45:17 +01:00
parent abd6988d1e
commit 0fe5fcf8f8
8 changed files with 177 additions and 95 deletions
--- a/.env.example
+++ b/.env.example
@@ -6,6 +6,9 @@
 # The hostname or IP address of the Control Backend.
 AGENT__CONTROL_BACKEND_HOST=localhost

+# Whether to use Pepper's microphone when Pepper is connected.
+AUDIO__USE_PEPPER_MICROPHONE=true
+


 # Variables that are unlikely to be configured, you can probably ignore these:
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ sphinx_rtd_theme
 pre-commit
 python-dotenv
 numpy<=1.16.6
+enum34
--- a/src/robot_interface/core/config.py
+++ b/src/robot_interface/core/config.py
@@ -78,6 +78,8 @@ class AudioConfig(object):
    """
    Audio configuration constants.

+    :ivar use_pepper_microphone: Whether to use Pepper's microphone or not, defaults to True.
+    :vartype use_pepper_microphone: bool
    :ivar sample_rate: Audio sampling rate in Hz, defaults to 16000.
    :vartype sample_rate: int
    :ivar chunk_size: Size of audio chunks to capture/process, defaults to 512.
@@ -85,7 +87,14 @@ class AudioConfig(object):
    :ivar channels: Number of audio channels, defaults to 1.
    :vartype channels: int
    """
-    def __init__(self, sample_rate=None, chunk_size=None, channels=None):
+    def __init__(
+        self,
+        use_pepper_microphone=None,
+        sample_rate=None,
+        chunk_size=None,
+        channels=None,
+    ):
+        self.use_pepper_microphone = get_config(use_pepper_microphone, "AUDIO__USE_PEPPER_MICROPHONE", True, bool)
        self.sample_rate = get_config(sample_rate, "AUDIO__SAMPLE_RATE", 16000, int)
        self.chunk_size = get_config(chunk_size, "AUDIO__CHUNK_SIZE", 512, int)
        self.channels = get_config(channels, "AUDIO__CHANNELS", 1, int)
--- a/src/robot_interface/endpoints/audio_sender.py
+++ b/src/robot_interface/endpoints/audio_sender.py
@@ -16,6 +16,7 @@ import logging
 import Queue

 import numpy as np
+import pyaudio
 import zmq

 from robot_interface.endpoints.socket_base import SocketBase
@@ -27,6 +28,9 @@ logger = logging.getLogger(__name__)


 class AudioCapturer(object):
+    """
+    Interface for audio capturers.
+    """
    __metaclass__ = ABCMeta

    @abstractmethod
@@ -62,8 +66,14 @@ class PepperMicrophone(enum.Enum):


 class QiAudioCapturer(AudioCapturer):
+    # Some of this class's methods have byte-string docstrings (``b"""..."""``). Keep them that way,
+    # otherwise ``qi.Session.registerService`` will raise RuntimeErrors.
    def __init__(self, sample_rate=SampleRate.LOW, mic=PepperMicrophone.FRONT_RIGHT,
                 deinterleaved=0):
+        """
+        :raises RuntimeError: If there is no Qi session available.
+        :raises ValueError: If the given arguments are not compatible.
+        """
        self.session = state.qi_session
        if not self.session:
            raise RuntimeError("Cannot capture from qi device, no qi session available.")
@@ -87,13 +97,20 @@ class QiAudioCapturer(AudioCapturer):
        self._rate_state = None

    def setup(self):
+        b"""
+        :raises RuntimeError: If no Qi session is available or if the session is not compatible with audio streaming.
+        """
        assert self.session is not None
+        logger.info("Listening with Pepper's microphone.")
        self.session.registerService(self.service_name, self)
        self.audio.setClientPreferences(self.service_name, self.sample_rate.value, self.mic.value,
                                        self.deinterleaved)
        self.audio.subscribe(self.service_name)

    def stop(self):
+        b"""
+        Stop the audio capturer.
+        """
        try:
            self.audio.unsubscribe(self.service_name)
        except:
@@ -134,8 +151,65 @@ class QiAudioCapturer(AudioCapturer):
        return frag.reshape(-1, channels).mean(axis=1, dtype=np.int32)


-class StandaloneAudioCapturer:
-    pass
+class StandaloneAudioCapturer(AudioCapturer):
+    """
+    Audio capturer that uses a microphone of the local device. The microphone can be chosen with
+    the ``--microphone`` program argument.
+
+    :ivar audio: PyAudio instance.
+    :vartype audio: pyaudio.PyAudio | None
+
+    :ivar microphone: Selected microphone information.
+    :vartype microphone: dict | None
+
+    :ivar stream: PyAudio stream instance. None until ``setup()`` is called, remaining None if setup
+        fails for any reason.
+    :vartype stream: pyaudio.Stream | None
+    """
+    def __init__(self):
+        self.stream = None
+
+        try:
+            self.audio = pyaudio.PyAudio()
+            self.microphone = choose_mic(self.audio)
+        except IOError as e:
+            logger.warning("PyAudio is not available. Won't be able to send audio.", exc_info=True)
+            self.audio = None
+            self.microphone = None
+
+    def setup(self):
+        """
+        Set up the audio stream. Does nothing if no microphone is available.
+        """
+        if not self.microphone:
+            logger.info("Not listening: no microphone available.")
+            return
+
+        logger.info("Listening with microphone \"{}\".".format(self.microphone["name"]))
+        self.stream = self.audio.open(
+            format=pyaudio.paFloat32,
+            channels=settings.audio_config.channels,
+            rate=settings.audio_config.sample_rate,
+            input=True,
+            input_device_index=self.microphone["index"],
+            frames_per_buffer=settings.audio_config.chunk_size,
+        )
+
+    def stop(self):
+        """
+        Close the audio stream.
+        """
+        if not self.stream: return
+        self.stream.stop_stream()
+        self.stream.close()
+
+    def generate_chunk(self):
+        """
+        :return: Audio frames from the microphone of size ``settings.audio_config.chunk_size``.
+        :rtype: bytes
+        :raises IOError: If reading from the audio stream fails.
+        """
+        return self.stream.read(settings.audio_config.chunk_size)


 class AudioSender(SocketBase):
@@ -165,91 +239,7 @@ class AudioSender(SocketBase):
            self.socket.send(chunk)

    def choose_capturer(self):
-        return QiAudioCapturer()
+        if state.qi_session and settings.audio_config.use_pepper_microphone:
+            return QiAudioCapturer()

-
-
-
-# class AudioSender(SocketBase):
-#     """
-#     Audio sender endpoint, responsible for sending microphone audio data.
-#
-#     :param zmq_context: The ZeroMQ context to use.
-#     :type zmq_context: zmq.Context
-#
-#     :param port: The port to use.
-#     :type port: int
-#
-#     :ivar thread: Thread used for sending audio.
-#     :vartype thread: threading.Thread | None
-#
-#     :ivar audio: PyAudio instance.
-#     :vartype audio: pyaudio.PyAudio | None
-#
-#     :ivar microphone: Selected microphone information.
-#     :vartype microphone: dict | None
-#     """
-#     def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port):
-#         super(AudioSender, self).__init__(str("audio"))  # Convert future's unicode_literal to str
-#         self.create_socket(zmq_context, zmq.PUB, port)
-#         self.thread = None
-#
-#         try:
-#             self.audio = pyaudio.PyAudio()
-#             self.microphone = choose_mic(self.audio)
-#         except IOError as e:
-#             logger.warning("PyAudio is not available.", exc_info=e)
-#             self.audio = None
-#             self.microphone = None
-#
-#     def start(self):
-#         """
-#         Start sending audio in a different thread.
-#
-#         Will not start if no microphone is available.
-#         """
-#         if not self.microphone:
-#             logger.info("Not listening: no microphone available.")
-#             return
-#
-#         logger.info("Listening with microphone \"{}\".".format(self.microphone["name"]))
-#         self.thread = threading.Thread(target=self._stream)
-#         self.thread.start()
-#
-#     def wait_until_done(self):
-#         """
-#         Wait until the audio thread is done.
-#
-#         Will block until `state.exit_event` is set. If the thread is not running, does nothing.
-#         """
-#         if not self.thread: return
-#         self.thread.join()
-#         self.thread = None
-#
-#     def _stream(self):
-#         """
-#         Internal method to continuously read audio from the microphone and send it over the socket.
-#         """
-#         audio_settings = settings.audio_config
-#         chunk = audio_settings.chunk_size # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD
-#
-#         # Docs say this only raises an error if neither `input` nor `output` is True
-#         stream = self.audio.open(
-#             format=pyaudio.paFloat32,
-#             channels=audio_settings.channels,
-#             rate=audio_settings.sample_rate,
-#             input=True,
-#             input_device_index=self.microphone["index"],
-#             frames_per_buffer=chunk,
-#         )
-#
-#         try:
-#             while not state.exit_event.is_set():
-#                 data = stream.read(chunk)
-#                 if (state.is_speaking): continue  # Do not send audio while the robot is speaking
-#                 self.socket.send(data)
-#         except IOError as e:
-#             logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e)
-#         finally:
-#             stream.stop_stream()
-#             stream.close()
+        return StandaloneAudioCapturer()
--- a/src/robot_interface/utils/get_config.py
+++ b/src/robot_interface/utils/get_config.py
@@ -16,6 +16,8 @@ def get_config(value, env, default, cast=None):
    Small utility to get a configuration value, returns `value` if it is not None, else it will try to get the
    environment variable cast with `cast`. If the environment variable is not set, it will return `default`.

+    Special handling for booleans, which are only true if the value of the variable is "true" or "yes", ignoring capitalization.
+
    :param value: The value to check.
    :type value: Any
    :param env: The environment variable to check.
@@ -33,7 +35,14 @@ def get_config(value, env, default, cast=None):

    env = os.environ.get(env, default)

-    if cast is None:
+    if cast is None or env is None:
        return env

+    if cast == bool:
+        if isinstance(env, bool):
+            return env
+        if not isinstance(default, bool):
+            raise ValueError("Default value must be a boolean if the cast type is a boolean.")
+        return env.lower() == "true" or env.lower() == "yes"
+
    return cast(env)
--- a/src/robot_interface/utils/qi_utils.py
+++ b/src/robot_interface/utils/qi_utils.py
@@ -14,6 +14,20 @@ except ImportError:
    qi = None


+def _get_qi_url():
+    """
+    Get the Qi URL from the command line arguments, or None if not given.
+    """
+    if "--qi-url" in sys.argv:
+        return sys.argv[sys.argv.index("--qi-url") + 1]
+
+    for arg in sys.argv:
+        if arg.startswith("--qi-url="):
+            return arg[len("--qi-url="):]
+
+    return None
+
+
 def get_qi_session():
    """
    Create and return a Qi session if available.
@@ -25,12 +39,13 @@ def get_qi_session():
        logging.info("Unable to import qi. Running in stand-alone mode.")
        return None

-    if "--qi-url" not in sys.argv:
+    qi_url = _get_qi_url()
+    if qi_url is None:
        logging.info("No Qi URL argument given. Running in stand-alone mode.")
        return None

    try:
-        app = qi.Application()
+        app = qi.Application(["--qi-url", qi_url, "--qi-listen-url", "tcp://0.0.0.0:0"])
        app.start()
        return app.session
    except RuntimeError:
--- a/test/unit/test_get_config.py
+++ b/test/unit/test_get_config.py
@@ -50,3 +50,58 @@ def test_get_config_casts_default_when_env_missing(monkeypatch):
    result = get_config(None, "GET_CONFIG_MISSING", "42", int)

    assert result == 42
+
+
+def test_get_config_unset_boolean_default(monkeypatch):
+    """
+    When the env var is a boolean, and it's not set, ensure it uses the default value.
+    """
+    monkeypatch.delenv("SOME_BOOLEAN_VARIABLE", raising=False)
+
+    result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool)
+    assert result == False
+
+    result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool)
+    assert result == True
+
+
+def test_get_config_true_boolean(monkeypatch):
+    """
+    When the env var is a boolean, and its value is "true", "TRUE", "yes", etc., it should return True.
+    """
+    monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "TRUE")
+    result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool)
+    assert result == True
+
+    monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "true")
+    result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool)
+    assert result == True
+
+    monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "yes")
+    result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool)
+    assert result == True
+
+    monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "YES")
+    result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool)
+    assert result == True
+
+    monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "TrUE")
+    result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool)
+    assert result == True
+
+
+def test_get_config_false_boolean(monkeypatch):
+    """
+    When the env var is a boolean, and its value is not "true", "TRUE", "yes", etc., it should return False.
+    """
+    monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "FALSE")
+    result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool)
+    assert result == False
+
+    monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "false")
+    result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool)
+    assert result == False
+
+    monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "anything, tbh")
+    result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool)
+    assert result == False
--- a/test/unit/test_qi_utils.py
+++ b/test/unit/test_qi_utils.py
@@ -62,7 +62,7 @@ def test_get_qi_session_runtime_error(monkeypatch):
            raise RuntimeError("boom")

    class FakeQi:
-        Application = lambda self=None: FakeApp()
+        Application = lambda *args, **kwargs: FakeApp()

    reload_qi_utils_with(FakeQi())

@@ -87,7 +87,7 @@ def test_get_qi_session_success(monkeypatch):
            return True

    class FakeQi:
-        Application = lambda self=None: FakeApp()
+        Application = lambda *args, **kwargs: FakeApp()

    reload_qi_utils_with(FakeQi())