feat: (almost) qi audio sender

This commit is contained in:
2026-02-04 18:38:40 +01:00
parent 06e3dad25d
commit 6e2bedcd32

View File

@@ -6,9 +6,12 @@ University within the Software Project course.
""" """
from __future__ import unicode_literals # So that `logging` can use Unicode characters in names from __future__ import unicode_literals # So that `logging` can use Unicode characters in names
from abc import ABCMeta, abstractmethod
import threading import threading
import logging import logging
import Queue
import numpy as np
import pyaudio import pyaudio
import zmq import zmq
@@ -20,86 +23,194 @@ from robot_interface.core.config import settings
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class AudioCapturer(object):
    """Abstract interface for an audio source feeding the AudioSender.

    Implementations deliver fixed-size audio chunks via ``generate_chunk``
    after ``setup`` has been called, until ``stop`` releases the device.
    """

    # Python 2 style ABC declaration (this file targets Python 2: `import Queue`).
    __metaclass__ = ABCMeta

    @abstractmethod
    def setup(self):
        """Start capturing audio from the underlying device."""
        raise NotImplementedError

    @abstractmethod
    def stop(self):
        """Stop capturing and release any device resources."""
        raise NotImplementedError

    @abstractmethod
    def generate_chunk(self):
        """Return the next audio chunk, or None when none is available."""
        raise NotImplementedError
:ivar audio: PyAudio instance.
:vartype audio: pyaudio.PyAudio | None
class QiAudioCapturer(AudioCapturer):
    """Captures microphone audio from a NAOqi (qi) robot session.

    Registers itself as a NAOqi audio service so that ``processRemote`` is
    invoked by ALAudioDevice with raw int16 PCM, which is normalized to
    float32 and queued in fixed 512-sample chunks (the size required by
    Silero-VAD, per the chunk-size note in the old implementation below).

    :param sample_rate: Requested sample rate in Hz.
    :type sample_rate: int

    :param channels: Number of channels to request (only mono is handled).
    :type channels: int

    :param deinterleaved: NAOqi deinterleave flag (0 = interleaved).
    :type deinterleaved: int

    :raises RuntimeError: If no qi session is available.
    """

    # Number of float32 samples per emitted chunk (Silero-VAD requirement).
    CHUNK_SAMPLES = 512

    def __init__(self, sample_rate=16000, channels=1, deinterleaved=0):
        self.session = state.qi_session
        if not self.session:
            raise RuntimeError("Cannot capture from qi device, no qi session available.")

        self.audio = self.session.service("ALAudioDevice")
        self.service_name = "ZmqAudioStreamer"
        self.sample_rate = sample_rate
        self.channels = channels
        self.deinterleaved = deinterleaved
        # Samples left over from the previous callback that did not fill a whole chunk.
        self.overflow = np.empty(0, dtype=np.float32)
        self.q = Queue.Queue()

    def setup(self):
        """Register with NAOqi and subscribe, so audio callbacks start arriving."""
        # Raise (not assert) so the check survives `python -O`.
        if self.session is None:
            raise RuntimeError("Cannot set up capturer: no qi session available.")
        self.session.registerService(self.service_name, self)
        self.audio.setClientPreferences(self.service_name, self.sample_rate, self.channels, self.deinterleaved)
        self.audio.subscribe(self.service_name)

    def stop(self):
        """Unsubscribe from ALAudioDevice; best-effort, never raises."""
        try:
            self.audio.unsubscribe(self.service_name)
        except Exception:
            # Unsubscribing after the session is already gone is harmless; keep
            # shutdown quiet, but do not swallow KeyboardInterrupt/SystemExit.
            pass

    def generate_chunk(self):
        """Return the next 512-sample float32 chunk, or None if none arrives within 100 ms."""
        try:
            return self.q.get(True, 0.1)
        except Queue.Empty:
            return None

    # Backward-compatible alias for the previous method name.
    audio_gen = generate_chunk

    # Callback invoked by NAOqi (name and signature fixed by ALAudioDevice).
    def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer):
        """Convert incoming int16 PCM to float32 and queue it in fixed-size chunks."""
        if nbOfChannels != 1:
            raise ValueError("Expected mono audio, got {} channels.".format(nbOfChannels))
        raw_pcm = bytes(inputBuffer)
        pcm_i16 = np.frombuffer(raw_pcm, dtype=np.int16)
        # Normalize to [-1.0, 1.0) float32 as expected downstream.
        pcm_f32 = pcm_i16.astype(np.float32) / 32768.0
        # Prepend the incomplete tail saved from the previous callback.
        pcm_f32 = np.append(self.overflow, pcm_f32)
        n_chunks = len(pcm_f32) // self.CHUNK_SAMPLES
        for i in range(n_chunks):
            self.q.put_nowait(pcm_f32[i * self.CHUNK_SAMPLES:(i + 1) * self.CHUNK_SAMPLES])
        # Keep whatever did not fill a full chunk for the next callback.
        self.overflow = pcm_f32[n_chunks * self.CHUNK_SAMPLES:]
class StandaloneAudioCapturer:
    # TODO(review): empty stub — presumably intended to implement the
    # AudioCapturer interface on top of PyAudio (the commented-out
    # AudioSender implementation further down looks like the code that
    # should move here); confirm before use, it is not functional yet.
    pass
class AudioSender(SocketBase):
    """
    Audio sender endpoint, responsible for publishing microphone audio data.

    Chunks produced by the selected :class:`AudioCapturer` are streamed over
    a ZeroMQ PUB socket on a background thread, skipped while the robot is
    speaking.

    :param zmq_context: The ZeroMQ context to use.
    :type zmq_context: zmq.Context

    :param port: The port to use.
    :type port: int

    :ivar thread: Thread used for sending audio.
    :vartype thread: threading.Thread

    :ivar capturer: The audio source backend.
    :vartype capturer: AudioCapturer
    """
    def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port):
        super(AudioSender, self).__init__(str("audio"))  # Convert future's unicode_literal to str
        self.create_socket(zmq_context, zmq.PUB, port)
        self.thread = threading.Thread(target=self.stream)
        self.capturer = self.choose_capturer()

    def start(self):
        """Set up the capturer and start streaming audio on the background thread."""
        # Nothing else calls setup(), and close() calls the symmetric stop();
        # without this the Qi capturer never subscribes and no audio flows.
        self.capturer.setup()
        self.thread.start()

    def close(self):
        """Stop the capturer, then close the underlying socket."""
        self.capturer.stop()
        super(AudioSender, self).close()

    def stream(self):
        """Continuously forward captured chunks until `state.exit_event` is set."""
        while not state.exit_event.is_set():
            chunk = self.capturer.generate_chunk()

            # Compare against None explicitly: `not chunk` on a multi-element
            # numpy array raises ValueError (ambiguous truth value).
            if chunk is None or state.is_speaking:
                continue  # Do not send audio while the robot is speaking

            # Send the raw float32 bytes, matching the previous PyAudio
            # (paFloat32) wire format expected by receivers.
            self.socket.send(chunk.tobytes())

    def choose_capturer(self):
        """Select the audio capturer backend; currently always the qi device."""
        return QiAudioCapturer()
logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e) # class AudioSender(SocketBase):
finally: # """
stream.stop_stream() # Audio sender endpoint, responsible for sending microphone audio data.
stream.close() #
# :param zmq_context: The ZeroMQ context to use.
# :type zmq_context: zmq.Context
#
# :param port: The port to use.
# :type port: int
#
# :ivar thread: Thread used for sending audio.
# :vartype thread: threading.Thread | None
#
# :ivar audio: PyAudio instance.
# :vartype audio: pyaudio.PyAudio | None
#
# :ivar microphone: Selected microphone information.
# :vartype microphone: dict | None
# """
# def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port):
# super(AudioSender, self).__init__(str("audio")) # Convert future's unicode_literal to str
# self.create_socket(zmq_context, zmq.PUB, port)
# self.thread = None
#
# try:
# self.audio = pyaudio.PyAudio()
# self.microphone = choose_mic(self.audio)
# except IOError as e:
# logger.warning("PyAudio is not available.", exc_info=e)
# self.audio = None
# self.microphone = None
#
# def start(self):
# """
# Start sending audio in a different thread.
#
# Will not start if no microphone is available.
# """
# if not self.microphone:
# logger.info("Not listening: no microphone available.")
# return
#
# logger.info("Listening with microphone \"{}\".".format(self.microphone["name"]))
# self.thread = threading.Thread(target=self._stream)
# self.thread.start()
#
# def wait_until_done(self):
# """
# Wait until the audio thread is done.
#
# Will block until `state.exit_event` is set. If the thread is not running, does nothing.
# """
# if not self.thread: return
# self.thread.join()
# self.thread = None
#
# def _stream(self):
# """
# Internal method to continuously read audio from the microphone and send it over the socket.
# """
# audio_settings = settings.audio_config
# chunk = audio_settings.chunk_size # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD
#
# # Docs say this only raises an error if neither `input` nor `output` is True
# stream = self.audio.open(
# format=pyaudio.paFloat32,
# channels=audio_settings.channels,
# rate=audio_settings.sample_rate,
# input=True,
# input_device_index=self.microphone["index"],
# frames_per_buffer=chunk,
# )
#
# try:
# while not state.exit_event.is_set():
# data = stream.read(chunk)
# if (state.is_speaking): continue # Do not send audio while the robot is speaking
# self.socket.send(data)
# except IOError as e:
# logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e)
# finally:
# stream.stop_stream()
# stream.close()