feat: (almost) qi audio sender

2026-02-04 18:38:40 +01:00
parent 06e3dad25d
commit 6e2bedcd32
1 changed files with 179 additions and 68 deletions
--- a/src/robot_interface/endpoints/audio_sender.py
+++ b/src/robot_interface/endpoints/audio_sender.py
@@ -6,9 +6,12 @@ University within the Software Project course.
 """

 from __future__ import unicode_literals  # So that `logging` can use Unicode characters in names
+from abc import ABCMeta, abstractmethod
 import threading
 import logging
+import Queue

+import numpy as np
 import pyaudio
 import zmq

@@ -20,86 +23,194 @@ from robot_interface.core.config import settings
 logger = logging.getLogger(__name__)


-class AudioSender(SocketBase):
-    """
-    Audio sender endpoint, responsible for sending microphone audio data.
+class AudioCapturer(object):
+    __metaclass__ = ABCMeta

-    :param zmq_context: The ZeroMQ context to use.
-    :type zmq_context: zmq.Context
+    @abstractmethod
+    def setup(self):
+        raise NotImplementedError()

-    :param port: The port to use.
-    :type port: int
+    @abstractmethod
+    def stop(self):
+        raise NotImplementedError()

-    :ivar thread: Thread used for sending audio.
-    :vartype thread: threading.Thread | None
+    @abstractmethod
+    def generate_chunk(self):
+        raise NotImplementedError()
    
-    :ivar audio: PyAudio instance.
-    :vartype audio: pyaudio.PyAudio | None

-    :ivar microphone: Selected microphone information.
-    :vartype microphone: dict | None
-    """
-    def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port):
-        super(AudioSender, self).__init__(str("audio"))  # Convert future's unicode_literal to str
-        self.create_socket(zmq_context, zmq.PUB, port)
-        self.thread = None

+class QiAudioCapturer(AudioCapturer):
+    def __init__(self, sample_rate=16000, channels=1, deinterleaved=0):
+        self.session = state.qi_session
+        if not self.session:
+            raise RuntimeError("Cannot capture from qi device, no qi session available.")
+
+        self.audio = self.session.service("ALAudioDevice")
+
+        self.service_name = "ZmqAudioStreamer"
+        self.sample_rate = sample_rate
+        self.channels = channels
+        self.deinterleaved = deinterleaved
+
+        self.overflow = np.empty(0, dtype=np.float32)
+
+        self.q = Queue.Queue()
+
+    def setup(self):
+        assert self.session is not None
+        self.session.registerService(self.service_name, self)
+        self.audio.setClientPreferences(self.service_name, self.sample_rate, self.channels, self.deinterleaved)
+        self.audio.subscribe(self.service_name)
+
+    def stop(self):
        try:
-            self.audio = pyaudio.PyAudio()
-            self.microphone = choose_mic(self.audio)
-        except IOError as e:
-            logger.warning("PyAudio is not available.", exc_info=e)
-            self.audio = None
-            self.microphone = None
+            self.audio.unsubscribe(self.service_name)
+        except:
+            pass
+
+
+    def audio_gen(self):
+        try:
+            chunk = self.q.get(True, 0.1)
+            return chunk
+        except Queue.Empty:
+            return None
+
+
+    # Callback invoked by NAOqi
+    def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer):
+        raw_pcm = bytes(inputBuffer)
+
+        assert nbOfChannels == 1
+
+        pcm_i16 = np.frombuffer(raw_pcm, dtype=np.int16)
+        pcm_f32 = pcm_i16.astype(np.float32) / 32768.0
+
+        # Attach overflow
+        pcm_f32 = np.append(self.overflow, pcm_f32)
+
+        for i in range(len(pcm_f32) // 512):
+            self.q.put_nowait(pcm_f32[i * 512 : (i + 1) * 512])
+
+        self.overflow = pcm_f32[len(pcm_f32) // 512 * 512 :]
+
+class StandaloneAudioCapturer:
+    pass
+
+
+class AudioSender(SocketBase):
+    def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port):
+        super(AudioSender, self).__init__(str("audio"))
+        self.create_socket(zmq_context, zmq.PUB, port)
+
+        self.thread = threading.Thread(target=self.stream)
+
+        self.capturer = self.choose_capturer()
+

    def start(self):
-        """
-        Start sending audio in a different thread.
-
-        Will not start if no microphone is available.
-        """
-        if not self.microphone:
-            logger.info("Not listening: no microphone available.")
-            return
-
-        logger.info("Listening with microphone \"{}\".".format(self.microphone["name"]))
-        self.thread = threading.Thread(target=self._stream)
        self.thread.start()

-    def wait_until_done(self):
-        """
-        Wait until the audio thread is done.
+    def close(self):
+        self.capturer.stop()
+        super(AudioSender, self).close()

-        Will block until `state.exit_event` is set. If the thread is not running, does nothing.
-        """
-        if not self.thread: return
-        self.thread.join()
-        self.thread = None
-
-    def _stream(self):
-        """
-        Internal method to continuously read audio from the microphone and send it over the socket.
-        """
-        audio_settings = settings.audio_config
-        chunk = audio_settings.chunk_size # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD
-
-        # Docs say this only raises an error if neither `input` nor `output` is True
-        stream = self.audio.open(
-            format=pyaudio.paFloat32,
-            channels=audio_settings.channels,
-            rate=audio_settings.sample_rate,
-            input=True,
-            input_device_index=self.microphone["index"],
-            frames_per_buffer=chunk,
-        )
-
-        try:
+    def stream(self):
        while not state.exit_event.is_set():
-                data = stream.read(chunk)
-                if (state.is_speaking): continue  # Do not send audio while the robot is speaking
-                self.socket.send(data)
-        except IOError as e:
-            logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e)
-        finally:
-            stream.stop_stream()
-            stream.close()
+            chunk = self.capturer.generate_chunk()
+
+            if not chunk or state.is_speaking:
+                continue
+
+            self.socket.send(chunk)
+
+    def choose_capturer(self):
+        return QiAudioCapturer()
+
+
+
+
+# class AudioSender(SocketBase):
+#     """
+#     Audio sender endpoint, responsible for sending microphone audio data.
+#
+#     :param zmq_context: The ZeroMQ context to use.
+#     :type zmq_context: zmq.Context
+#
+#     :param port: The port to use.
+#     :type port: int
+#
+#     :ivar thread: Thread used for sending audio.
+#     :vartype thread: threading.Thread | None
+#
+#     :ivar audio: PyAudio instance.
+#     :vartype audio: pyaudio.PyAudio | None
+#
+#     :ivar microphone: Selected microphone information.
+#     :vartype microphone: dict | None
+#     """
+#     def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port):
+#         super(AudioSender, self).__init__(str("audio"))  # Convert future's unicode_literal to str
+#         self.create_socket(zmq_context, zmq.PUB, port)
+#         self.thread = None
+#
+#         try:
+#             self.audio = pyaudio.PyAudio()
+#             self.microphone = choose_mic(self.audio)
+#         except IOError as e:
+#             logger.warning("PyAudio is not available.", exc_info=e)
+#             self.audio = None
+#             self.microphone = None
+#
+#     def start(self):
+#         """
+#         Start sending audio in a different thread.
+#
+#         Will not start if no microphone is available.
+#         """
+#         if not self.microphone:
+#             logger.info("Not listening: no microphone available.")
+#             return
+#
+#         logger.info("Listening with microphone \"{}\".".format(self.microphone["name"]))
+#         self.thread = threading.Thread(target=self._stream)
+#         self.thread.start()
+#
+#     def wait_until_done(self):
+#         """
+#         Wait until the audio thread is done.
+#
+#         Will block until `state.exit_event` is set. If the thread is not running, does nothing.
+#         """
+#         if not self.thread: return
+#         self.thread.join()
+#         self.thread = None
+#
+#     def _stream(self):
+#         """
+#         Internal method to continuously read audio from the microphone and send it over the socket.
+#         """
+#         audio_settings = settings.audio_config
+#         chunk = audio_settings.chunk_size # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD
+#
+#         # Docs say this only raises an error if neither `input` nor `output` is True
+#         stream = self.audio.open(
+#             format=pyaudio.paFloat32,
+#             channels=audio_settings.channels,
+#             rate=audio_settings.sample_rate,
+#             input=True,
+#             input_device_index=self.microphone["index"],
+#             frames_per_buffer=chunk,
+#         )
+#
+#         try:
+#             while not state.exit_event.is_set():
+#                 data = stream.read(chunk)
+#                 if (state.is_speaking): continue  # Do not send audio while the robot is speaking
+#                 self.socket.send(data)
+#         except IOError as e:
+#             logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e)
+#         finally:
+#             stream.stop_stream()
+#             stream.close()