feat: (almost) qi audio sender

This commit is contained in:
2026-02-04 18:38:40 +01:00
parent 06e3dad25d
commit 6e2bedcd32

View File

@@ -6,9 +6,12 @@ University within the Software Project course.
""" """
from __future__ import unicode_literals # So that `logging` can use Unicode characters in names from __future__ import unicode_literals # So that `logging` can use Unicode characters in names
from abc import ABCMeta, abstractmethod
import threading import threading
import logging import logging
import Queue
import numpy as np
import pyaudio import pyaudio
import zmq import zmq
@@ -20,86 +23,194 @@ from robot_interface.core.config import settings
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class AudioCapturer(object):
    """Abstract interface for an audio source feeding the AudioSender.

    Implementations deliver fixed-size audio chunks via ``generate_chunk``
    after ``setup`` has been called, until ``stop`` releases the device.
    """

    # Python 2 style ABC declaration (this file targets Python 2: `import Queue`).
    __metaclass__ = ABCMeta

    @abstractmethod
    def setup(self):
        """Start capturing audio from the underlying device."""
        raise NotImplementedError

    @abstractmethod
    def stop(self):
        """Stop capturing and release any device resources."""
        raise NotImplementedError

    @abstractmethod
    def generate_chunk(self):
        """Return the next audio chunk, or None when none is available."""
        raise NotImplementedError
:ivar audio: PyAudio instance.
:vartype audio: pyaudio.PyAudio | None
class QiAudioCapturer(AudioCapturer):
    """Captures microphone audio from a NAOqi (qi) robot session.

    Registers itself as a NAOqi audio service so that ``processRemote`` is
    invoked by ALAudioDevice with raw int16 PCM, which is normalized to
    float32 and queued in fixed 512-sample chunks (the size required by
    Silero-VAD, per the chunk-size note in the old implementation below).

    :param sample_rate: Requested sample rate in Hz.
    :type sample_rate: int

    :param channels: Number of channels to request (only mono is handled).
    :type channels: int

    :param deinterleaved: NAOqi deinterleave flag (0 = interleaved).
    :type deinterleaved: int

    :raises RuntimeError: If no qi session is available.
    """

    # Number of float32 samples per emitted chunk (Silero-VAD requirement).
    CHUNK_SAMPLES = 512

    def __init__(self, sample_rate=16000, channels=1, deinterleaved=0):
        self.session = state.qi_session
        if not self.session:
            raise RuntimeError("Cannot capture from qi device, no qi session available.")

        self.audio = self.session.service("ALAudioDevice")
        self.service_name = "ZmqAudioStreamer"
        self.sample_rate = sample_rate
        self.channels = channels
        self.deinterleaved = deinterleaved
        # Samples left over from the previous callback that did not fill a whole chunk.
        self.overflow = np.empty(0, dtype=np.float32)
        self.q = Queue.Queue()

    def setup(self):
        """Register with NAOqi and subscribe, so audio callbacks start arriving."""
        # Raise (not assert) so the check survives `python -O`.
        if self.session is None:
            raise RuntimeError("Cannot set up capturer: no qi session available.")
        self.session.registerService(self.service_name, self)
        self.audio.setClientPreferences(self.service_name, self.sample_rate, self.channels, self.deinterleaved)
        self.audio.subscribe(self.service_name)

    def stop(self):
        """Unsubscribe from ALAudioDevice; best-effort, never raises."""
        try:
            self.audio.unsubscribe(self.service_name)
        except Exception:
            # Unsubscribing after the session is already gone is harmless; keep
            # shutdown quiet, but do not swallow KeyboardInterrupt/SystemExit.
            pass

    def generate_chunk(self):
        """Return the next 512-sample float32 chunk, or None if none arrives within 100 ms."""
        try:
            return self.q.get(True, 0.1)
        except Queue.Empty:
            return None

    # Backward-compatible alias for the previous method name.
    audio_gen = generate_chunk

    # Callback invoked by NAOqi (name and signature fixed by ALAudioDevice).
    def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer):
        """Convert incoming int16 PCM to float32 and queue it in fixed-size chunks."""
        if nbOfChannels != 1:
            raise ValueError("Expected mono audio, got {} channels.".format(nbOfChannels))
        raw_pcm = bytes(inputBuffer)
        pcm_i16 = np.frombuffer(raw_pcm, dtype=np.int16)
        # Normalize to [-1.0, 1.0) float32 as expected downstream.
        pcm_f32 = pcm_i16.astype(np.float32) / 32768.0
        # Prepend the incomplete tail saved from the previous callback.
        pcm_f32 = np.append(self.overflow, pcm_f32)
        n_chunks = len(pcm_f32) // self.CHUNK_SAMPLES
        for i in range(n_chunks):
            self.q.put_nowait(pcm_f32[i * self.CHUNK_SAMPLES:(i + 1) * self.CHUNK_SAMPLES])
        # Keep whatever did not fill a full chunk for the next callback.
        self.overflow = pcm_f32[n_chunks * self.CHUNK_SAMPLES:]
class StandaloneAudioCapturer:
    # TODO(review): empty stub — presumably intended to implement the
    # AudioCapturer interface on top of PyAudio (the commented-out
    # AudioSender implementation further down looks like the code that
    # should move here); confirm before use, it is not functional yet.
    pass
class AudioSender(SocketBase):
    """
    Audio sender endpoint, responsible for publishing microphone audio data.

    Chunks produced by the selected :class:`AudioCapturer` are streamed over
    a ZeroMQ PUB socket on a background thread, skipped while the robot is
    speaking.

    :param zmq_context: The ZeroMQ context to use.
    :type zmq_context: zmq.Context

    :param port: The port to use.
    :type port: int

    :ivar thread: Thread used for sending audio.
    :vartype thread: threading.Thread

    :ivar capturer: The audio source backend.
    :vartype capturer: AudioCapturer
    """
    def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port):
        super(AudioSender, self).__init__(str("audio"))  # Convert future's unicode_literal to str
        self.create_socket(zmq_context, zmq.PUB, port)
        self.thread = threading.Thread(target=self.stream)
        self.capturer = self.choose_capturer()

    def start(self):
        """Set up the capturer and start streaming audio on the background thread."""
        # Nothing else calls setup(), and close() calls the symmetric stop();
        # without this the Qi capturer never subscribes and no audio flows.
        self.capturer.setup()
        self.thread.start()

    def close(self):
        """Stop the capturer, then close the underlying socket."""
        self.capturer.stop()
        super(AudioSender, self).close()

    def stream(self):
        """Continuously forward captured chunks until `state.exit_event` is set."""
        while not state.exit_event.is_set():
            chunk = self.capturer.generate_chunk()

            # Compare against None explicitly: `not chunk` on a multi-element
            # numpy array raises ValueError (ambiguous truth value).
            if chunk is None or state.is_speaking:
                continue  # Do not send audio while the robot is speaking

            # Send the raw float32 bytes, matching the previous PyAudio
            # (paFloat32) wire format expected by receivers.
            self.socket.send(chunk.tobytes())

    def choose_capturer(self):
        """Select the audio capturer backend; currently always the qi device."""
        return QiAudioCapturer()
logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e) # class AudioSender(SocketBase):
finally: # """
stream.stop_stream() # Audio sender endpoint, responsible for sending microphone audio data.
stream.close() #
# :param zmq_context: The ZeroMQ context to use.
# :type zmq_context: zmq.Context
#
# :param port: The port to use.
# :type port: int
#
# :ivar thread: Thread used for sending audio.
# :vartype thread: threading.Thread | None
#
# :ivar audio: PyAudio instance.
# :vartype audio: pyaudio.PyAudio | None
#
# :ivar microphone: Selected microphone information.
# :vartype microphone: dict | None
# """
# def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port):
# super(AudioSender, self).__init__(str("audio")) # Convert future's unicode_literal to str
# self.create_socket(zmq_context, zmq.PUB, port)
# self.thread = None
#
# try:
# self.audio = pyaudio.PyAudio()
# self.microphone = choose_mic(self.audio)
# except IOError as e:
# logger.warning("PyAudio is not available.", exc_info=e)
# self.audio = None
# self.microphone = None
#
# def start(self):
# """
# Start sending audio in a different thread.
#
# Will not start if no microphone is available.
# """
# if not self.microphone:
# logger.info("Not listening: no microphone available.")
# return
#
# logger.info("Listening with microphone \"{}\".".format(self.microphone["name"]))
# self.thread = threading.Thread(target=self._stream)
# self.thread.start()
#
# def wait_until_done(self):
# """
# Wait until the audio thread is done.
#
# Will block until `state.exit_event` is set. If the thread is not running, does nothing.
# """
# if not self.thread: return
# self.thread.join()
# self.thread = None
#
# def _stream(self):
# """
# Internal method to continuously read audio from the microphone and send it over the socket.
# """
# audio_settings = settings.audio_config
# chunk = audio_settings.chunk_size # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD
#
# # Docs say this only raises an error if neither `input` nor `output` is True
# stream = self.audio.open(
# format=pyaudio.paFloat32,
# channels=audio_settings.channels,
# rate=audio_settings.sample_rate,
# input=True,
# input_device_index=self.microphone["index"],
# frames_per_buffer=chunk,
# )
#
# try:
# while not state.exit_event.is_set():
# data = stream.read(chunk)
# if (state.is_speaking): continue # Do not send audio while the robot is speaking
# self.socket.send(data)
# except IOError as e:
# logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e)
# finally:
# stream.stop_stream()
# stream.close()