feat: multi-channel qi audio possible

This commit is contained in:
2026-02-09 13:40:47 +01:00
parent 31c76ecf84
commit abd6988d1e

View File

@@ -6,13 +6,16 @@ University within the Software Project course.
""" """
from __future__ import unicode_literals # So that `logging` can use Unicode characters in names from __future__ import unicode_literals # So that `logging` can use Unicode characters in names
import audioop
import enum
from abc import ABCMeta, abstractmethod from abc import ABCMeta, abstractmethod
import threading import threading
import logging import logging
import Queue import Queue
import numpy as np import numpy as np
import pyaudio
import zmq import zmq
from robot_interface.endpoints.socket_base import SocketBase from robot_interface.endpoints.socket_base import SocketBase
@@ -38,28 +41,56 @@ class AudioCapturer(object):
def generate_chunk(self): def generate_chunk(self):
raise NotImplementedError() raise NotImplementedError()
class SampleRate(enum.Enum):
    """Sample rates, expressed in Hz, that can be requested for audio capture."""

    LOW = 16000  # 16 kHz
    HIGH = 48000  # 48 kHz
class PepperMicrophone(enum.Enum):
    """
    Selects which of Pepper's microphones to capture from.

    On our robot some of the microphones were damaged or did not work well, so we chose to use
    only the front right one. If every microphone on your Pepper works, you may prefer ``ALL``
    to improve the overall audio quality.
    """

    # NOTE(review): each value is handed to ``ALAudioDevice.setClientPreferences`` as the
    # channel selection -- confirm the name-to-value mapping against the NAOqi documentation.
    ALL = 0
    LEFT = 1
    RIGHT = 2
    FRONT_LEFT = 3
    FRONT_RIGHT = 4
class QiAudioCapturer(AudioCapturer): class QiAudioCapturer(AudioCapturer):
def __init__(self, sample_rate=SampleRate.LOW, mic=PepperMicrophone.FRONT_RIGHT,
             deinterleaved=0):
    """
    Prepare a capturer that receives audio through the robot's ``ALAudioDevice`` service.

    :param sample_rate: Requested sample rate, either a ``SampleRate`` member or its raw value
        in Hz (e.g. ``16000``).
    :param mic: Microphone selection, either a ``PepperMicrophone`` member or its raw value.
    :param deinterleaved: Deinterleave flag; stored as-is and later forwarded unchanged to
        ``ALAudioDevice.setClientPreferences`` by ``setup``.
    :raises RuntimeError: If no qi session is available, or if 48000 Hz and "all microphones"
        are not selected together.
    :raises ValueError: If ``sample_rate`` or ``mic`` is not one of the supported values.
    """
    self.session = state.qi_session
    if not self.session:
        raise RuntimeError("Cannot capture from qi device, no qi session available.")

    # Normalise to enum members. The enum constructors accept both members and raw values, so
    # callers still passing plain integers keep working. Without this, a raw integer never
    # compares equal to a member, skips the checks below, and only fails later on ``.value``.
    sample_rate = SampleRate(sample_rate)
    mic = PepperMicrophone(mic)

    # 48000 Hz and "all microphones" are only accepted together.
    if sample_rate == SampleRate.HIGH and mic != PepperMicrophone.ALL:
        raise RuntimeError("For 48000 Hz, you must select all microphones.")
    if mic == PepperMicrophone.ALL and sample_rate != SampleRate.HIGH:
        raise RuntimeError("For using all microphones, 48000 Hz is required.")

    self.audio = self.session.service("ALAudioDevice")
    self.service_name = "ZmqAudioStreamer"
    self.sample_rate = sample_rate
    self.mic = mic
    self.deinterleaved = deinterleaved

    # Samples left over after cutting fixed-size chunks; prepended to the next buffer.
    self.overflow = np.empty(0, dtype=np.float32)
    # Finished chunks (raw float32 bytes) waiting to be consumed.
    self.q = Queue.Queue()
    # Resampler state carried between successive ``audioop.ratecv`` calls.
    self._rate_state = None
def setup(self):
    """
    Register this object as a qi service and subscribe it to the audio device.

    The service is registered under ``self.service_name``; the audio device is then given
    the configured sample rate, microphone selection and deinterleave flag for that name
    before the subscription is started.
    """
    assert self.session is not None

    name = self.service_name
    self.session.registerService(name, self)

    rate_hz = self.sample_rate.value
    channel_flag = self.mic.value
    self.audio.setClientPreferences(name, rate_hz, channel_flag, self.deinterleaved)
    self.audio.subscribe(name)
def stop(self): def stop(self):
@@ -79,18 +110,28 @@ class QiAudioCapturer(AudioCapturer):
def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer):
    """
    Convert one incoming audio buffer to 16 kHz mono float32 and queue it in 512-sample chunks.

    The buffer is read as signed 16-bit PCM, mixed down to a single channel, resampled from
    the configured sample rate to ``SampleRate.LOW`` and scaled by 1/32768. Samples that do
    not fill a whole chunk are kept in ``self.overflow`` and prepended to the next buffer.

    NOTE(review): presumably called by ``ALAudioDevice`` once ``setup`` has subscribed --
    confirm against the NAOqi documentation.

    :param nbOfChannels: Number of channels contained in ``inputBuffer``.
    :param nbOfSamplesByChannel: Unused.
    :param timeStamp: Unused.
    :param inputBuffer: Raw PCM bytes for all channels.
    """
    raw_pcm = bytes(inputBuffer)
    pcm_i32 = np.frombuffer(raw_pcm, dtype=np.int16).astype(np.int32)

    if self.deinterleaved and nbOfChannels > 1:
        # A deinterleaved buffer holds each channel's samples as one contiguous run (per the
        # NAOqi ALAudioDevice documentation), whereas ``_make_mono`` expects the samples of
        # one frame to be adjacent. Re-interleave so the right samples are averaged.
        pcm_i32 = pcm_i32.reshape(nbOfChannels, -1).T.reshape(-1)

    # Make mono channel (if it was 4 channels)
    pcm_i32_mono = self._make_mono(pcm_i32, nbOfChannels)

    # Resample (if it was 48k); width 4 = bytes per int32 sample, 1 = mono. The returned state
    # is fed back in on the next call so consecutive buffers are resampled seamlessly.
    pcm_i32_mono_16k, self._rate_state = audioop.ratecv(
        pcm_i32_mono.tobytes(), 4, 1,
        self.sample_rate.value, SampleRate.LOW.value, self._rate_state)
    pcm_f32_mono_16k = (np.frombuffer(pcm_i32_mono_16k, dtype=np.int32).astype(np.float32) /
                        32768.0)

    # Attach overflow
    pcm_f32_mono_16k = np.append(self.overflow, pcm_f32_mono_16k)

    # Emit every complete 512-sample chunk and keep the remainder for the next call.
    complete = len(pcm_f32_mono_16k) // 512 * 512
    for start in range(0, complete, 512):
        self.q.put_nowait(pcm_f32_mono_16k[start:start + 512].tobytes())
    self.overflow = pcm_f32_mono_16k[complete:]
def _make_mono(self, frag, channels):
return frag.reshape(-1, channels).mean(axis=1, dtype=np.int32)
class StandaloneAudioCapturer: class StandaloneAudioCapturer: