feat: make multi-channel qi audio capture possible
This commit is contained in:
@@ -6,13 +6,16 @@ University within the Software Project course.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import unicode_literals # So that `logging` can use Unicode characters in names
|
from __future__ import unicode_literals # So that `logging` can use Unicode characters in names
|
||||||
|
|
||||||
|
import audioop
|
||||||
|
|
||||||
|
import enum
|
||||||
from abc import ABCMeta, abstractmethod
|
from abc import ABCMeta, abstractmethod
|
||||||
import threading
|
import threading
|
||||||
import logging
|
import logging
|
||||||
import Queue
|
import Queue
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyaudio
|
|
||||||
import zmq
|
import zmq
|
||||||
|
|
||||||
from robot_interface.endpoints.socket_base import SocketBase
|
from robot_interface.endpoints.socket_base import SocketBase
|
||||||
@@ -38,28 +41,56 @@ class AudioCapturer(object):
|
|||||||
    def generate_chunk(self):
        """
        Produce the next chunk of captured audio data.

        Abstract hook: concrete capturer subclasses must override this.
        The base implementation always raises.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
|
||||||
|
|
||||||
|
class SampleRate(enum.Enum):
    """
    Sample rate to use, in Hz.

    NOTE(review): QiAudioCapturer only accepts HIGH together with
    PepperMicrophone.ALL (and vice versa) -- presumably an ALAudioDevice
    restriction; confirm against the NAOqi documentation.
    """

    LOW = 16000
    HIGH = 48000
|
||||||
|
|
||||||
|
class PepperMicrophone(enum.Enum):
    """
    Which of Pepper's microphones to capture from.

    In our case some of the mics were damaged or did not work well, so we
    chose to use only the front-right one.  If you have a Pepper robot with
    all working mics, you may wish to use ALL to improve overall audio
    quality (which also requires the 48000 Hz sample rate -- see
    QiAudioCapturer).
    """

    ALL = 0
    LEFT = 1
    RIGHT = 2
    FRONT_LEFT = 3
    FRONT_RIGHT = 4
|
||||||
|
|
||||||
|
|
||||||
class QiAudioCapturer(AudioCapturer):
|
class QiAudioCapturer(AudioCapturer):
|
||||||
    def __init__(self, sample_rate=SampleRate.LOW, mic=PepperMicrophone.FRONT_RIGHT,
                 deinterleaved=0):
        """
        Capture audio from the robot through an existing qi session.

        :param sample_rate: a ``SampleRate`` member; ``HIGH`` (48 kHz) is
            only allowed together with ``PepperMicrophone.ALL``.
        :param mic: a ``PepperMicrophone`` member selecting which
            microphone(s) to subscribe to; ``ALL`` requires 48 kHz.
        :param deinterleaved: passed straight through to
            ``ALAudioDevice.setClientPreferences`` (0 = interleaved).
        :raises RuntimeError: if no qi session is available, or the
            sample-rate/microphone combination is invalid.
        """
        self.session = state.qi_session
        if not self.session:
            raise RuntimeError("Cannot capture from qi device, no qi session available.")

        # The two checks together mean only (HIGH, ALL) or (LOW, single mic)
        # are valid combinations.
        if sample_rate == SampleRate.HIGH and mic != PepperMicrophone.ALL:
            raise RuntimeError("For 48000 Hz, you must select all microphones.")
        if mic == PepperMicrophone.ALL and sample_rate != SampleRate.HIGH:
            raise RuntimeError("For using all microphones, 48000 Hz is required.")

        self.audio = self.session.service("ALAudioDevice")

        # Name under which this object is registered as a qi service (setup()).
        self.service_name = "ZmqAudioStreamer"
        self.sample_rate = sample_rate
        self.mic = mic
        self.deinterleaved = deinterleaved

        # Samples left over from the previous callback that did not yet fill
        # a whole 512-sample chunk (see processRemote()).
        self.overflow = np.empty(0, dtype=np.float32)

        # Queue of ready-to-send 512-sample float32 chunks (as bytes).
        self.q = Queue.Queue()

        # audioop.ratecv resampler state, carried across callbacks.
        self._rate_state = None
|
||||||
|
|
||||||
    def setup(self):
        """
        Register with the qi session and subscribe to the audio device.

        After this call, ALAudioDevice is expected to start invoking
        ``processRemote()`` with raw PCM buffers at the configured rate.
        """
        assert self.session is not None
        # Expose this object as a qi service so ALAudioDevice can call back
        # into processRemote().
        self.session.registerService(self.service_name, self)
        self.audio.setClientPreferences(self.service_name, self.sample_rate.value, self.mic.value,
                                        self.deinterleaved)
        self.audio.subscribe(self.service_name)
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
@@ -79,18 +110,28 @@ class QiAudioCapturer(AudioCapturer):
|
|||||||
    def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer):
        """
        Audio callback invoked by ALAudioDevice with a captured buffer.

        Converts the interleaved signed 16-bit PCM buffer to mono float32
        at 16 kHz and pushes it onto ``self.q`` in fixed 512-sample chunks;
        any remainder is kept in ``self.overflow`` for the next call.

        :param nbOfChannels: number of interleaved channels in inputBuffer.
        :param nbOfSamplesByChannel: samples per channel (unused here).
        :param timeStamp: capture timestamp (unused here).
        :param inputBuffer: raw int16 PCM bytes (native byte order assumed).
        """
        raw_pcm = bytes(inputBuffer)

        pcm_i16 = np.frombuffer(raw_pcm, dtype=np.int16)

        # Make mono channel (if it was 4 channels).  Widen to int32 first so
        # audioop.ratecv below can treat the data as 4-byte samples; values
        # still span the int16 range.
        pcm_i32_mono = self._make_mono(pcm_i16.astype(np.int32), nbOfChannels)

        # Resample (if it was 48k) down to 16 kHz.  _rate_state carries the
        # resampler state across successive callbacks; a 16k->16k call is a
        # pass-through.
        pcm_i32_mono_16k, self._rate_state = audioop.ratecv(pcm_i32_mono.tobytes(), 4, 1,
                                                            self.sample_rate.value,
                                                            SampleRate.LOW.value, self._rate_state)
        # Dividing by 32768 normalises the int16-range values to [-1.0, 1.0).
        pcm_f32_mono_16k = (np.frombuffer(pcm_i32_mono_16k, dtype=np.int32).astype(np.float32) /
                            32768.0)

        # Attach overflow left over from the previous call.
        pcm_f32_mono_16k = np.append(self.overflow, pcm_f32_mono_16k)

        # Emit as many full 512-sample chunks as possible...
        for i in range(len(pcm_f32_mono_16k) // 512):
            self.q.put_nowait(pcm_f32_mono_16k[i * 512 : (i + 1) * 512].tobytes())

        # ...and keep the incomplete tail for the next callback.
        self.overflow = pcm_f32_mono_16k[len(pcm_f32_mono_16k) // 512 * 512 :]
|
||||||
|
|
||||||
|
def _make_mono(self, frag, channels):
|
||||||
|
return frag.reshape(-1, channels).mean(axis=1, dtype=np.int32)
|
||||||
|
|
||||||
|
|
||||||
class StandaloneAudioCapturer:
|
class StandaloneAudioCapturer:
|
||||||
|
|||||||
Reference in New Issue
Block a user