diff --git a/src/robot_interface/endpoints/audio_sender.py b/src/robot_interface/endpoints/audio_sender.py index e168285..547dbb9 100644 --- a/src/robot_interface/endpoints/audio_sender.py +++ b/src/robot_interface/endpoints/audio_sender.py @@ -6,13 +6,16 @@ University within the Software Project course. """ from __future__ import unicode_literals # So that `logging` can use Unicode characters in names + +import audioop + +import enum from abc import ABCMeta, abstractmethod import threading import logging import Queue import numpy as np -import pyaudio import zmq from robot_interface.endpoints.socket_base import SocketBase @@ -38,28 +41,56 @@ class AudioCapturer(object): def generate_chunk(self): raise NotImplementedError() +class SampleRate(enum.Enum): + """ + Sample rate to use in Hz. + """ + LOW = 16000 + HIGH = 48000 + +class PepperMicrophone(enum.Enum): + """ + Which of Pepper's microphones to use. In our case, some of the mics were damaged/didn't work + well, so we chose to only use the front right. If you have a Pepper robot with all working mics, + you might wish to use all microphones, to improve overall audio quality. 
+ """ + ALL = 0 + LEFT = 1 + RIGHT = 2 + FRONT_LEFT = 3 + FRONT_RIGHT = 4 + class QiAudioCapturer(AudioCapturer): - def __init__(self, sample_rate=16000, channels=1, deinterleaved=0): + def __init__(self, sample_rate=SampleRate.LOW, mic=PepperMicrophone.FRONT_RIGHT, + deinterleaved=0): self.session = state.qi_session if not self.session: raise RuntimeError("Cannot capture from qi device, no qi session available.") + if sample_rate == SampleRate.HIGH and mic != PepperMicrophone.ALL: + raise RuntimeError("For 48000 Hz, you must select all microphones.") + if mic == PepperMicrophone.ALL and sample_rate != SampleRate.HIGH: + raise RuntimeError("For using all microphones, 48000 Hz is required.") + self.audio = self.session.service("ALAudioDevice") self.service_name = "ZmqAudioStreamer" self.sample_rate = sample_rate - self.channels = channels + self.mic = mic self.deinterleaved = deinterleaved self.overflow = np.empty(0, dtype=np.float32) self.q = Queue.Queue() + self._rate_state = None + def setup(self): assert self.session is not None self.session.registerService(self.service_name, self) - self.audio.setClientPreferences(self.service_name, self.sample_rate, self.channels, self.deinterleaved) + self.audio.setClientPreferences(self.service_name, self.sample_rate.value, self.mic.value, + self.deinterleaved) self.audio.subscribe(self.service_name) def stop(self): @@ -79,18 +110,28 @@ class QiAudioCapturer(AudioCapturer): def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer): raw_pcm = bytes(inputBuffer) - assert nbOfChannels == 1 - pcm_i16 = np.frombuffer(raw_pcm, dtype=np.int16) - pcm_f32 = pcm_i16.astype(np.float32) / 32768.0 + + # Make mono channel (if it was 4 channels) + pcm_i32_mono = self._make_mono(pcm_i16.astype(np.int32), nbOfChannels) + + # Resample (if it was 48k) + pcm_i32_mono_16k, self._rate_state = audioop.ratecv(pcm_i32_mono.tobytes(), 4, 1, + self.sample_rate.value, + SampleRate.LOW.value, self._rate_state) + 
pcm_f32_mono_16k = (np.frombuffer(pcm_i32_mono_16k, dtype=np.int32).astype(np.float32) / + 32768.0) # Attach overflow - pcm_f32 = np.append(self.overflow, pcm_f32) + pcm_f32_mono_16k = np.append(self.overflow, pcm_f32_mono_16k) - for i in range(len(pcm_f32) // 512): - self.q.put_nowait(pcm_f32[i * 512 : (i + 1) * 512].tobytes()) + for i in range(len(pcm_f32_mono_16k) // 512): + self.q.put_nowait(pcm_f32_mono_16k[i * 512 : (i + 1) * 512].tobytes()) - self.overflow = pcm_f32[len(pcm_f32) // 512 * 512 :] + self.overflow = pcm_f32_mono_16k[len(pcm_f32_mono_16k) // 512 * 512 :] + + def _make_mono(self, frag, channels): + return frag.reshape(-1, channels).mean(axis=1, dtype=np.int32) class StandaloneAudioCapturer: