"""
Audio sender endpoint: captures robot/microphone audio and publishes it
over ZeroMQ.

Developed [...] University within the Software Project course.
(NOTE(review): the first lines of this module docstring fall outside the
patch context and are not visible here — restore them from the full file.)
"""
from __future__ import unicode_literals  # So that `logging` can use Unicode characters in names

from abc import ABCMeta, abstractmethod
import threading
import logging
import Queue

import numpy as np
import pyaudio  # still used by the (currently disabled) standalone capture path
import zmq

# NOTE(review): the project-local imports (`state`, `SocketBase`, and the old
# `choose_mic`) sit between the two hunks of the original patch and are not
# visible here — keep them exactly as in the full file.
from robot_interface.core.config import settings

logger = logging.getLogger(__name__)


class AudioCapturer(object):
    """
    Abstract interface for audio sources feeding :class:`AudioSender`.

    Implementations deliver audio as fixed-size float32 PCM chunks from
    :meth:`generate_chunk`, and manage their device lifetime through
    :meth:`setup` / :meth:`stop`.
    """
    __metaclass__ = ABCMeta  # Python 2 ABC declaration

    @abstractmethod
    def setup(self):
        """Acquire / subscribe to the underlying audio device."""
        raise NotImplementedError()

    @abstractmethod
    def stop(self):
        """Release the underlying audio device."""
        raise NotImplementedError()

    @abstractmethod
    def generate_chunk(self):
        """Return the next audio chunk, or None if none is available yet."""
        raise NotImplementedError()
class QiAudioCapturer(AudioCapturer):
    """
    Captures audio from a NAOqi robot via the ALAudioDevice service.

    NAOqi pushes raw 16-bit PCM into :meth:`processRemote`; samples are
    converted to float32 in [-1, 1), re-cut into chunks of
    :data:`CHUNK_SIZE` samples and buffered on an internal queue that
    :meth:`generate_chunk` drains.

    :param sample_rate: Requested sample rate in Hz.
    :type sample_rate: int

    :param channels: Requested channel count (:meth:`processRemote` assumes 1).
    :type channels: int

    :param deinterleaved: NAOqi deinterleave flag (0 = interleaved).
    :type deinterleaved: int
    """

    # Samples per emitted chunk. 512 is the input size required by
    # Silero-VAD downstream (see the pre-refactor implementation's comment).
    CHUNK_SIZE = 512

    def __init__(self, sample_rate=16000, channels=1, deinterleaved=0):
        self.session = state.qi_session
        if not self.session:
            raise RuntimeError("Cannot capture from qi device, no qi session available.")
        self.audio = self.session.service("ALAudioDevice")

        self.service_name = "ZmqAudioStreamer"
        self.sample_rate = sample_rate
        self.channels = channels
        self.deinterleaved = deinterleaved

        # Samples left over from the previous callback that did not fill a
        # complete chunk.
        self.overflow = np.empty(0, dtype=np.float32)

        self.q = Queue.Queue()

    def setup(self):
        """Register this object as a NAOqi audio consumer and start the feed."""
        assert self.session is not None
        self.session.registerService(self.service_name, self)
        self.audio.setClientPreferences(self.service_name, self.sample_rate, self.channels, self.deinterleaved)
        self.audio.subscribe(self.service_name)

    def stop(self):
        """Unsubscribe from ALAudioDevice; safe to call even if never subscribed."""
        try:
            self.audio.unsubscribe(self.service_name)
        except Exception:
            # Best-effort teardown: unsubscribing twice (or before subscribing)
            # raises inside NAOqi and can be ignored during shutdown.
            # (Was a bare `except:`, which would also swallow SystemExit.)
            logger.debug("Ignoring error while unsubscribing audio service.", exc_info=True)

    def generate_chunk(self):
        """
        Return the next float32 chunk, or None if nothing arrived within 100 ms.

        Renamed from `audio_gen`: both the abstract `AudioCapturer` interface
        and `AudioSender.stream` expect `generate_chunk`; without this rename
        the class cannot even be instantiated under ABCMeta.
        """
        try:
            return self.q.get(True, 0.1)
        except Queue.Empty:
            return None

    # Callback invoked by NAOqi with raw interleaved 16-bit PCM.
    def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer):
        raw_pcm = bytes(inputBuffer)

        # Mono only: the chunking below does not deinterleave channels.
        assert nbOfChannels == 1

        pcm_i16 = np.frombuffer(raw_pcm, dtype=np.int16)
        pcm_f32 = pcm_i16.astype(np.float32) / 32768.0

        # Prepend the remainder carried over from the previous callback.
        pcm_f32 = np.append(self.overflow, pcm_f32)

        n_chunks = len(pcm_f32) // self.CHUNK_SIZE
        for i in range(n_chunks):
            self.q.put_nowait(pcm_f32[i * self.CHUNK_SIZE:(i + 1) * self.CHUNK_SIZE])

        # Keep the incomplete tail for the next callback.
        self.overflow = pcm_f32[n_chunks * self.CHUNK_SIZE:]
class StandaloneAudioCapturer(AudioCapturer):
    """
    Placeholder for the PyAudio-based desktop capture path.

    TODO(review): port the pre-refactor PyAudio implementation (see the VCS
    history of this file) onto the AudioCapturer interface. Until then this
    class is abstract and cannot be instantiated.
    """
    pass


class AudioSender(SocketBase):
    """
    Audio sender endpoint: publishes captured audio chunks over a ZeroMQ
    PUB socket.

    :param zmq_context: The ZeroMQ context to use.
    :type zmq_context: zmq.Context

    :param port: The port to publish on.
    :type port: int
    """

    def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port):
        super(AudioSender, self).__init__(str("audio"))  # Convert future's unicode_literal to str
        self.create_socket(zmq_context, zmq.PUB, port)

        self.thread = threading.Thread(target=self.stream)

        self.capturer = self.choose_capturer()

    def start(self):
        """Subscribe the capturer to its audio device and start streaming."""
        # Bug fix: setup() was never called anywhere, so the NAOqi
        # subscription never happened and no audio was ever produced.
        self.capturer.setup()
        self.thread.start()

    def close(self):
        """Stop capturing and close the underlying socket."""
        # NOTE(review): the stream thread may still be sending while the
        # socket closes — confirm SocketBase.close() tolerates this.
        self.capturer.stop()
        super(AudioSender, self).close()

    def stream(self):
        """
        Forward chunks from the capturer to the socket until
        `state.exit_event` is set. Chunks are dropped while the robot is
        speaking so it does not hear itself.
        """
        while not state.exit_event.is_set():
            chunk = self.capturer.generate_chunk()

            # Bug fix: `chunk` is a numpy array, and `not chunk` on a
            # multi-element array raises ValueError ("truth value of an
            # array ... is ambiguous"), crashing this thread on the first
            # real chunk. Test for None explicitly instead.
            if chunk is None or state.is_speaking:
                continue

            # numpy float32 arrays expose the buffer protocol, which
            # zmq.Socket.send accepts directly.
            self.socket.send(chunk)

    def choose_capturer(self):
        """Select the audio capturer; currently always the NAOqi one."""
        # TODO(review): fall back to StandaloneAudioCapturer when no qi
        # session is available, once that class is implemented.
        return QiAudioCapturer()


# NOTE(review): the large commented-out pre-refactor AudioSender (PyAudio
# based) was removed here. It is preserved in version control history and
# its logic should be revived inside StandaloneAudioCapturer.