diff --git a/.env.example b/.env.example index 173b63c..95723e0 100644 --- a/.env.example +++ b/.env.example @@ -6,6 +6,9 @@ # The hostname or IP address of the Control Backend. AGENT__CONTROL_BACKEND_HOST=localhost +# Whether to use Pepper's microphone when Pepper is connected. +AUDIO__USE_PEPPER_MICROPHONE=true + # Variables that are unlikely to be configured, you can probably ignore these: diff --git a/requirements.txt b/requirements.txt index bc679f4..21f517b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,5 @@ sphinx sphinx_rtd_theme pre-commit python-dotenv +numpy<=1.16.6 +enum34 diff --git a/src/robot_interface/core/config.py b/src/robot_interface/core/config.py index a07e36e..3a19c48 100644 --- a/src/robot_interface/core/config.py +++ b/src/robot_interface/core/config.py @@ -78,6 +78,8 @@ class AudioConfig(object): """ Audio configuration constants. + :ivar use_pepper_microphone: Whether to use Pepper's microphone or not, defaults to True. + :vartype use_pepper_microphone: bool :ivar sample_rate: Audio sampling rate in Hz, defaults to 16000. :vartype sample_rate: int :ivar chunk_size: Size of audio chunks to capture/process, defaults to 512. @@ -85,7 +87,14 @@ class AudioConfig(object): :ivar channels: Number of audio channels, defaults to 1. 
:vartype channels: int """ - def __init__(self, sample_rate=None, chunk_size=None, channels=None): + def __init__( + self, + use_pepper_microphone=None, + sample_rate=None, + chunk_size=None, + channels=None, + ): + self.use_pepper_microphone = get_config(use_pepper_microphone, "AUDIO__USE_PEPPER_MICROPHONE", True, bool) self.sample_rate = get_config(sample_rate, "AUDIO__SAMPLE_RATE", 16000, int) self.chunk_size = get_config(chunk_size, "AUDIO__CHUNK_SIZE", 512, int) self.channels = get_config(channels, "AUDIO__CHANNELS", 1, int) diff --git a/src/robot_interface/endpoints/audio_sender.py b/src/robot_interface/endpoints/audio_sender.py index 54e149c..d2e32df 100644 --- a/src/robot_interface/endpoints/audio_sender.py +++ b/src/robot_interface/endpoints/audio_sender.py @@ -6,9 +6,16 @@ University within the Software Project course. """ from __future__ import unicode_literals # So that `logging` can use Unicode characters in names + +import audioop + +import enum +from abc import ABCMeta, abstractmethod import threading import logging +import Queue +import numpy as np import pyaudio import zmq @@ -20,86 +27,219 @@ from robot_interface.core.config import settings logger = logging.getLogger(__name__) -class AudioSender(SocketBase): +class AudioCapturer(object): """ - Audio sender endpoint, responsible for sending microphone audio data. + Interface for audio capturers. + """ + __metaclass__ = ABCMeta - :param zmq_context: The ZeroMQ context to use. - :type zmq_context: zmq.Context + @abstractmethod + def setup(self): + raise NotImplementedError() - :param port: The port to use. - :type port: int + @abstractmethod + def stop(self): + raise NotImplementedError() - :ivar thread: Thread used for sending audio. - :vartype thread: threading.Thread | None + @abstractmethod + def generate_chunk(self): + raise NotImplementedError() + +class SampleRate(enum.Enum): + """ + Sample rate to use in Hz. 
+ """ + LOW = 16000 + HIGH = 48000 + +class PepperMicrophone(enum.Enum): + """ + Which of Pepper's microphones to use. In our case, some of the mics were damaged/didn't work + well, so we choose to only use the front right. If you have a Pepper robot with all working mics, + you might wish to use all microphones, to improve overall audio quality. + """ + ALL = 0 + LEFT = 1 + RIGHT = 2 + FRONT_LEFT = 3 + FRONT_RIGHT = 4 + + +class QiAudioCapturer(AudioCapturer): + # Some of this class' methods have docstrings as binary strings. Keep them that way, otherwise + # ``qi.Session.registerService`` will give RuntimeErrors. + def __init__(self, sample_rate=SampleRate.LOW, mic=PepperMicrophone.FRONT_RIGHT, + deinterleaved=0): + """ + :raises RuntimeError: If there is no Qi session available. + :raises RuntimeError: If the given arguments are not compatible. + """ + self.session = state.qi_session + if not self.session: + raise RuntimeError("Cannot capture from qi device, no qi session available.") + + if sample_rate == SampleRate.HIGH and mic != PepperMicrophone.ALL: + raise RuntimeError("For 48000 Hz, you must select all microphones.") + if mic == PepperMicrophone.ALL and sample_rate != SampleRate.HIGH: + raise RuntimeError("For using all microphones, 48000 Hz is required.") + + self.audio = self.session.service("ALAudioDevice") + + self.service_name = "ZmqAudioStreamer" + self.sample_rate = sample_rate + self.mic = mic + self.deinterleaved = deinterleaved + + self.overflow = np.empty(0, dtype=np.float32) + + self.q = Queue.Queue() + + self._rate_state = None + + def setup(self): + b""" + :raises RuntimeError: If no Qi session is available or if the session is not compatible with audio streaming. 
+ """ + assert self.session is not None + logger.info("Listening with Pepper's microphone.") + self.session.registerService(self.service_name, self) + self.audio.setClientPreferences(self.service_name, self.sample_rate.value, self.mic.value, + self.deinterleaved) + self.audio.subscribe(self.service_name) + + def stop(self): + b""" + Stop the audio capturer. + """ + try: + self.audio.unsubscribe(self.service_name) + except: + pass + + def generate_chunk(self): + try: + chunk = self.q.get(True, 0.1) + return chunk + except Queue.Empty: + return None + + # Callback invoked by NAOqi + def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer): + raw_pcm = bytes(inputBuffer) + + pcm_i16 = np.frombuffer(raw_pcm, dtype=np.int16) + + # Make mono channel (if it was 4 channels) + pcm_i32_mono = self._make_mono(pcm_i16.astype(np.int32), nbOfChannels) + + # Resample (if it was 48k) + pcm_i32_mono_16k, self._rate_state = audioop.ratecv(pcm_i32_mono.tobytes(), 4, 1, + self.sample_rate.value, + SampleRate.LOW.value, self._rate_state) + pcm_f32_mono_16k = (np.frombuffer(pcm_i32_mono_16k, dtype=np.int32).astype(np.float32) / + 32768.0) + + # Attach overflow + pcm_f32_mono_16k = np.append(self.overflow, pcm_f32_mono_16k) + + for i in range(len(pcm_f32_mono_16k) // 512): + self.q.put_nowait(pcm_f32_mono_16k[i * 512 : (i + 1) * 512].tobytes()) + + self.overflow = pcm_f32_mono_16k[len(pcm_f32_mono_16k) // 512 * 512 :] + + def _make_mono(self, frag, channels): + return frag.reshape(-1, channels).mean(axis=1, dtype=np.int32) + + +class StandaloneAudioCapturer(AudioCapturer): + """ + Audio capturer that uses a microphone from the local device, can be chosen with the + ``--microphone`` program argument. :ivar audio: PyAudio instance. :vartype audio: pyaudio.PyAudio | None :ivar microphone: Selected microphone information. :vartype microphone: dict | None + + :ivar stream: PyAudio stream instance. 
None until ``setup()`` is called, remaining None if setup + fails for any reason. + :vartype stream: pyaudio.Stream | None """ - def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port): - super(AudioSender, self).__init__(str("audio")) # Convert future's unicode_literal to str - self.create_socket(zmq_context, zmq.PUB, port) - self.thread = None + def __init__(self): + self.stream = None try: self.audio = pyaudio.PyAudio() self.microphone = choose_mic(self.audio) except IOError as e: - logger.warning("PyAudio is not available.", exc_info=e) + logger.warning("PyAudio is not available. Won't be able to send audio.", exc_info=True) self.audio = None self.microphone = None - def start(self): + def setup(self): """ - Start sending audio in a different thread. - - Will not start if no microphone is available. + Set up the audio stream. Does nothing if no microphone is available. """ if not self.microphone: logger.info("Not listening: no microphone available.") return logger.info("Listening with microphone \"{}\".".format(self.microphone["name"])) - self.thread = threading.Thread(target=self._stream) - self.thread.start() - - def wait_until_done(self): - """ - Wait until the audio thread is done. - - Will block until `state.exit_event` is set. If the thread is not running, does nothing. - """ - if not self.thread: return - self.thread.join() - self.thread = None - - def _stream(self): - """ - Internal method to continuously read audio from the microphone and send it over the socket. 
- """ - audio_settings = settings.audio_config - chunk = audio_settings.chunk_size # 320 at 16000 Hz is 20ms, 512 is required for Silero-VAD - - # Docs say this only raises an error if neither `input` nor `output` is True - stream = self.audio.open( + self.stream = self.audio.open( format=pyaudio.paFloat32, - channels=audio_settings.channels, - rate=audio_settings.sample_rate, + channels=settings.audio_config.channels, + rate=settings.audio_config.sample_rate, input=True, input_device_index=self.microphone["index"], - frames_per_buffer=chunk, + frames_per_buffer=settings.audio_config.chunk_size, ) - try: - while not state.exit_event.is_set(): - data = stream.read(chunk) - if (state.is_speaking): continue # Do not send audio while the robot is speaking - self.socket.send(data) - except IOError as e: - logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e) - finally: - stream.stop_stream() - stream.close() + def stop(self): + """ + Close the audio stream. + """ + if not self.stream: return + self.stream.stop_stream() + self.stream.close() + + def generate_chunk(self): + """ + :return: Audio frames from the microphone of size ``settings.audio_config.chunk_size``. + :rtype: bytes. + :raises IOError: If reading from the audio stream fails. 
+ """ + return self.stream.read(settings.audio_config.chunk_size) + + +class AudioSender(SocketBase): + def __init__(self, zmq_context, port=settings.agent_settings.audio_sender_port): + super(AudioSender, self).__init__(str("audio")) + self.create_socket(zmq_context, zmq.PUB, port) + + self.thread = threading.Thread(target=self.stream) + + self.capturer = self.choose_capturer() + + def start(self): + self.capturer.setup() + self.thread.start() + + def close(self): + self.capturer.stop() + super(AudioSender, self).close() + + def stream(self): + while not state.exit_event.is_set(): + chunk = self.capturer.generate_chunk() + + if chunk is None or state.is_speaking: + continue + + self.socket.send(chunk) + + def choose_capturer(self): + if state.qi_session and settings.audio_config.use_pepper_microphone: + return QiAudioCapturer() + + return StandaloneAudioCapturer() diff --git a/src/robot_interface/utils/get_config.py b/src/robot_interface/utils/get_config.py index 64d5734..ae29cf7 100644 --- a/src/robot_interface/utils/get_config.py +++ b/src/robot_interface/utils/get_config.py @@ -16,6 +16,8 @@ def get_config(value, env, default, cast=None): Small utility to get a configuration value, returns `value` if it is not None, else it will try to get the environment variable cast with `cast`. If the environment variable is not set, it will return `default`. + Special handling for booleans, which are only true if the value of the variable is "true" or "yes", ignoring capitalization. + :param value: The value to check. :type value: Any :param env: The environment variable to check. 
@@ -33,7 +35,14 @@ def get_config(value, env, default, cast=None): env = os.environ.get(env, default) - if cast is None: + if cast is None or env is None: return env + if cast == bool: + if isinstance(env, bool): + return env + if not isinstance(default, bool): + raise ValueError("Default value must be a boolean if the cast type is a boolean.") + return env.lower() == "true" or env.lower() == "yes" + return cast(env) diff --git a/src/robot_interface/utils/qi_utils.py b/src/robot_interface/utils/qi_utils.py index c9f3a02..a3c9abf 100644 --- a/src/robot_interface/utils/qi_utils.py +++ b/src/robot_interface/utils/qi_utils.py @@ -14,6 +14,20 @@ except ImportError: qi = None +def _get_qi_url(): + """ + Get the Qi URL from the command line arguments, or None if not given. + """ + if "--qi-url" in sys.argv: + return sys.argv[sys.argv.index("--qi-url") + 1] + + for arg in sys.argv: + if arg.startswith("--qi-url="): + return arg[len("--qi-url="):] + + return None + + def get_qi_session(): """ Create and return a Qi session if available. @@ -25,12 +39,13 @@ def get_qi_session(): logging.info("Unable to import qi. Running in stand-alone mode.") return None - if "--qi-url" not in sys.argv: + qi_url = _get_qi_url() + if qi_url is None: logging.info("No Qi URL argument given. 
Running in stand-alone mode.") return None try: - app = qi.Application() + app = qi.Application(["--qi-url", qi_url, "--qi-listen-url", "tcp://0.0.0.0:0"]) app.start() return app.session except RuntimeError: diff --git a/test/unit/test_audio_sender.py b/test/unit/test_audio_sender.py index d9f9ac2..32f7d46 100644 --- a/test/unit/test_audio_sender.py +++ b/test/unit/test_audio_sender.py @@ -33,35 +33,33 @@ def test_no_microphone(zmq_context, mocker): mock_info_logger = mocker.patch("robot_interface.endpoints.audio_sender.logger.info") mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") mock_choose_mic.return_value = None + mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.qi_session = None sender = AudioSender(zmq_context) - assert sender.microphone is None + assert sender.capturer.microphone is None - sender.start() - assert sender.thread is None + sender.capturer.setup() mock_info_logger.assert_called() - sender.wait_until_done() # Should return early because we didn't start a thread - def test_unicode_mic_name(zmq_context, mocker): """ Tests the robustness of the `AudioSender` when handling microphone names that contain Unicode characters. """ - mocker.patch("robot_interface.endpoints.audio_sender.threading") mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") - mock_choose_mic.return_value = {"name": u"• Some Unicode name"} + mock_choose_mic.return_value = {"name": u"• Some Unicode name", "index": 0L} + mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.qi_session = None sender = AudioSender(zmq_context) - assert sender.microphone is not None + assert sender.capturer.microphone is not None + sender.capturer.audio.open = mock.Mock(return_value=mock.Mock()) - # `.start()` logs the name of the microphone. It should not give an error if it contains Unicode + # `.setup()` logs the name of the microphone. 
It should not give an error if it contains Unicode # symbols. - sender.start() - assert sender.thread is not None - - sender.wait_until_done() # Should return instantly because we didn't start a real thread + sender.capturer.setup() def _fake_read(num_frames): @@ -79,6 +77,7 @@ def test_sending_audio(mocker): mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.qi_session = None mock_state.exit_event.is_set.side_effect = [False, True] mock_zmq_context = mock.Mock() @@ -91,11 +90,11 @@ def test_sending_audio(mocker): sender = AudioSender(mock_zmq_context) sender.socket.send = send_socket - sender.audio.open = mock.Mock() - sender.audio.open.return_value = stream + sender.capturer.audio.open = mock.Mock() + sender.capturer.audio.open.return_value = stream sender.start() - sender.wait_until_done() + sender.thread.join() send_socket.assert_called() @@ -108,6 +107,7 @@ def test_no_sending_if_speaking(mocker): mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.qi_session = None mock_state.exit_event.is_set.side_effect = [False, True] mock_zmq_context = mock.Mock() @@ -121,11 +121,11 @@ def test_no_sending_if_speaking(mocker): sender = AudioSender(mock_zmq_context) sender.socket.send = send_socket - sender.audio.open = mock.Mock() - sender.audio.open.return_value = stream + sender.capturer.audio.open = mock.Mock() + sender.capturer.audio.open.return_value = stream sender.start() - sender.wait_until_done() + sender.thread.join() send_socket.assert_not_called() @@ -145,6 +145,7 @@ def test_break_microphone(mocker): mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.qi_session = None mock_state.exit_event.is_set.side_effect = [False, True] mock_zmq_context = mock.Mock() @@ 
-156,11 +157,11 @@ def test_break_microphone(mocker): sender = AudioSender(mock_zmq_context) sender.socket.send = send_socket - sender.audio.open = mock.Mock() - sender.audio.open.return_value = stream + sender.capturer.audio.open = mock.Mock() + sender.capturer.audio.open.return_value = stream sender.start() - sender.wait_until_done() + sender.thread.join() send_socket.assert_not_called() @@ -171,6 +172,8 @@ def test_pyaudio_init_failure(mocker, zmq_context): """ # Prevent binding the ZMQ socket mocker.patch("robot_interface.endpoints.audio_sender.AudioSender.create_socket") + mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") + mock_state.qi_session = None # Simulate PyAudio() failing mocker.patch( @@ -180,5 +183,5 @@ def test_pyaudio_init_failure(mocker, zmq_context): sender = AudioSender(zmq_context) - assert sender.audio is None - assert sender.microphone is None + assert sender.capturer.audio is None + assert sender.capturer.microphone is None diff --git a/test/unit/test_get_config.py b/test/unit/test_get_config.py index ce3d8d2..4312a0a 100644 --- a/test/unit/test_get_config.py +++ b/test/unit/test_get_config.py @@ -50,3 +50,58 @@ def test_get_config_casts_default_when_env_missing(monkeypatch): result = get_config(None, "GET_CONFIG_MISSING", "42", int) assert result == 42 + + +def test_get_config_unset_boolean_default(monkeypatch): + """ + When the env var is a boolean, and it's not set, ensure it uses the default value. + """ + monkeypatch.delenv("SOME_BOOLEAN_VARIABLE", raising=False) + + result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool) + assert result == False + + result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool) + assert result == True + + +def test_get_config_true_boolean(monkeypatch): + """ + When the env var is a boolean, and its value is "true", "TRUE", "yes", etc., it should return true. 
+ """ + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "TRUE") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool) + assert result == True + + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "true") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool) + assert result == True + + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "yes") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool) + assert result == True + + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "YES") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool) + assert result == True + + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "TrUE") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", False, bool) + assert result == True + + +def test_get_config_false_boolean(monkeypatch): + """ + When the env var is a boolean, and its value is not "true", "TRUE", "yes", etc., it should return False. + """ + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "FALSE") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool) + assert result == False + + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "false") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool) + assert result == False + + monkeypatch.setenv("SOME_BOOLEAN_VARIABLE", "anything, tbh") + result = get_config(None, "SOME_BOOLEAN_VARIABLE", True, bool) + assert result == False diff --git a/test/unit/test_qi_utils.py b/test/unit/test_qi_utils.py index 6817be6..b18fa3c 100644 --- a/test/unit/test_qi_utils.py +++ b/test/unit/test_qi_utils.py @@ -62,7 +62,7 @@ def test_get_qi_session_runtime_error(monkeypatch): raise RuntimeError("boom") class FakeQi: - Application = lambda self=None: FakeApp() + Application = lambda *args, **kwargs: FakeApp() reload_qi_utils_with(FakeQi()) @@ -87,7 +87,7 @@ def test_get_qi_session_success(monkeypatch): return True class FakeQi: - Application = lambda self=None: FakeApp() + Application = lambda *args, **kwargs: FakeApp() reload_qi_utils_with(FakeQi())