1 Commits

Author SHA1 Message Date
b4814d431f feat: ignore own speech
When we detect that Pepper is talking we stop sending audio.

ref: N25B-214
2025-10-27 13:08:57 +01:00
15 changed files with 58 additions and 209 deletions

View File

@@ -1,9 +0,0 @@
.git
.githooks/
.idea/
.venv/
test/
typings/
.dockerignore
.gitignore
README.md

View File

@@ -1,37 +0,0 @@
# Image for the robot interface: Debian with Python 2.7 built through pyenv,
# the project's requirements installed in a virtualenv, and the NAOqi Python
# SDK unpacked into that virtualenv's site-packages.
FROM debian:trixie AS build
WORKDIR /app
COPY requirements.txt .
# System packages needed to compile Python 2.7 and the project's native
# dependencies (PortAudio, ZeroMQ), plus wget for fetching the SDK below.
# `&&` instead of `;` so that a failing `apt-get update` fails the build
# instead of silently installing from a stale or missing package index.
RUN apt-get update && apt-get install -y portaudio19-dev libzmq3-dev make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl git libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev alsa-utils musl-dev wget
ENV HOME="/root"
# pyenv is used because Python 2.7 has to be built from source here.
RUN git clone --depth=1 https://github.com/pyenv/pyenv.git ${HOME}/.pyenv
ENV PYENV_ROOT="${HOME}/.pyenv"
ENV PATH="${PYENV_ROOT}/shims:${PYENV_ROOT}/bin:${PATH}"
ENV PYTHON_VERSION=2.7.18
# Each step only makes sense if the previous one succeeded, hence `&&`.
RUN pyenv install ${PYTHON_VERSION} && pyenv global ${PYTHON_VERSION}
RUN python -m pip install virtualenv && python -m virtualenv .venv
# `source` is a bash builtin, so run the activation through bash explicitly.
RUN /usr/bin/env bash -c 'source .venv/bin/activate && pip install -r ./requirements.txt'
# NOTE: a multi-stage build (a second `FROM debian:trixie` stage that copies
# /app/.venv from this stage) was sketched here earlier but is not enabled.
WORKDIR /app/.venv/lib/python2.7/site-packages
# Download and unpack the NAOqi Python SDK next to the installed packages,
# then remove the archive so it does not stay in the image layer.
RUN /usr/bin/env bash -c 'wget https://community-static.aldebaran.com/resources/2.5.10/Python%20SDK/pynaoqi-python2.7-2.5.7.1-linux64.tar.gz && tar xvfz ./pynaoqi-python2.7-2.5.7.1-linux64.tar.gz && rm pynaoqi-python2.7-2.5.7.1-linux64.tar.gz'
# A .pth file makes the interpreter add the SDK's own site-packages directory
# to sys.path.
RUN echo /app/.venv/lib/python2.7/site-packages/pynaoqi-python2.7-2.5.7.1-linux64/lib/python2.7/site-packages/ > pynaoqi-python2.7.pth
WORKDIR /app
COPY . .
ENV PYTHONPATH=src
# NOTE(review): the qi URL is hard-coded; 172.17.0.1 is presumably the Docker
# host as seen from the default bridge network — confirm for your setup.
CMD [ "/bin/bash", "-c", "source .venv/bin/activate && python -m robot_interface.main --qi-url tcp://172.17.0.1:43305" ]

View File

@@ -34,18 +34,6 @@ python -m virtualenv .venv
source .venv/bin/activate source .venv/bin/activate
``` ```
We depend on PortAudio for the `pyaudio` package, so install it with:
```bash
sudo apt install -y portaudio19-dev
```
On WSL, also install:
```bash
sudo apt install -y libasound2-plugins
```
Install the required packages with Install the required packages with
```bash ```bash
@@ -110,8 +98,6 @@ $env:PYTHONPATH="src"; python -m robot_interface.main
With both, if you want to connect to the actual robot (or simulator), pass the `--qi-url` argument. With both, if you want to connect to the actual robot (or simulator), pass the `--qi-url` argument.
There's also a `--microphone` argument that can be used to choose a microphone to use. If not given, the program will try the default microphone. If you don't know the name of the microphone, pass the argument with any value, and it will list the names of available microphones.
## Testing ## Testing

View File

@@ -1,9 +0,0 @@
# ALSA configuration: make hardware card 2 the default PCM (audio stream) device.
# NOTE(review): the card index is hard-coded; presumably it matches the sound
# card on the target machine — confirm with `aplay -l` / `arecord -l`.
pcm.!default {
type hw
card 2
}
# Use the same hardware card for the default control (mixer) device.
ctl.!default {
type hw
card 2
}

View File

@@ -1,4 +0,0 @@
#!/usr/bin/env sh
# Install the system packages and then the Python requirements.
# Stop at the first failing command, so that `pip install` is not attempted
# when the system packages it builds against could not be installed.
set -e
apk add portaudio-dev libzmq gcc musl-dev g++ alsa-utils
pip install -r requirements.txt

View File

@@ -1,4 +1,3 @@
from __future__ import unicode_literals # So that we can log texts with Unicode characters
import logging import logging
import zmq import zmq
@@ -22,6 +21,7 @@ class ActuationReceiver(ReceiverBase):
self.create_socket(zmq_context, zmq.SUB, port) self.create_socket(zmq_context, zmq.SUB, port)
self.socket.setsockopt_string(zmq.SUBSCRIBE, u"") # Causes block if given in options self.socket.setsockopt_string(zmq.SUBSCRIBE, u"") # Causes block if given in options
self._tts_service = None self._tts_service = None
self._al_memory = None
def _handle_speech(self, message): def _handle_speech(self, message):
text = message.get("data") text = message.get("data")
@@ -41,10 +41,26 @@ class ActuationReceiver(ReceiverBase):
if not self._tts_service: if not self._tts_service:
self._tts_service = state.qi_session.service("ALTextToSpeech") self._tts_service = state.qi_session.service("ALTextToSpeech")
if not self._al_memory:
self._al_memory = state.qi_session.service("ALMemory")
# Subscribe to speech end event
self.status_subscriber = self._al_memory.subscriber("ALTextToSpeech/Status") # self because garbage collect
self.status_subscriber.signal.connect(self._on_status_changed)
# Returns instantly. Messages received while speaking will be queued. # Returns instantly. Messages received while speaking will be queued.
qi.async(self._tts_service.say, text) qi.async(self._tts_service.say, text)
@staticmethod
def _on_status_changed(value):
    """
    Callback for the ``ALTextToSpeech/Status`` signal; keeps ``state.is_speaking`` up to date.

    :param value: The payload of the status event. Statuses are looked up with ``in``, so both a
        plain status string and a container holding the status work. The NAOqi documentation
        describes the payload as ``[task_id, status]`` with status one of 'enqueued', 'started',
        'thrown', 'stopped' or 'done'.
    """
    if "started" in value:
        logging.debug("Started speaking.")
        state.is_speaking = True

    if "done" in value:
        logging.debug("Done speaking.")
        state.is_speaking = False
    elif "stopped" in value or "thrown" in value:
        # A speech task that is stopped or fails does not necessarily report 'done'. Without
        # this branch `is_speaking` would stay True and audio would never be sent again.
        logging.debug("Speech was interrupted or failed.")
        state.is_speaking = False
def handle_message(self, message): def handle_message(self, message):
if message["endpoint"] == "actuate/speech": if message["endpoint"] == "actuate/speech":
self._handle_speech(message) self._handle_speech(message)

View File

@@ -7,7 +7,7 @@ import zmq
from robot_interface.endpoints.socket_base import SocketBase from robot_interface.endpoints.socket_base import SocketBase
from robot_interface.state import state from robot_interface.state import state
from robot_interface.utils.microphone import choose_mic from robot_interface.utils.microphone import choose_mic_default
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -17,15 +17,9 @@ class AudioSender(SocketBase):
def __init__(self, zmq_context, port=5558): def __init__(self, zmq_context, port=5558):
super(AudioSender, self).__init__(str("audio")) # Convert future's unicode_literal to str super(AudioSender, self).__init__(str("audio")) # Convert future's unicode_literal to str
self.create_socket(zmq_context, zmq.PUB, port) self.create_socket(zmq_context, zmq.PUB, port)
self.thread = None
try:
self.audio = pyaudio.PyAudio() self.audio = pyaudio.PyAudio()
self.microphone = choose_mic(self.audio) self.microphone = choose_mic_default(self.audio)
except IOError as e: self.thread = None
logger.warning("PyAudio is not available.", exc_info=e)
self.audio = None
self.microphone = None
def start(self): def start(self):
""" """
@@ -63,6 +57,13 @@ class AudioSender(SocketBase):
try: try:
while not state.exit_event.is_set(): while not state.exit_event.is_set():
# Don't send audio if Pepper is speaking
if state.is_speaking:
if stream.is_active(): stream.stop_stream()
continue
if stream.is_stopped(): stream.start_stream()
data = stream.read(chunk) data = stream.read(chunk)
self.socket.send(data) self.socket.send(data)
except IOError as e: except IOError as e:

View File

@@ -45,10 +45,7 @@ class MainReceiver(ReceiverBase):
if message["endpoint"] == "negotiate/ports": if message["endpoint"] == "negotiate/ports":
return MainReceiver._handle_port_negotiation(message) return MainReceiver._handle_port_negotiation(message)
return { return {"endpoint": "negotiate/error", "data": "The requested endpoint is not implemented."}
"endpoint": "negotiate/error",
"data": "The requested endpoint is not implemented.",
}
def handle_message(self, message): def handle_message(self, message):
if message["endpoint"] == "ping": if message["endpoint"] == "ping":

View File

@@ -1,5 +1,6 @@
from abc import ABCMeta from abc import ABCMeta
import os
import zmq
class SocketBase(object): class SocketBase(object):
@@ -18,7 +19,7 @@ class SocketBase(object):
self.socket = None # Set later by `create_socket` self.socket = None # Set later by `create_socket`
self.bound = None # Set later by `create_socket` self.bound = None # Set later by `create_socket`
def create_socket(self, zmq_context, socket_type, port, options=[], bind=False): def create_socket(self, zmq_context, socket_type, port, options=[], bind=True):
""" """
Create a ZeroMQ socket. Create a ZeroMQ socket.
@@ -42,19 +43,17 @@ class SocketBase(object):
self.socket = zmq_context.socket(socket_type) self.socket = zmq_context.socket(socket_type)
for option, arg in options: for option, arg in options:
self.socket.setsockopt(option, arg) self.socket.setsockopt(option,arg)
self.bound = bind self.bound = bind
host = os.environ.get("CB_HOST", "localhost")
if bind: if bind:
self.socket.bind("tcp://{}:{}".format(host, port)) self.socket.bind("tcp://*:{}".format(port))
else: else:
self.socket.connect("tcp://{}:{}".format(host, port)) self.socket.connect("tcp://localhost:{}".format(port))
def close(self): def close(self):
"""Close the ZeroMQ socket.""" """Close the ZeroMQ socket."""
if not self.socket: if not self.socket: return
return
self.socket.close() self.socket.close()
self.socket = None self.socket = None
@@ -66,4 +65,8 @@ class SocketBase(object):
https://utrechtuniversity.youtrack.cloud/articles/N25B-A-14/RI-CB-Communication#negotiation https://utrechtuniversity.youtrack.cloud/articles/N25B-A-14/RI-CB-Communication#negotiation
:rtype: dict :rtype: dict
""" """
return {"id": self.identifier, "port": self.port, "bind": not self.bound} return {
"id": self.identifier,
"port": self.port,
"bind": not self.bound
}

View File

@@ -1,5 +1,6 @@
import zmq import zmq
import threading import threading
import qi
import logging import logging
from robot_interface.endpoints.socket_base import SocketBase from robot_interface.endpoints.socket_base import SocketBase

View File

@@ -43,8 +43,19 @@ def main_loop(context):
logging.debug("Starting main loop.") logging.debug("Starting main loop.")
import schedule
test_speaking_message = {"data": "Hi, my name is Pepper, and this is quite a long message."}
def test_speak():
logging.debug("Testing speech.")
actuation_receiver._handle_speech(test_speaking_message)
schedule.every(10).seconds.do(test_speak)
while True: while True:
if state.exit_event.is_set(): break if state.exit_event.is_set(): break
schedule.run_pending()
socks = dict(poller.poll(100)) socks = dict(poller.poll(100))
for receiver in receivers: for receiver in receivers:

View File

@@ -18,6 +18,7 @@ class State(object):
self.exit_event = None self.exit_event = None
self.sockets = [] # type: List[SocketBase] self.sockets = [] # type: List[SocketBase]
self.qi_session = None # type: None | ssl.SSLSession self.qi_session = None # type: None | ssl.SSLSession
self.is_speaking = False # type: Boolean
def initialize(self): def initialize(self):
if self.is_initialized: if self.is_initialized:

View File

@@ -1,6 +1,5 @@
from __future__ import unicode_literals # So that `print` can print Unicode characters in names from __future__ import unicode_literals # So that `print` can print Unicode characters in names
import logging import logging
import sys
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -68,53 +67,3 @@ def choose_mic_default(audio):
return audio.get_default_input_device_info() return audio.get_default_input_device_info()
except IOError: except IOError:
return None return None
def choose_mic_arguments(audio):
    """
    Get a microphone to use from command line arguments.

    Both ``--microphone NAME`` and ``--microphone=NAME`` are recognised. If the option is given
    more than once, the last occurrence wins.

    :param audio: An instance of PyAudio to use.
    :type audio: pyaudio.PyAudio

    :return: A dictionary from PyAudio containing information about the microphone to use, or None
        if there is no microphone satisfied by the arguments.
    :rtype: dict | None
    """
    # On Python 2 `sys.argv` holds byte strings. Comparing those with text (this module uses
    # unicode_literals) raises UnicodeDecodeError for non-ASCII arguments, so decode them first.
    encoding = sys.getfilesystemencoding() or "utf-8"
    arguments = [
        arg.decode(encoding, "replace") if isinstance(arg, bytes) else arg
        for arg in sys.argv
    ]

    flag = "--microphone"
    prefix = flag + "="

    microphone_name = None
    for i, arg in enumerate(arguments):
        if arg == flag and len(arguments) > i + 1:
            microphone_name = arguments[i + 1].strip()
        if arg.startswith(prefix):
            microphone_name = arg[len(prefix):].strip()

    # No (non-empty) name requested: nothing to look up, and the devices are not queried.
    if not microphone_name:
        return None

    available_mics = list(get_microphones(audio))
    for mic in available_mics:
        if mic["name"] == microphone_name:
            return mic

    # Help the user by listing the names that would have been accepted.
    available_mic_names = [mic["name"] for mic in available_mics]
    logger.warning("Microphone \"{}\" not found. Choose one of {}"
                   .format(microphone_name, available_mic_names))

    return None
def choose_mic(audio):
    """
    Pick the microphone to record from.

    A microphone named through the application's command line arguments takes precedence; when
    the arguments do not yield one, the default microphone is used instead.

    :param audio: An instance of PyAudio to use.
    :type audio: pyaudio.PyAudio

    :return: A dictionary from PyAudio containing information about the microphone to use, or None
        if there is no microphone.
    :rtype: dict | None
    """
    # `or` only falls through to the default lookup when the arguments gave nothing usable.
    return choose_mic_arguments(audio) or choose_mic_default(audio)

View File

@@ -1,15 +1,8 @@
from __future__ import unicode_literals # So that we can format strings with Unicode characters
import random import random
import sys import sys
from StringIO import StringIO from StringIO import StringIO
from robot_interface.utils.microphone import ( from robot_interface.utils.microphone import choose_mic_default, choose_mic_interactive, get_microphones
choose_mic_default,
choose_mic_interactive,
choose_mic_arguments,
choose_mic,
get_microphones,
)
class MicrophoneUtils(object): class MicrophoneUtils(object):
@@ -100,53 +93,3 @@ class MicrophoneUtils(object):
assert "index" in result assert "index" in result
assert isinstance(result["index"], (int, long)) assert isinstance(result["index"], (int, long))
assert result["index"] == microphones[random_index]["index"] assert result["index"] == microphones[random_index]["index"]
def test_choose_mic_no_arguments(self, pyaudio_instance, mocker):
    """With an empty argument list, no microphone is selected from the arguments."""
    mocker.patch.object(sys, "argv", [])

    assert choose_mic_arguments(pyaudio_instance) is None
def test_choose_mic_arguments(self, pyaudio_instance, mocker):
    """Every available microphone can be selected with ``--microphone NAME``."""
    for expected in get_microphones(pyaudio_instance):
        argv = ["--microphone", expected["name"]]
        mocker.patch.object(sys, "argv", argv)

        chosen = choose_mic_arguments(pyaudio_instance)

        assert chosen is not None
        assert chosen == expected
def test_choose_mic_arguments_eq(self, pyaudio_instance, mocker):
    """Every available microphone can be selected with ``--microphone=NAME``."""
    for expected in get_microphones(pyaudio_instance):
        argv = ["--microphone={}".format(expected["name"])]
        mocker.patch.object(sys, "argv", argv)

        chosen = choose_mic_arguments(pyaudio_instance)

        assert chosen is not None
        assert chosen == expected
def test_choose_mic_arguments_not_exits(self, pyaudio_instance, mocker):
    """Asking for a microphone name that is not available yields no microphone."""
    argv = ["--microphone", "Surely this microphone doesn't exist"]
    mocker.patch.object(sys, "argv", argv)

    assert choose_mic_arguments(pyaudio_instance) is None
def test_choose_mic_with_argument(self, pyaudio_instance, mocker):
    """`choose_mic` returns the microphone that is named on the command line."""
    expected = next(get_microphones(pyaudio_instance))
    argv = ["--microphone", expected["name"]]
    mocker.patch.object(sys, "argv", argv)

    chosen = choose_mic(pyaudio_instance)

    assert chosen is not None
    assert chosen == expected
def test_choose_mic_no_argument(self, pyaudio_instance, mocker):
    """Without a microphone argument, `choose_mic` falls back to the default microphone."""
    # Look up the expected default before the arguments are replaced, as the fallback should
    # return exactly this device.
    expected = choose_mic_default(pyaudio_instance)
    mocker.patch.object(sys, "argv", [])

    chosen = choose_mic(pyaudio_instance)

    assert chosen is not None
    assert chosen == expected

View File

@@ -17,7 +17,7 @@ def zmq_context():
def test_no_microphone(zmq_context, mocker): def test_no_microphone(zmq_context, mocker):
mock_info_logger = mocker.patch("robot_interface.endpoints.audio_sender.logger.info") mock_info_logger = mocker.patch("robot_interface.endpoints.audio_sender.logger.info")
mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
mock_choose_mic.return_value = None mock_choose_mic.return_value = None
sender = AudioSender(zmq_context) sender = AudioSender(zmq_context)
@@ -32,7 +32,7 @@ def test_no_microphone(zmq_context, mocker):
def test_unicode_mic_name(zmq_context, mocker): def test_unicode_mic_name(zmq_context, mocker):
mocker.patch("robot_interface.endpoints.audio_sender.threading") mocker.patch("robot_interface.endpoints.audio_sender.threading")
mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
mock_choose_mic.return_value = {"name": u"• Some Unicode name"} mock_choose_mic.return_value = {"name": u"• Some Unicode name"}
sender = AudioSender(zmq_context) sender = AudioSender(zmq_context)
@@ -51,7 +51,7 @@ def _fake_read(num_frames):
def test_sending_audio(mocker): def test_sending_audio(mocker):
mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L}
mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")
@@ -80,7 +80,7 @@ def _fake_read_error(num_frames):
def test_break_microphone(mocker): def test_break_microphone(mocker):
mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic") mock_choose_mic = mocker.patch("robot_interface.endpoints.audio_sender.choose_mic_default")
mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L} mock_choose_mic.return_value = {"name": u"Some mic", "index": 0L}
mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state") mock_state = mocker.patch("robot_interface.endpoints.audio_sender.state")