feat: implemented forced speech and speech queue

2026-01-14 14:26:38 +00:00
parent 1e77548622
commit e51cf8fe65
7 changed files with 345 additions and 33 deletions
--- a/src/robot_interface/endpoints/actuation_receiver.py
+++ b/src/robot_interface/endpoints/actuation_receiver.py
@@ -1,11 +1,12 @@
 from __future__ import unicode_literals  # So that we can log texts with Unicode characters
 import logging
+from threading import Thread

+import Queue
 import zmq

 from robot_interface.endpoints.receiver_base import ReceiverBase
 from robot_interface.state import state
-
 from robot_interface.core.config import settings
 from robot_interface.endpoints.gesture_settings import GestureTags

@@ -32,6 +33,9 @@ class ActuationReceiver(ReceiverBase):
        self.socket.setsockopt_string(zmq.SUBSCRIBE, u"")  # Causes block if given in options
        self._tts_service = None
        self._animation_service = None
+        self._message_queue = Queue.Queue()
+        self.message_thread = Thread(target=self._handle_messages)
+        self.message_thread.start()

    def _handle_speech(self, message):
        """
@@ -58,8 +62,26 @@ class ActuationReceiver(ReceiverBase):
        if not self._tts_service:
            self._tts_service = state.qi_session.service("ALTextToSpeech")

-        # Returns instantly. Messages received while speaking will be queued.
-        getattr(qi, "async")(self._tts_service.say, text)
+        if message.get("is_priority"):
+            # Bypass queue and speak immediately
+            self.clear_queue()
+            self._message_queue.put(text)
+            logging.debug("Force speaking immediately: {}".format(text))
+        else: 
+            self._message_queue.put(text)
+
+    def clear_queue(self):
+        """
+        Safely drains all pending messages from the queue.
+        """
+        logging.info("Message queue size: {}".format(self._message_queue.qsize()))
+        try:
+            while True:
+                # Remove items one by one without waiting
+                self._message_queue.get_nowait()
+        except Queue.Empty:
+            pass
+        logging.info("Message queue cleared.")

    def _handle_gesture(self, message, is_single):
        """
@@ -122,6 +144,19 @@ class ActuationReceiver(ReceiverBase):
        if message["endpoint"] == "actuate/gesture/single":
            self._handle_gesture(message, True)

+    def _handle_messages(self):
+        while not state.exit_event.is_set():
+            try:
+                text = self._message_queue.get(timeout=0.1)
+                state.is_speaking = True
+                self._tts_service.say(text)
+            except Queue.Empty:
+                state.is_speaking = False
+            except RuntimeError:
+                logging.error("Lost connection to Pepper. Please check if you're connected to the "
+                              "local WiFi and restart this application.")
+                state.exit_event.set()        
+
    def endpoint_description(self):
        """
        Extend the default endpoint description with gesture tags.
--- a/src/robot_interface/endpoints/audio_sender.py
+++ b/src/robot_interface/endpoints/audio_sender.py
@@ -89,6 +89,7 @@ class AudioSender(SocketBase):
        try:
            while not state.exit_event.is_set():
                data = stream.read(chunk)
+                if (state.is_speaking): continue  # Do not send audio while the robot is speaking
                self.socket.send(data)
        except IOError as e:
            logger.error("Stopped listening: failed to get audio from microphone.", exc_info=e)
--- a/src/robot_interface/endpoints/video_sender.py
+++ b/src/robot_interface/endpoints/video_sender.py
@@ -6,6 +6,7 @@ from robot_interface.endpoints.socket_base import SocketBase
 from robot_interface.state import state
 from robot_interface.core.config import settings

+
 class VideoSender(SocketBase):
    """
    Video sender endpoint, responsible for sending video frames.
--- a/src/robot_interface/state.py
+++ b/src/robot_interface/state.py
@@ -30,6 +30,7 @@ class State(object):
        self.exit_event = None
        self.sockets = []  
        self.qi_session = None 
+        self.is_speaking = False

    def initialize(self):
        """