Merge remote-tracking branch 'origin/dev' into feat/agentspeak-generation

2025-12-17 13:20:14 +01:00
parent e704ec5ed4 3e7f2ef574
commit 57fe3ae3f6
34 changed files with 2172 additions and 81 deletions
--- a/src/control_backend/agents/actuation/init.py
+++ b/src/control_backend/agents/actuation/init.py
@@ -1 +1,2 @@
+from .robot_gesture_agent import RobotGestureAgent as RobotGestureAgent
 from .robot_speech_agent import RobotSpeechAgent as RobotSpeechAgent
--- a/src/control_backend/agents/actuation/robot_gesture_agent.py
+++ b/src/control_backend/agents/actuation/robot_gesture_agent.py
@@ -0,0 +1,162 @@
+import json
+
+import zmq
+import zmq.asyncio as azmq
+
+from control_backend.agents import BaseAgent
+from control_backend.core.agent_system import InternalMessage
+from control_backend.core.config import settings
+from control_backend.schemas.ri_message import GestureCommand, RIEndpoint
+
+
+class RobotGestureAgent(BaseAgent):
+    """
+    This agent acts as a bridge between the control backend and the Robot Interface (RI).
+    It receives gesture commands from other agents or from the UI,
+    and forwards them to the robot via a ZMQ PUB socket.
+
+    :ivar subsocket: ZMQ SUB socket for receiving external commands (e.g., from UI).
+    :ivar pubsocket: ZMQ PUB socket for sending commands to the Robot Interface.
+    :ivar address: Address to bind/connect the PUB socket.
+    :ivar bind: Whether to bind or connect the PUB socket.
+    :ivar gesture_data: A list of strings for available gestures
+    """
+
+    subsocket: azmq.Socket
+    repsocket: azmq.Socket
+    pubsocket: azmq.Socket
+    address = ""
+    bind = False
+    gesture_data = []
+
+    def __init__(
+        self,
+        name: str,
+        address=settings.zmq_settings.ri_command_address,
+        bind=False,
+        gesture_data=None,
+    ):
+        self.gesture_data = gesture_data or []
+        super().__init__(name)
+        self.address = address
+        self.bind = bind
+
+    async def setup(self):
+        """
+        Initialize the agent.
+
+        1. Sets up the PUB socket to talk to the robot.
+        2. Sets up the SUB socket to listen for "command" topics (from UI/External).
+        3. Starts the loop for handling ZMQ commands.
+        """
+        self.logger.info("Setting up %s", self.name)
+
+        context = azmq.Context.instance()
+
+        # To the robot
+        self.pubsocket = context.socket(zmq.PUB)
+        if self.bind:
+            self.pubsocket.bind(self.address)
+        else:
+            self.pubsocket.connect(self.address)
+
+        # Receive internal topics regarding commands
+        self.subsocket = context.socket(zmq.SUB)
+        self.subsocket.connect(settings.zmq_settings.internal_sub_address)
+        self.subsocket.setsockopt(zmq.SUBSCRIBE, b"command")
+        self.subsocket.setsockopt(zmq.SUBSCRIBE, b"send_gestures")
+
+        # REP socket for replying to gesture requests
+        self.repsocket = context.socket(zmq.REP)
+        self.repsocket.bind(settings.zmq_settings.internal_gesture_rep_adress)
+
+        self.add_behavior(self._zmq_command_loop())
+        self.add_behavior(self._fetch_gestures_loop())
+
+        self.logger.info("Finished setting up %s", self.name)
+
+    async def stop(self):
+        if self.subsocket:
+            self.subsocket.close()
+        if self.pubsocket:
+            self.pubsocket.close()
+        await super().stop()
+
+    async def handle_message(self, msg: InternalMessage):
+        """
+        Handle commands received from other internal Python agents.
+
+        Validates the message as a :class:`GestureCommand` and forwards it to the robot.
+
+        :param msg: The internal message containing the command.
+        """
+        try:
+            gesture_command = GestureCommand.model_validate_json(msg.body)
+            if gesture_command.endpoint == RIEndpoint.GESTURE_TAG:
+                if gesture_command.data not in self.gesture_data:
+                    self.logger.warning(
+                        "Received gesture tag '%s' which is not in available tags. Early returning",
+                        gesture_command.data,
+                    )
+                    return
+
+            await self.pubsocket.send_json(gesture_command.model_dump())
+        except Exception:
+            self.logger.exception("Error processing internal message.")
+
+    async def _zmq_command_loop(self):
+        """
+        Loop to handle commands received via ZMQ (e.g., from the UI).
+
+        Listens on the 'command' topic, validates the JSON and forwards it to the robot.
+        """
+        while self._running:
+            try:
+                topic, body = await self.subsocket.recv_multipart()
+
+                # Don't process send_gestures here
+                if topic != b"command":
+                    continue
+
+                body = json.loads(body)
+                gesture_command = GestureCommand.model_validate(body)
+                if gesture_command.endpoint == RIEndpoint.GESTURE_TAG:
+                    if gesture_command.data not in self.gesture_data:
+                        self.logger.warning(
+                            "Received gesture tag '%s' which is not in available tags.\
+                            Early returning",
+                            gesture_command.data,
+                        )
+                        continue
+                await self.pubsocket.send_json(gesture_command.model_dump())
+            except Exception:
+                self.logger.exception("Error processing ZMQ message.")
+
+    async def _fetch_gestures_loop(self):
+        """
+        Loop to handle fetching gestures received via ZMQ (e.g., from the UI).
+
+        Listens on the 'send_gestures' topic, and returns a list on the get_gestures topic.
+        """
+        while self._running:
+            try:
+                # Get a request
+                body = await self.repsocket.recv()
+
+                # Figure out amount, if specified
+                try:
+                    body = json.loads(body)
+                except json.JSONDecodeError:
+                    body = None
+
+                amount = None
+                if isinstance(body, int):
+                    amount = body
+
+                # Fetch tags from gesture data and respond
+                tags = self.gesture_data[:amount] if amount else self.gesture_data
+                response = json.dumps({"tags": tags}).encode()
+                await self.repsocket.send(response)
+
+            except Exception:
+                self.logger.exception("Error fetching gesture tags.")
--- a/src/control_backend/agents/actuation/robot_speech_agent.py
+++ b/src/control_backend/agents/actuation/robot_speech_agent.py
@@ -29,7 +29,7 @@ class RobotSpeechAgent(BaseAgent):
    def __init__(
        self,
        name: str,
-        address=settings.zmq_settings.ri_command_address,
+        address: str,
        bind=False,
    ):
        super().__init__(name)
--- a/src/control_backend/agents/communication/ri_communication_agent.py
+++ b/src/control_backend/agents/communication/ri_communication_agent.py
@@ -6,9 +6,11 @@ import zmq.asyncio as azmq
 from zmq.asyncio import Context

 from control_backend.agents import BaseAgent
+from control_backend.agents.actuation.robot_gesture_agent import RobotGestureAgent
 from control_backend.core.config import settings

 from ..actuation.robot_speech_agent import RobotSpeechAgent
+from ..perception import VADAgent


 class RICommunicationAgent(BaseAgent):
@@ -179,12 +181,24 @@ class RICommunicationAgent(BaseAgent):
                        else:
                            self._req_socket.bind(addr)
                case "actuation":
-                    ri_commands_agent = RobotSpeechAgent(
+                    gesture_data = port_data.get("gestures", [])
+                    robot_speech_agent = RobotSpeechAgent(
                        settings.agent_settings.robot_speech_name,
                        address=addr,
                        bind=bind,
                    )
-                    await ri_commands_agent.start()
+                    robot_gesture_agent = RobotGestureAgent(
+                        settings.agent_settings.robot_gesture_name,
+                        address=addr,
+                        bind=bind,
+                        gesture_data=gesture_data,
+                    )
+                    await robot_speech_agent.start()
+                    await asyncio.sleep(0.1)  # Small delay
+                    await robot_gesture_agent.start()
+                case "audio":
+                    vad_agent = VADAgent(audio_in_address=addr, audio_in_bind=bind)
+                    await vad_agent.start()
                case _:
                    self.logger.warning("Unhandled negotiation id: %s", id)

--- a/src/control_backend/agents/llm/llm_agent.py
+++ b/src/control_backend/agents/llm/llm_agent.py
@@ -125,7 +125,7 @@ class LLMAgent(BaseAgent):
                full_message += token
                current_chunk += token

-                self.logger.info(
+                self.logger.llm(
                    "Received token: %s",
                    full_message,
                    extra={"reference": message_id},  # Used in the UI to update old logs
--- a/src/control_backend/agents/perception/vad_agent.py
+++ b/src/control_backend/agents/perception/vad_agent.py
@@ -8,6 +8,7 @@ import zmq.asyncio as azmq
 from control_backend.agents import BaseAgent
 from control_backend.core.config import settings

+from ...schemas.program_status import PROGRAM_STATUS, ProgramStatus
 from .transcription_agent.transcription_agent import TranscriptionAgent


@@ -61,6 +62,7 @@ class VADAgent(BaseAgent):
    :ivar audio_in_address: Address of the input audio stream.
    :ivar audio_in_bind: Whether to bind or connect to the input address.
    :ivar audio_out_socket: ZMQ PUB socket for sending speech fragments.
+    :ivar program_sub_socket: ZMQ SUB socket for receiving program status updates.
    """

    def __init__(self, audio_in_address: str, audio_in_bind: bool):
@@ -79,6 +81,8 @@ class VADAgent(BaseAgent):
        self.audio_out_socket: azmq.Socket | None = None
        self.audio_in_poller: SocketPoller | None = None

+        self.program_sub_socket: azmq.Socket | None = None
+
        self.audio_buffer = np.array([], dtype=np.float32)
        self.i_since_speech = settings.behaviour_settings.vad_initial_since_speech
        self._ready = asyncio.Event()
@@ -90,9 +94,10 @@ class VADAgent(BaseAgent):

        1. Connects audio input socket.
        2. Binds audio output socket (random port).
-        3. Loads VAD model from Torch Hub.
-        4. Starts the streaming loop.
-        5. Instantiates and starts the :class:`TranscriptionAgent` with the output address.
+        3. Connects to program communication socket.
+        4. Loads VAD model from Torch Hub.
+        5. Starts the streaming loop.
+        6. Instantiates and starts the :class:`TranscriptionAgent` with the output address.
        """
        self.logger.info("Setting up %s", self.name)

@@ -105,6 +110,11 @@ class VADAgent(BaseAgent):
            return
        audio_out_address = f"tcp://localhost:{audio_out_port}"

+        # Connect to internal communication socket
+        self.program_sub_socket = azmq.Context.instance().socket(zmq.SUB)
+        self.program_sub_socket.connect(settings.zmq_settings.internal_sub_address)
+        self.program_sub_socket.subscribe(PROGRAM_STATUS)
+
        # Initialize VAD model
        try:
            self.model, _ = torch.hub.load(
@@ -117,10 +127,8 @@ class VADAgent(BaseAgent):
            await self.stop()
            return

-        # Warmup/reset
-        await self.reset_stream()
-
        self.add_behavior(self._streaming_loop())
+        self.add_behavior(self._status_loop())

        # Start agents dependent on the output audio fragments here
        transcriber = TranscriptionAgent(audio_out_address)
@@ -165,7 +173,7 @@ class VADAgent(BaseAgent):
            self.audio_out_socket = None
            return None

-    async def reset_stream(self):
+    async def _reset_stream(self):
        """
        Clears the ZeroMQ queue and sets ready state.
        """
@@ -176,6 +184,23 @@ class VADAgent(BaseAgent):
        self.logger.info(f"Discarded {discarded} audio packets before starting.")
        self._ready.set()

+    async def _status_loop(self):
+        """Loop for checking program status. Only start listening if program is RUNNING."""
+        while self._running:
+            topic, body = await self.program_sub_socket.recv_multipart()
+
+            if topic != PROGRAM_STATUS:
+                continue
+            if body != ProgramStatus.RUNNING.value:
+                continue
+
+            # Program is now running, we can start our stream
+            await self._reset_stream()
+
+            # We don't care about further status updates
+            self.program_sub_socket.close()
+            break
+
    async def _streaming_loop(self):
        """
        Main loop for processing audio stream.