Merge remote-tracking branch 'origin/dev' into feat/environment-variables

# Conflicts:
#	src/control_backend/core/config.py
#	test/unit/agents/actuation/test_robot_speech_agent.py
Twirre Meulenbelt committed 2025-12-29 12:35:39 +01:00
23 changed files with 1477 additions and 49 deletions

View File

@@ -1 +1,2 @@
from .robot_gesture_agent import RobotGestureAgent as RobotGestureAgent
from .robot_speech_agent import RobotSpeechAgent as RobotSpeechAgent

View File

@@ -0,0 +1,171 @@
import json
import zmq
import zmq.asyncio as azmq
from control_backend.agents import BaseAgent
from control_backend.core.agent_system import InternalMessage
from control_backend.core.config import settings
from control_backend.schemas.ri_message import GestureCommand, RIEndpoint
class RobotGestureAgent(BaseAgent):
"""
This agent acts as a bridge between the control backend and the Robot Interface (RI).
It receives gesture commands from other agents or from the UI,
and forwards them to the robot via a ZMQ PUB socket.
    :ivar subsocket: ZMQ SUB socket for receiving external commands (e.g., from the UI).
    :ivar repsocket: ZMQ REP socket for answering requests for the available gestures.
    :ivar pubsocket: ZMQ PUB socket for sending commands to the Robot Interface.
    :ivar address: Address to bind/connect the PUB socket.
    :ivar bind: Whether to bind or connect the PUB socket.
    :ivar gesture_data: A list of strings with the available gesture tags.
    :ivar single_gesture_data: A list of strings with the available single gestures.
"""
subsocket: azmq.Socket
repsocket: azmq.Socket
pubsocket: azmq.Socket
address = ""
bind = False
gesture_data = []
single_gesture_data = []
def __init__(
self,
name: str,
address=settings.zmq_settings.ri_command_address,
bind=False,
gesture_data=None,
single_gesture_data=None,
):
self.gesture_data = gesture_data or []
self.single_gesture_data = single_gesture_data or []
super().__init__(name)
self.address = address
self.bind = bind
async def setup(self):
"""
Initialize the agent.
        1. Sets up the PUB socket to talk to the robot.
        2. Sets up the SUB socket to listen for "command" topics (from UI/External).
        3. Sets up the REP socket that answers requests for the available gestures.
        4. Starts the loops for handling ZMQ commands and gesture requests.
"""
self.logger.info("Setting up %s", self.name)
context = azmq.Context.instance()
# To the robot
self.pubsocket = context.socket(zmq.PUB)
if self.bind:
self.pubsocket.bind(self.address)
else:
self.pubsocket.connect(self.address)
# Receive internal topics regarding commands
self.subsocket = context.socket(zmq.SUB)
self.subsocket.connect(settings.zmq_settings.internal_sub_address)
self.subsocket.setsockopt(zmq.SUBSCRIBE, b"command")
self.subsocket.setsockopt(zmq.SUBSCRIBE, b"send_gestures")
# REP socket for replying to gesture requests
self.repsocket = context.socket(zmq.REP)
self.repsocket.bind(settings.zmq_settings.internal_gesture_rep_adress)
self.add_behavior(self._zmq_command_loop())
self.add_behavior(self._fetch_gestures_loop())
self.logger.info("Finished setting up %s", self.name)
async def stop(self):
if self.subsocket:
self.subsocket.close()
        if self.pubsocket:
            self.pubsocket.close()
        if self.repsocket:
            self.repsocket.close()
await super().stop()
async def handle_message(self, msg: InternalMessage):
"""
Handle commands received from other internal Python agents.
Validates the message as a :class:`GestureCommand` and forwards it to the robot.
:param msg: The internal message containing the command.
"""
try:
gesture_command = GestureCommand.model_validate_json(msg.body)
if gesture_command.endpoint == RIEndpoint.GESTURE_TAG:
if gesture_command.data not in self.gesture_data:
self.logger.warning(
"Received gesture tag '%s' which is not in available tags. Early returning",
gesture_command.data,
)
return
elif gesture_command.endpoint == RIEndpoint.GESTURE_SINGLE:
if gesture_command.data not in self.single_gesture_data:
self.logger.warning(
"Received gesture '%s' which is not in available gestures. Early returning",
gesture_command.data,
)
return
await self.pubsocket.send_json(gesture_command.model_dump())
except Exception:
self.logger.exception("Error processing internal message.")
async def _zmq_command_loop(self):
"""
Loop to handle commands received via ZMQ (e.g., from the UI).
Listens on the 'command' topic, validates the JSON and forwards it to the robot.
"""
while self._running:
try:
topic, body = await self.subsocket.recv_multipart()
# Don't process send_gestures here
if topic != b"command":
continue
body = json.loads(body)
gesture_command = GestureCommand.model_validate(body)
if gesture_command.endpoint == RIEndpoint.GESTURE_TAG:
if gesture_command.data not in self.gesture_data:
self.logger.warning(
"Received gesture tag '%s' which is not in available tags.\
Early returning",
gesture_command.data,
)
continue
await self.pubsocket.send_json(gesture_command.model_dump())
except Exception:
self.logger.exception("Error processing ZMQ message.")
async def _fetch_gestures_loop(self):
"""
        Loop to handle requests for the available gestures received via ZMQ (e.g., from the HTTP API).
        Waits for a request on the REP socket and replies with a JSON object containing the gesture tags.
"""
while self._running:
try:
# Get a request
body = await self.repsocket.recv()
# Figure out amount, if specified
try:
body = json.loads(body)
except json.JSONDecodeError:
body = None
amount = None
if isinstance(body, int):
amount = body
# Fetch tags from gesture data and respond
tags = self.gesture_data[:amount] if amount else self.gesture_data
response = json.dumps({"tags": tags}).encode()
await self.repsocket.send(response)
except Exception:
self.logger.exception("Error fetching gesture tags.")

View File

@@ -6,6 +6,7 @@ import zmq.asyncio as azmq
from zmq.asyncio import Context
from control_backend.agents import BaseAgent
from control_backend.agents.actuation.robot_gesture_agent import RobotGestureAgent
from control_backend.core.config import settings
from ..actuation.robot_speech_agent import RobotSpeechAgent
@@ -180,12 +181,23 @@ class RICommunicationAgent(BaseAgent):
else:
self._req_socket.bind(addr)
case "actuation":
ri_commands_agent = RobotSpeechAgent(
gesture_data = port_data.get("gestures", [])
single_gesture_data = port_data.get("single_gestures", [])
robot_speech_agent = RobotSpeechAgent(
settings.agent_settings.robot_speech_name,
address=addr,
bind=bind,
)
await ri_commands_agent.start()
robot_gesture_agent = RobotGestureAgent(
settings.agent_settings.robot_gesture_name,
address=addr,
bind=bind,
gesture_data=gesture_data,
single_gesture_data=single_gesture_data,
)
await robot_speech_agent.start()
await asyncio.sleep(0.1) # Small delay
await robot_gesture_agent.start()
case "audio":
vad_agent = VADAgent(audio_in_address=addr, audio_in_bind=bind)
await vad_agent.start()
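
For orientation (editorial note, not part of this commit): the gesture lists come from the port-negotiation data for the "actuation" port. A rough sketch of the assumed shape, with hypothetical values; only the "gestures" and "single_gestures" keys are grounded in the code above.

# Assumed shape of port_data for the "actuation" case; values are hypothetical.
port_data = {
    "gestures": ["greeting", "explain"],      # tag names used with GESTURE_TAG
    "single_gestures": ["wave", "nod"],       # names used with GESTURE_SINGLE
    # ...plus whatever addressing info the RI negotiates for this port
}
gesture_data = port_data.get("gestures", [])
single_gesture_data = port_data.get("single_gestures", [])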

View File

@@ -125,7 +125,7 @@ class LLMAgent(BaseAgent):
full_message += token
current_chunk += token
self.logger.info(
self.logger.llm(
"Received token: %s",
full_message,
extra={"reference": message_id}, # Used in the UI to update old logs

View File

@@ -0,0 +1,146 @@
import json
import zmq
from zmq.asyncio import Context
from control_backend.agents import BaseAgent
from control_backend.core.agent_system import InternalMessage
from control_backend.core.config import settings
from control_backend.schemas.ri_message import GestureCommand, RIEndpoint, SpeechCommand
class UserInterruptAgent(BaseAgent):
"""
User Interrupt Agent.
This agent receives button_pressed events from the external HTTP API
(via ZMQ) and uses the associated context to trigger one of the following actions:
- Send a prioritized message to the `RobotSpeechAgent`
- Send a prioritized gesture to the `RobotGestureAgent`
    - Send a belief override to the `BDIProgramManager` in order to activate a
      trigger/conditional norm or complete a goal.
Prioritized actions clear the current RI queue before inserting the new item,
ensuring they are executed immediately after Pepper's current action has been fulfilled.
    :ivar sub_socket: The ZMQ SUB socket used to receive user interrupts.
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.sub_socket = None
async def _receive_button_event(self):
"""
The behaviour of the UserInterruptAgent.
        Continuous loop that receives button_pressed events published by the button_pressed HTTP endpoint.
These events contain a type and a context.
These are the different types and contexts:
- type: "speech", context: string that the robot has to say.
- type: "gesture", context: single gesture name that the robot has to perform.
- type: "override", context: belief_id that overrides the goal/trigger/conditional norm.
"""
while True:
topic, body = await self.sub_socket.recv_multipart()
try:
event_data = json.loads(body)
event_type = event_data.get("type") # e.g., "speech", "gesture"
event_context = event_data.get("context") # e.g., "Hello, I am Pepper!"
except json.JSONDecodeError:
self.logger.error("Received invalid JSON payload on topic %s", topic)
continue
if event_type == "speech":
await self._send_to_speech_agent(event_context)
self.logger.info(
"Forwarded button press (speech) with context '%s' to RobotSpeechAgent.",
event_context,
)
elif event_type == "gesture":
await self._send_to_gesture_agent(event_context)
self.logger.info(
"Forwarded button press (gesture) with context '%s' to RobotGestureAgent.",
event_context,
)
elif event_type == "override":
await self._send_to_program_manager(event_context)
self.logger.info(
"Forwarded button press (override) with context '%s' to BDIProgramManager.",
event_context,
)
else:
self.logger.warning(
"Received button press with unknown type '%s' (context: '%s').",
event_type,
event_context,
)
async def _send_to_speech_agent(self, text_to_say: str):
"""
        Send a prioritized speech command to the RobotSpeechAgent.
:param text_to_say: The string that the robot has to say.
"""
cmd = SpeechCommand(data=text_to_say, is_priority=True)
out_msg = InternalMessage(
to=settings.agent_settings.robot_speech_name,
sender=self.name,
body=cmd.model_dump_json(),
)
await self.send(out_msg)
async def _send_to_gesture_agent(self, single_gesture_name: str):
"""
        Send a prioritized gesture command to the RobotGestureAgent.
:param single_gesture_name: The gesture tag that the robot has to perform.
"""
# the endpoint is set to always be GESTURE_SINGLE for user interrupts
cmd = GestureCommand(
endpoint=RIEndpoint.GESTURE_SINGLE, data=single_gesture_name, is_priority=True
)
out_msg = InternalMessage(
to=settings.agent_settings.robot_gesture_name,
sender=self.name,
body=cmd.model_dump_json(),
)
await self.send(out_msg)
async def _send_to_program_manager(self, belief_id: str):
"""
Send a button_override belief to the BDIProgramManager.
:param belief_id: The belief_id that overrides the goal/trigger/conditional norm.
            This id can belong to a basic belief or an inferred belief.
See also: https://utrechtuniversity.youtrack.cloud/articles/N25B-A-27/UI-components
"""
data = {"belief": belief_id}
message = InternalMessage(
to=settings.agent_settings.bdi_program_manager_name,
sender=self.name,
body=json.dumps(data),
thread="belief_override_id",
)
await self.send(message)
self.logger.info(
"Sent button_override belief with id '%s' to Program manager.",
belief_id,
)
async def setup(self):
"""
Initialize the agent.
Connects the internal ZMQ SUB socket and subscribes to the 'button_pressed' topic.
Starts the background behavior to receive the user interrupts.
"""
context = Context.instance()
self.sub_socket = context.socket(zmq.SUB)
self.sub_socket.connect(settings.zmq_settings.internal_sub_address)
self.sub_socket.subscribe("button_pressed")
self.add_behavior(self._receive_button_event())
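
As a quick illustration (editorial note, not part of this commit), the three payload shapes the loop above handles. The speech string comes from the comment in the code; the gesture name and belief id are hypothetical.

# The three button_pressed payloads handled by _receive_button_event.
speech_event = {"type": "speech", "context": "Hello, I am Pepper!"}
gesture_event = {"type": "gesture", "context": "wave"}         # hypothetical gesture name
override_event = {"type": "override", "context": "belief_42"}  # hypothetical belief id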

View File

@@ -0,0 +1,31 @@
import logging
from fastapi import APIRouter, Request
from control_backend.schemas.events import ButtonPressedEvent
logger = logging.getLogger(__name__)
router = APIRouter()
@router.post("/button_pressed", status_code=202)
async def receive_button_event(event: ButtonPressedEvent, request: Request):
"""
Endpoint to handle external button press events.
Validates the event payload and publishes it to the internal 'button_pressed' topic.
Subscribers (in this case user_interrupt_agent) will pick this up to trigger
specific behaviors or state changes.
:param event: The parsed ButtonPressedEvent object.
:param request: The FastAPI request object.
"""
logger.debug("Received button event: %s | %s", event.type, event.context)
topic = b"button_pressed"
body = event.model_dump_json().encode()
pub_socket = request.app.state.endpoints_pub_socket
await pub_socket.send_multipart([topic, body])
return {"status": "Event received"}

View File

@@ -8,15 +8,15 @@ from fastapi.responses import StreamingResponse
from zmq.asyncio import Context, Socket
from control_backend.core.config import settings
from control_backend.schemas.ri_message import SpeechCommand
from control_backend.schemas.ri_message import GestureCommand, SpeechCommand
logger = logging.getLogger(__name__)
router = APIRouter()
@router.post("/command", status_code=202)
async def receive_command(command: SpeechCommand, request: Request):
@router.post("/command/speech", status_code=202)
async def receive_command_speech(command: SpeechCommand, request: Request):
"""
Send a direct speech command to the robot.
@@ -27,14 +27,32 @@ async def receive_command(command: SpeechCommand, request: Request):
:param command: The speech command payload.
:param request: The FastAPI request object.
"""
# Validate and retrieve data.
SpeechCommand.model_validate(command)
topic = b"command"
pub_socket: Socket = request.app.state.endpoints_pub_socket
await pub_socket.send_multipart([topic, command.model_dump_json().encode()])
return {"status": "Command received"}
return {"status": "Speech command received"}
@router.post("/command/gesture", status_code=202)
async def receive_command_gesture(command: GestureCommand, request: Request):
"""
Send a direct gesture command to the robot.
Publishes the command to the internal 'command' topic. The
    :class:`~control_backend.agents.actuation.robot_gesture_agent.RobotGestureAgent`
will forward this to the robot.
    :param command: The gesture command payload.
:param request: The FastAPI request object.
"""
topic = b"command"
pub_socket: Socket = request.app.state.endpoints_pub_socket
await pub_socket.send_multipart([topic, command.model_dump_json().encode()])
return {"status": "Gesture command received"}
@router.get("/ping_check")
@@ -45,6 +63,41 @@ async def ping(request: Request):
pass
@router.get("/commands/gesture/tags")
async def get_available_gesture_tags(request: Request, count=0):
"""
Endpoint to retrieve the available gesture tags for the robot.
    :param request: The FastAPI request object.
    :param count: Optional maximum number of tags to return (0 or unset returns all).
    :return: A list of available gesture tags.
"""
req_socket = Context.instance().socket(zmq.REQ)
req_socket.connect(settings.zmq_settings.internal_gesture_rep_adress)
# Check to see if we've got any count given in the query parameter
amount = count or None
timeout = 5 # seconds
await req_socket.send(f"{amount}".encode() if amount else b"None")
try:
body = await asyncio.wait_for(req_socket.recv(), timeout=timeout)
except TimeoutError:
body = '{"tags": []}'
logger.debug("Got timeout error fetching gestures.")
# Handle empty response and JSON decode errors
available_tags = []
if body:
try:
available_tags = json.loads(body).get("tags", [])
except json.JSONDecodeError as e:
logger.error(f"Failed to parse gesture tags JSON: {e}, body: {body}")
# Return empty list on JSON error
available_tags = []
return {"available_gesture_tags": available_tags}
@router.get("/ping_stream")
async def ping_stream(request: Request):
"""

View File

@@ -1,6 +1,6 @@
from fastapi.routing import APIRouter
from control_backend.api.v1.endpoints import logs, message, program, robot, sse
from control_backend.api.v1.endpoints import button_pressed, logs, message, program, robot, sse
api_router = APIRouter()
@@ -13,3 +13,5 @@ api_router.include_router(robot.router, prefix="/robot", tags=["Pings", "Command
api_router.include_router(logs.router, tags=["Logs"])
api_router.include_router(program.router, tags=["Program"])
api_router.include_router(button_pressed.router, tags=["Button Pressed Events"])

View File

@@ -26,6 +26,7 @@ class ZMQSettings(BaseModel):
internal_pub_address: str = "tcp://localhost:5560"
internal_sub_address: str = "tcp://localhost:5561"
ri_communication_address: str = "tcp://*:5555"
internal_gesture_rep_adress: str = "tcp://localhost:7788"
vad_pub_address: str = "inproc://vad_stream"
@@ -58,6 +59,8 @@ class AgentSettings(BaseModel):
transcription_name: str = "transcription_agent"
ri_communication_name: str = "ri_communication_agent"
robot_speech_name: str = "robot_speech_agent"
robot_gesture_name: str = "robot_gesture_agent"
user_interrupt_name: str = "user_interrupt_agent"
class BehaviourSettings(BaseModel):

View File

@@ -4,6 +4,7 @@ import os
import yaml
import zmq
from zmq.log.handlers import PUBHandler
from control_backend.core.config import settings
@@ -51,15 +52,27 @@ def setup_logging(path: str = ".logging_config.yaml") -> None:
logging.warning(f"Could not load logging configuration: {e}")
config = {}
if "custom_levels" in config:
for level_name, level_num in config["custom_levels"].items():
add_logging_level(level_name, level_num)
custom_levels = config.get("custom_levels", {}) or {}
for level_name, level_num in custom_levels.items():
add_logging_level(level_name, level_num)
if config.get("handlers") is not None and config.get("handlers").get("ui"):
pub_socket = zmq.Context.instance().socket(zmq.PUB)
pub_socket.connect(settings.zmq_settings.internal_pub_address)
config["handlers"]["ui"]["interface_or_socket"] = pub_socket
logging.config.dictConfig(config)
# Patch ZMQ PUBHandler to know about custom levels
if custom_levels:
for logger_name in ("control_backend",):
logger = logging.getLogger(logger_name)
for handler in logger.handlers:
if isinstance(handler, PUBHandler):
# Use the INFO formatter as the default template
default_fmt = handler.formatters[logging.INFO]
for level_num in custom_levels.values():
handler.setFormatter(default_fmt, level=level_num)
else:
logging.warning("Logging config file not found. Using default logging configuration.")

View File

@@ -39,6 +39,9 @@ from control_backend.agents.communication import RICommunicationAgent
# LLM Agents
from control_backend.agents.llm import LLMAgent
# User Interrupt Agent
from control_backend.agents.user_interrupt.user_interrupt_agent import UserInterruptAgent
# Other backend imports
from control_backend.api.v1.router import api_router
from control_backend.core.config import settings
@@ -138,6 +141,12 @@ async def lifespan(app: FastAPI):
"name": settings.agent_settings.bdi_program_manager_name,
},
),
"UserInterruptAgent": (
UserInterruptAgent,
{
"name": settings.agent_settings.user_interrupt_name,
},
),
}
agents = []

View File

@@ -0,0 +1,6 @@
from pydantic import BaseModel
class ButtonPressedEvent(BaseModel):
type: str
context: str

View File

@@ -1,7 +1,7 @@
from enum import Enum
from typing import Any
from typing import Any, Literal
from pydantic import BaseModel
from pydantic import BaseModel, model_validator
class RIEndpoint(str, Enum):
@@ -10,6 +10,8 @@ class RIEndpoint(str, Enum):
"""
SPEECH = "actuate/speech"
GESTURE_SINGLE = "actuate/gesture/single"
GESTURE_TAG = "actuate/gesture/tag"
PING = "ping"
NEGOTIATE_PORTS = "negotiate/ports"
@@ -36,3 +38,29 @@ class SpeechCommand(RIMessage):
endpoint: RIEndpoint = RIEndpoint(RIEndpoint.SPEECH)
data: str
is_priority: bool = False
class GestureCommand(RIMessage):
"""
A specific command to make the robot do a gesture.
:ivar endpoint: Should be ``RIEndpoint.GESTURE_SINGLE`` or ``RIEndpoint.GESTURE_TAG``.
:ivar data: The id of the gesture to be executed.
"""
endpoint: Literal[ # pyright: ignore[reportIncompatibleVariableOverride] - We validate this stricter rule ourselves
RIEndpoint.GESTURE_SINGLE, RIEndpoint.GESTURE_TAG
]
data: str
is_priority: bool = False
@model_validator(mode="after")
def check_endpoint(self):
allowed = {
RIEndpoint.GESTURE_SINGLE,
RIEndpoint.GESTURE_TAG,
}
if self.endpoint not in allowed:
raise ValueError("endpoint must be GESTURE_SINGLE or GESTURE_TAG")
return self
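
As a quick check (editorial, not in the diff) of the stricter endpoint typing; "wave" is a hypothetical gesture name.

from pydantic import ValidationError

from control_backend.schemas.ri_message import GestureCommand, RIEndpoint

GestureCommand(endpoint=RIEndpoint.GESTURE_SINGLE, data="wave")  # accepted
try:
    GestureCommand(endpoint=RIEndpoint.SPEECH, data="wave")  # rejected by the Literal type
except ValidationError as err:
    print(err)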