Merge remote-tracking branch 'origin/dev' into feat/agentspeak-generation
This commit is contained in:
@@ -1 +1,2 @@
|
||||
from .robot_gesture_agent import RobotGestureAgent as RobotGestureAgent
|
||||
from .robot_speech_agent import RobotSpeechAgent as RobotSpeechAgent
|
||||
|
||||
162
src/control_backend/agents/actuation/robot_gesture_agent.py
Normal file
162
src/control_backend/agents/actuation/robot_gesture_agent.py
Normal file
@@ -0,0 +1,162 @@
|
||||
import json
|
||||
|
||||
import zmq
|
||||
import zmq.asyncio as azmq
|
||||
|
||||
from control_backend.agents import BaseAgent
|
||||
from control_backend.core.agent_system import InternalMessage
|
||||
from control_backend.core.config import settings
|
||||
from control_backend.schemas.ri_message import GestureCommand, RIEndpoint
|
||||
|
||||
|
||||
class RobotGestureAgent(BaseAgent):
|
||||
"""
|
||||
This agent acts as a bridge between the control backend and the Robot Interface (RI).
|
||||
It receives gesture commands from other agents or from the UI,
|
||||
and forwards them to the robot via a ZMQ PUB socket.
|
||||
|
||||
:ivar subsocket: ZMQ SUB socket for receiving external commands (e.g., from UI).
|
||||
:ivar pubsocket: ZMQ PUB socket for sending commands to the Robot Interface.
|
||||
:ivar address: Address to bind/connect the PUB socket.
|
||||
:ivar bind: Whether to bind or connect the PUB socket.
|
||||
:ivar gesture_data: A list of strings for available gestures
|
||||
"""
|
||||
|
||||
subsocket: azmq.Socket
|
||||
repsocket: azmq.Socket
|
||||
pubsocket: azmq.Socket
|
||||
address = ""
|
||||
bind = False
|
||||
gesture_data = []
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
address=settings.zmq_settings.ri_command_address,
|
||||
bind=False,
|
||||
gesture_data=None,
|
||||
):
|
||||
self.gesture_data = gesture_data or []
|
||||
super().__init__(name)
|
||||
self.address = address
|
||||
self.bind = bind
|
||||
|
||||
async def setup(self):
|
||||
"""
|
||||
Initialize the agent.
|
||||
|
||||
1. Sets up the PUB socket to talk to the robot.
|
||||
2. Sets up the SUB socket to listen for "command" topics (from UI/External).
|
||||
3. Starts the loop for handling ZMQ commands.
|
||||
"""
|
||||
self.logger.info("Setting up %s", self.name)
|
||||
|
||||
context = azmq.Context.instance()
|
||||
|
||||
# To the robot
|
||||
self.pubsocket = context.socket(zmq.PUB)
|
||||
if self.bind:
|
||||
self.pubsocket.bind(self.address)
|
||||
else:
|
||||
self.pubsocket.connect(self.address)
|
||||
|
||||
# Receive internal topics regarding commands
|
||||
self.subsocket = context.socket(zmq.SUB)
|
||||
self.subsocket.connect(settings.zmq_settings.internal_sub_address)
|
||||
self.subsocket.setsockopt(zmq.SUBSCRIBE, b"command")
|
||||
self.subsocket.setsockopt(zmq.SUBSCRIBE, b"send_gestures")
|
||||
|
||||
# REP socket for replying to gesture requests
|
||||
self.repsocket = context.socket(zmq.REP)
|
||||
self.repsocket.bind(settings.zmq_settings.internal_gesture_rep_adress)
|
||||
|
||||
self.add_behavior(self._zmq_command_loop())
|
||||
self.add_behavior(self._fetch_gestures_loop())
|
||||
|
||||
self.logger.info("Finished setting up %s", self.name)
|
||||
|
||||
async def stop(self):
|
||||
if self.subsocket:
|
||||
self.subsocket.close()
|
||||
if self.pubsocket:
|
||||
self.pubsocket.close()
|
||||
await super().stop()
|
||||
|
||||
async def handle_message(self, msg: InternalMessage):
|
||||
"""
|
||||
Handle commands received from other internal Python agents.
|
||||
|
||||
Validates the message as a :class:`GestureCommand` and forwards it to the robot.
|
||||
|
||||
:param msg: The internal message containing the command.
|
||||
"""
|
||||
try:
|
||||
gesture_command = GestureCommand.model_validate_json(msg.body)
|
||||
if gesture_command.endpoint == RIEndpoint.GESTURE_TAG:
|
||||
if gesture_command.data not in self.gesture_data:
|
||||
self.logger.warning(
|
||||
"Received gesture tag '%s' which is not in available tags. Early returning",
|
||||
gesture_command.data,
|
||||
)
|
||||
return
|
||||
|
||||
await self.pubsocket.send_json(gesture_command.model_dump())
|
||||
except Exception:
|
||||
self.logger.exception("Error processing internal message.")
|
||||
|
||||
async def _zmq_command_loop(self):
|
||||
"""
|
||||
Loop to handle commands received via ZMQ (e.g., from the UI).
|
||||
|
||||
Listens on the 'command' topic, validates the JSON and forwards it to the robot.
|
||||
"""
|
||||
while self._running:
|
||||
try:
|
||||
topic, body = await self.subsocket.recv_multipart()
|
||||
|
||||
# Don't process send_gestures here
|
||||
if topic != b"command":
|
||||
continue
|
||||
|
||||
body = json.loads(body)
|
||||
gesture_command = GestureCommand.model_validate(body)
|
||||
if gesture_command.endpoint == RIEndpoint.GESTURE_TAG:
|
||||
if gesture_command.data not in self.gesture_data:
|
||||
self.logger.warning(
|
||||
"Received gesture tag '%s' which is not in available tags.\
|
||||
Early returning",
|
||||
gesture_command.data,
|
||||
)
|
||||
continue
|
||||
await self.pubsocket.send_json(gesture_command.model_dump())
|
||||
except Exception:
|
||||
self.logger.exception("Error processing ZMQ message.")
|
||||
|
||||
async def _fetch_gestures_loop(self):
|
||||
"""
|
||||
Loop to handle fetching gestures received via ZMQ (e.g., from the UI).
|
||||
|
||||
Listens on the 'send_gestures' topic, and returns a list on the get_gestures topic.
|
||||
"""
|
||||
while self._running:
|
||||
try:
|
||||
# Get a request
|
||||
body = await self.repsocket.recv()
|
||||
|
||||
# Figure out amount, if specified
|
||||
try:
|
||||
body = json.loads(body)
|
||||
except json.JSONDecodeError:
|
||||
body = None
|
||||
|
||||
amount = None
|
||||
if isinstance(body, int):
|
||||
amount = body
|
||||
|
||||
# Fetch tags from gesture data and respond
|
||||
tags = self.gesture_data[:amount] if amount else self.gesture_data
|
||||
response = json.dumps({"tags": tags}).encode()
|
||||
await self.repsocket.send(response)
|
||||
|
||||
except Exception:
|
||||
self.logger.exception("Error fetching gesture tags.")
|
||||
@@ -29,7 +29,7 @@ class RobotSpeechAgent(BaseAgent):
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
address=settings.zmq_settings.ri_command_address,
|
||||
address: str,
|
||||
bind=False,
|
||||
):
|
||||
super().__init__(name)
|
||||
|
||||
@@ -6,9 +6,11 @@ import zmq.asyncio as azmq
|
||||
from zmq.asyncio import Context
|
||||
|
||||
from control_backend.agents import BaseAgent
|
||||
from control_backend.agents.actuation.robot_gesture_agent import RobotGestureAgent
|
||||
from control_backend.core.config import settings
|
||||
|
||||
from ..actuation.robot_speech_agent import RobotSpeechAgent
|
||||
from ..perception import VADAgent
|
||||
|
||||
|
||||
class RICommunicationAgent(BaseAgent):
|
||||
@@ -179,12 +181,24 @@ class RICommunicationAgent(BaseAgent):
|
||||
else:
|
||||
self._req_socket.bind(addr)
|
||||
case "actuation":
|
||||
ri_commands_agent = RobotSpeechAgent(
|
||||
gesture_data = port_data.get("gestures", [])
|
||||
robot_speech_agent = RobotSpeechAgent(
|
||||
settings.agent_settings.robot_speech_name,
|
||||
address=addr,
|
||||
bind=bind,
|
||||
)
|
||||
await ri_commands_agent.start()
|
||||
robot_gesture_agent = RobotGestureAgent(
|
||||
settings.agent_settings.robot_gesture_name,
|
||||
address=addr,
|
||||
bind=bind,
|
||||
gesture_data=gesture_data,
|
||||
)
|
||||
await robot_speech_agent.start()
|
||||
await asyncio.sleep(0.1) # Small delay
|
||||
await robot_gesture_agent.start()
|
||||
case "audio":
|
||||
vad_agent = VADAgent(audio_in_address=addr, audio_in_bind=bind)
|
||||
await vad_agent.start()
|
||||
case _:
|
||||
self.logger.warning("Unhandled negotiation id: %s", id)
|
||||
|
||||
|
||||
@@ -125,7 +125,7 @@ class LLMAgent(BaseAgent):
|
||||
full_message += token
|
||||
current_chunk += token
|
||||
|
||||
self.logger.info(
|
||||
self.logger.llm(
|
||||
"Received token: %s",
|
||||
full_message,
|
||||
extra={"reference": message_id}, # Used in the UI to update old logs
|
||||
|
||||
@@ -8,6 +8,7 @@ import zmq.asyncio as azmq
|
||||
from control_backend.agents import BaseAgent
|
||||
from control_backend.core.config import settings
|
||||
|
||||
from ...schemas.program_status import PROGRAM_STATUS, ProgramStatus
|
||||
from .transcription_agent.transcription_agent import TranscriptionAgent
|
||||
|
||||
|
||||
@@ -61,6 +62,7 @@ class VADAgent(BaseAgent):
|
||||
:ivar audio_in_address: Address of the input audio stream.
|
||||
:ivar audio_in_bind: Whether to bind or connect to the input address.
|
||||
:ivar audio_out_socket: ZMQ PUB socket for sending speech fragments.
|
||||
:ivar program_sub_socket: ZMQ SUB socket for receiving program status updates.
|
||||
"""
|
||||
|
||||
def __init__(self, audio_in_address: str, audio_in_bind: bool):
|
||||
@@ -79,6 +81,8 @@ class VADAgent(BaseAgent):
|
||||
self.audio_out_socket: azmq.Socket | None = None
|
||||
self.audio_in_poller: SocketPoller | None = None
|
||||
|
||||
self.program_sub_socket: azmq.Socket | None = None
|
||||
|
||||
self.audio_buffer = np.array([], dtype=np.float32)
|
||||
self.i_since_speech = settings.behaviour_settings.vad_initial_since_speech
|
||||
self._ready = asyncio.Event()
|
||||
@@ -90,9 +94,10 @@ class VADAgent(BaseAgent):
|
||||
|
||||
1. Connects audio input socket.
|
||||
2. Binds audio output socket (random port).
|
||||
3. Loads VAD model from Torch Hub.
|
||||
4. Starts the streaming loop.
|
||||
5. Instantiates and starts the :class:`TranscriptionAgent` with the output address.
|
||||
3. Connects to program communication socket.
|
||||
4. Loads VAD model from Torch Hub.
|
||||
5. Starts the streaming loop.
|
||||
6. Instantiates and starts the :class:`TranscriptionAgent` with the output address.
|
||||
"""
|
||||
self.logger.info("Setting up %s", self.name)
|
||||
|
||||
@@ -105,6 +110,11 @@ class VADAgent(BaseAgent):
|
||||
return
|
||||
audio_out_address = f"tcp://localhost:{audio_out_port}"
|
||||
|
||||
# Connect to internal communication socket
|
||||
self.program_sub_socket = azmq.Context.instance().socket(zmq.SUB)
|
||||
self.program_sub_socket.connect(settings.zmq_settings.internal_sub_address)
|
||||
self.program_sub_socket.subscribe(PROGRAM_STATUS)
|
||||
|
||||
# Initialize VAD model
|
||||
try:
|
||||
self.model, _ = torch.hub.load(
|
||||
@@ -117,10 +127,8 @@ class VADAgent(BaseAgent):
|
||||
await self.stop()
|
||||
return
|
||||
|
||||
# Warmup/reset
|
||||
await self.reset_stream()
|
||||
|
||||
self.add_behavior(self._streaming_loop())
|
||||
self.add_behavior(self._status_loop())
|
||||
|
||||
# Start agents dependent on the output audio fragments here
|
||||
transcriber = TranscriptionAgent(audio_out_address)
|
||||
@@ -165,7 +173,7 @@ class VADAgent(BaseAgent):
|
||||
self.audio_out_socket = None
|
||||
return None
|
||||
|
||||
async def reset_stream(self):
|
||||
async def _reset_stream(self):
|
||||
"""
|
||||
Clears the ZeroMQ queue and sets ready state.
|
||||
"""
|
||||
@@ -176,6 +184,23 @@ class VADAgent(BaseAgent):
|
||||
self.logger.info(f"Discarded {discarded} audio packets before starting.")
|
||||
self._ready.set()
|
||||
|
||||
async def _status_loop(self):
|
||||
"""Loop for checking program status. Only start listening if program is RUNNING."""
|
||||
while self._running:
|
||||
topic, body = await self.program_sub_socket.recv_multipart()
|
||||
|
||||
if topic != PROGRAM_STATUS:
|
||||
continue
|
||||
if body != ProgramStatus.RUNNING.value:
|
||||
continue
|
||||
|
||||
# Program is now running, we can start our stream
|
||||
await self._reset_stream()
|
||||
|
||||
# We don't care about further status updates
|
||||
self.program_sub_socket.close()
|
||||
break
|
||||
|
||||
async def _streaming_loop(self):
|
||||
"""
|
||||
Main loop for processing audio stream.
|
||||
|
||||
Reference in New Issue
Block a user