fix: merge refactor/zmq-internal-socket-behaviour into feat/cb2ui-robot-connections (also fixed all ruff/test issues in order to commit).

ref: None
This commit is contained in:
Björn Otgaar
2025-10-31 14:16:11 +01:00
38 changed files with 1761 additions and 167 deletions

View File

@@ -2,21 +2,18 @@ import asyncio
import json
import logging
import zmq
import zmq.asyncio
from spade.agent import Agent
from spade.behaviour import CyclicBehaviour
from zmq.asyncio import Context
from control_backend.agents.ri_command_agent import RICommandAgent
from control_backend.core.config import settings
from control_backend.core.zmq_context import context
logger = logging.getLogger(__name__)
class RICommunicationAgent(Agent):
_pub_socket: zmq.asyncio.Socket
req_socket: zmq.asyncio.Socket | None
_address = ""
_bind = True
connected = False
@@ -25,7 +22,6 @@ class RICommunicationAgent(Agent):
self,
jid: str,
password: str,
pub_socket: zmq.asyncio.Socket,
port: int = 5222,
verify_security: bool = False,
address="tcp://localhost:0000",
@@ -34,8 +30,8 @@ class RICommunicationAgent(Agent):
super().__init__(jid, password, port, verify_security)
self._address = address
self._bind = bind
self.req_socket = None
self._pub_socket = pub_socket
self._req_socket: zmq.asyncio.Socket | None = None
self.pub_socket: zmq.asyncio.Socket | None = None
class ListenBehaviour(CyclicBehaviour):
async def run(self):
@@ -49,7 +45,7 @@ class RICommunicationAgent(Agent):
seconds_to_wait_total = 1.0
try:
await asyncio.wait_for(
self.agent.req_socket.send_json(message), timeout=seconds_to_wait_total / 2
self.agent._req_socket.send_json(message), timeout=seconds_to_wait_total / 2
)
except TimeoutError:
logger.debug(
@@ -61,23 +57,13 @@ class RICommunicationAgent(Agent):
try:
logger.debug(f"waiting for message for {seconds_to_wait_total / 2} seconds.")
message = await asyncio.wait_for(
self.agent.req_socket.recv_json(), timeout=seconds_to_wait_total / 2
self.agent._req_socket.recv_json(), timeout=seconds_to_wait_total / 2
)
# We didn't get a reply :(
except TimeoutError:
logger.info(
f"No ping back retrieved in {seconds_to_wait_total / 2} seconds totalling"
f"{seconds_to_wait_total} of time, killing myself (or maybe just laying low)."
)
# TODO: Send event to UI letting know that we've lost connection
topic = b"ping"
data = json.dumps(False).encode()
self.agent._pub_socket.send_multipart([topic, data])
await self.agent.setup()
except Exception as e:
logger.debug(f"Differennt exception: {e}")
logger.info("No ping retrieved in 3 seconds, killing myself.")
self.kill()
logger.debug('Received message "%s"', message)
if "endpoint" not in message:
@@ -89,46 +75,53 @@ class RICommunicationAgent(Agent):
case "ping":
topic = b"ping"
data = json.dumps(True).encode()
await self.agent._pub_socket.send_multipart([topic, data])
if self.agent.pub_socket is not None:
await self.agent.pub_socket.send_multipart([topic, data])
await asyncio.sleep(1)
case _:
logger.info(
"Received message with topic different than ping, while ping expected."
)
async def setup_req_socket(self, force=False):
async def setup_sockets(self, force=False):
"""
Sets up request socket for communication agent.
"""
if self.req_socket is None or force:
self.req_socket = context.socket(zmq.REQ)
if self._bind:
self.req_socket.bind(self._address)
# Bind request socket
if self._req_socket is None or force:
self._req_socket = Context.instance().socket(zmq.REQ)
if self._bind: # TODO: Should this ever be the case with new architecture?
self._req_socket.bind(self._address)
else:
self.req_socket.connect(self._address)
self._req_socket.connect(self._address)
async def setup(self, max_retries: int = 5):
# TODO: Check with Kasper
if self.pub_socket is None or force:
self.pub_socket = Context.instance().socket(zmq.PUB)
self.pub_socket.connect(settings.zmq_settings.internal_pub_address)
async def setup(self, max_retries: int = 100):
"""
Try to set up the communication agent; we retry up to `max_retries` times in case we don't have a response yet.
"""
logger.info("Setting up %s", self.jid)
# Bind request socket
await self.setup_req_socket()
await self.setup_sockets()
retries = 0
# Let's try a certain amount of times before failing connection
while retries < max_retries:
# Make sure the socket is properly setup.
if self.req_socket is None:
if self._req_socket is None:
continue
# Send our message and receive one back:)
message = {"endpoint": "negotiate/ports", "data": {}}
await self.req_socket.send_json(message)
await self._req_socket.send_json(message)
try:
received_message = await asyncio.wait_for(self.req_socket.recv_json(), timeout=20.0)
received_message = await asyncio.wait_for(self._req_socket.recv_json(), timeout=1.0)
except TimeoutError:
logger.warning(
@@ -173,9 +166,9 @@ class RICommunicationAgent(Agent):
case "main":
if addr != self._address:
if not bind:
self.req_socket.connect(addr)
else:
self.req_socket.bind(addr)
self._req_socket.connect(addr)
else: # TODO: Should this ever be the case?
self._req_socket.bind(addr)
case "actuation":
ri_commands_agent = RICommandAgent(
settings.agent_settings.ri_command_agent_name
@@ -205,9 +198,17 @@ class RICommunicationAgent(Agent):
listen_behaviour = self.ListenBehaviour()
self.add_behaviour(listen_behaviour)
# TODO: Let UI know that we're connected >:)
# Let UI know that we're connected >:)
topic = b"ping"
data = json.dumps(True).encode()
await self._pub_socket.send_multipart([topic, data])
if self.pub_socket is None:
logger.error("communication agent pub socket not correctly initialized.")
else:
try:
await asyncio.wait_for(self.pub_socket.send_multipart([topic, data]), 5)
except TimeoutError:
logger.error(
"Initial connection ping for router timed out in ri_communication_agent."
)
self.connected = True
logger.info("Finished setting up %s", self.jid)