From feb6875a4c3f9a9ef133495a630cf46443785d1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Otgaar?= Date: Thu, 6 Nov 2025 14:16:55 +0100 Subject: [PATCH] fix: make sure that the communication agent reboots properly. ref: N25B-256 --- .../agents/ri_communication_agent.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/control_backend/agents/ri_communication_agent.py b/src/control_backend/agents/ri_communication_agent.py index fe99ad4..8d72c8a 100644 --- a/src/control_backend/agents/ri_communication_agent.py +++ b/src/control_backend/agents/ri_communication_agent.py @@ -39,6 +39,10 @@ class RICommunicationAgent(BaseAgent): """ assert self.agent is not None + if not self.agent.connected: + await asyncio.sleep(1) + return + # We need to listen and sent pings. message = {"endpoint": "ping", "data": {"id": "e.g. some reference id"}} seconds_to_wait_total = 1.0 @@ -63,7 +67,13 @@ class RICommunicationAgent(BaseAgent): # We didnt get a reply :( except TimeoutError: - self.agent.logger.info("No ping retrieved in 3 seconds, killing myself.") + self.agent.logger.info( + f"No ping retrieved in {seconds_to_wait_total} seconds, " + "sending UI disconnection event and soft killing myself." + ) + + # Make sure we don't retry receiving messages until we're set up. + self.agent.connected = False # Tell UI we're disconnected. topic = b"ping" @@ -84,7 +94,7 @@ class RICommunicationAgent(BaseAgent): ) # Try to reboot. - self.agent.setup() + await self.agent.setup() self.agent.logger.debug('Received message "%s"', message) if "endpoint" not in message: @@ -111,12 +121,11 @@ class RICommunicationAgent(BaseAgent): # Bind request socket if self._req_socket is None or force: self._req_socket = Context.instance().socket(zmq.REQ) - if self._bind: # TODO: Should this ever be the case with new architecture? 
+ if self._bind: self._req_socket.bind(self._address) else: self._req_socket.connect(self._address) - # TODO: Check with Kasper if self.pub_socket is None or force: self.pub_socket = Context.instance().socket(zmq.PUB) self.pub_socket.connect(settings.zmq_settings.internal_pub_address) @@ -231,5 +240,7 @@ class RICommunicationAgent(BaseAgent): self.logger.error( "Initial connection ping for router timed out in ri_communication_agent." ) + + # Make sure to start listening now that we're connected. self.connected = True self.logger.info("Finished setting up %s", self.jid)