Merge remote-tracking branch 'origin/dev' into feat/vad-agent

# Conflicts:
#	pyproject.toml
#	src/control_backend/main.py
#	uv.lock
2025-10-28 10:44:03 +01:00
parent d47074d091 dc811fd625
commit a44df4781b
18 changed files with 271 additions and 95 deletions
--- a/src/control_backend/agents/bdi/bdi_core.py
+++ b/src/control_backend/agents/bdi/bdi_core.py
@@ -5,13 +5,15 @@ from spade_bdi.bdi import BDIAgent

 from control_backend.agents.bdi.behaviours.belief_setter import BeliefSetter

+
 class BDICoreAgent(BDIAgent):
    """
-    This is the Brain agent that does the belief inference with AgentSpeak. 
+    This is the Brain agent that does the belief inference with AgentSpeak.
    This is a continous process that happens automatically in the background.
    This class contains all the actions that can be called from AgentSpeak plans.
    It has the BeliefSetter behaviour.
    """
+
    logger = logging.getLogger("BDI Core")

    async def setup(self):
@@ -31,5 +33,3 @@ class BDICoreAgent(BDIAgent):
    def _send_to_llm(self, message) -> str:
        """TODO: implement"""
        return f"This is a reply to {message}"
-
-
--- a/src/control_backend/agents/bdi/behaviours/belief_setter.py
+++ b/src/control_backend/agents/bdi/behaviours/belief_setter.py
@@ -8,15 +8,17 @@ from spade_bdi.bdi import BDIAgent

 from control_backend.core.config import settings

+
 class BeliefSetter(CyclicBehaviour):
    """
-    This is the behaviour that the BDI agent runs.
-    This behaviour waits for incoming message and processes it based on sender.
-    Currently, t only waits for messages containing beliefs from Belief Collector and adds these to its KB.
+    This is the behaviour that the BDI agent runs. It waits for an incoming message
+    and processes it based on its sender. Currently, it only waits for messages
+    containing beliefs from BeliefCollector and adds these to its KB.
    """
+
    agent: BDIAgent
    logger = logging.getLogger("BDI/Belief Setter")
-    
+
    async def run(self):
        msg = await self.receive(timeout=0.1)
        if msg:
@@ -36,7 +38,8 @@ class BeliefSetter(CyclicBehaviour):
                pass

    def _process_belief_message(self, message: Message):
-        if not message.body: return
+        if not message.body:
+            return

        match message.thread:
            case "beliefs":
@@ -48,7 +51,6 @@ class BeliefSetter(CyclicBehaviour):
            case _:
                pass

-
    def _set_beliefs(self, beliefs: dict[str, list[list[str]]]):
        if self.agent.bdi is None:
            self.logger.warning("Cannot set beliefs, since agent's BDI is not yet initialized.")
--- a/src/control_backend/agents/vad_agent.py
+++ b/src/control_backend/agents/vad_agent.py
@@ -18,6 +18,7 @@ class SocketPoller[T]:
    Convenience class for polling a socket for data with a timeout, persisting a zmq.Poller for
    multiple usages.
    """
+
    def __init__(self, socket: azmq.Socket, timeout_ms: int = 100):
        """
        :param socket: The socket to poll and get data from.
@@ -46,9 +47,9 @@ class Streaming(CyclicBehaviour):
    def __init__(self, audio_in_socket: azmq.Socket, audio_out_socket: azmq.Socket):
        super().__init__()
        self.audio_in_poller = SocketPoller[bytes](audio_in_socket)
-        self.model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad",
-                                       model="silero_vad",
-                                       force_reload=False)
+        self.model, _ = torch.hub.load(
+            repo_or_dir="snakers4/silero-vad", model="silero_vad", force_reload=False
+        )
        self.audio_out_socket = audio_out_socket

        self.audio_buffer = np.array([], dtype=np.float32)
@@ -59,8 +60,10 @@ class Streaming(CyclicBehaviour):
        data = await self.audio_in_poller.poll()
        if data is None:
            if self.i_since_data % 10 == 0:
-                logger.debug("Failed to receive audio from socket for %d ms.",
-                             self.audio_in_poller.timeout_ms*(self.i_since_data+1))
+                logger.debug(
+                    "Failed to receive audio from socket for %d ms.",
+                    self.audio_in_poller.timeout_ms * (self.i_since_data + 1),
+                )
            self.i_since_data += 1
            return
        self.i_since_data = 0
@@ -70,7 +73,8 @@ class Streaming(CyclicBehaviour):
        prob = self.model(torch.from_numpy(chunk), 16000).item()

        if prob > 0.5:
-            if self.i_since_speech > 3: logger.debug("Speech started.")
+            if self.i_since_speech > 3:
+                logger.debug("Speech started.")
            self.audio_buffer = np.append(self.audio_buffer, chunk)
            self.i_since_speech = 0
            return
@@ -82,9 +86,9 @@ class Streaming(CyclicBehaviour):
            return

        # Speech probably ended. Make sure we have a usable amount of data.
-        if len(self.audio_buffer) >= 3*len(chunk):
+        if len(self.audio_buffer) >= 3 * len(chunk):
            logger.debug("Speech ended.")
-            await self.audio_out_socket.send(self.audio_buffer[:-2*len(chunk)].tobytes())
+            await self.audio_out_socket.send(self.audio_buffer[: -2 * len(chunk)].tobytes())

        # At this point, we know that the speech has ended.
        # Prepend the last chunk that had no speech, for a more fluent boundary
@@ -96,8 +100,9 @@ class VADAgent(Agent):
    An agent which listens to an audio stream, does Voice Activity Detection (VAD), and sends
    fragments with detected speech to other agents over ZeroMQ.
    """
+
    def __init__(self, audio_in_address: str, audio_in_bind: bool):
-        jid = settings.agent_settings.vad_agent_name + '@' + settings.agent_settings.host
+        jid = settings.agent_settings.vad_agent_name + "@" + settings.agent_settings.host
        super().__init__(jid, settings.agent_settings.vad_agent_name)

        self.audio_in_address = audio_in_address
@@ -146,7 +151,6 @@ class VADAgent(Agent):
        if audio_out_port is None:
            await self.stop()
            return
-        audio_out_address = f"tcp://localhost:{audio_out_port}"

        streaming = Streaming(self.audio_in_socket, self.audio_out_socket)
        self.add_behaviour(streaming)