From 867837dcc41a42b43d08a18459a25853564d9c90 Mon Sep 17 00:00:00 2001 From: Storm Date: Tue, 30 Dec 2025 15:58:18 +0200 Subject: [PATCH] feat: implemented pause functionality in VAD agent Functionality is implemented by pausing the _streaming_loop function. ref: N25B-350 --- .../agents/perception/vad_agent.py | 40 +++++++++++++++++++ src/control_backend/core/config.py | 4 -- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/control_backend/agents/perception/vad_agent.py b/src/control_backend/agents/perception/vad_agent.py index 8ccff0a..b2f1782 100644 --- a/src/control_backend/agents/perception/vad_agent.py +++ b/src/control_backend/agents/perception/vad_agent.py @@ -7,6 +7,7 @@ import zmq.asyncio as azmq from control_backend.agents import BaseAgent from control_backend.core.config import settings +from control_backend.schemas.internal_message import InternalMessage from ...schemas.program_status import PROGRAM_STATUS, ProgramStatus from .transcription_agent.transcription_agent import TranscriptionAgent @@ -86,6 +87,12 @@ class VADAgent(BaseAgent): self.audio_buffer = np.array([], dtype=np.float32) self.i_since_speech = settings.behaviour_settings.vad_initial_since_speech self._ready = asyncio.Event() + + # Pause control + self._reset_needed = False + self._paused = asyncio.Event() + self._paused.set() # Not paused at start + self.model = None async def setup(self): @@ -213,6 +220,16 @@ class VADAgent(BaseAgent): """ await self._ready.wait() while self._running: + await self._paused.wait() + + # After being unpaused, reset stream and buffers + if self._reset_needed: + self.logger.debug("Resuming: resetting stream and buffers.") + await self._reset_stream() + self.audio_buffer = np.array([], dtype=np.float32) + self.i_since_speech = settings.behaviour_settings.vad_initial_since_speech + self._reset_needed = False + assert self.audio_in_poller is not None data = await self.audio_in_poller.poll() if data is None: @@ -254,3 +271,26 @@ class 
VADAgent(BaseAgent): # At this point, we know that the speech has ended. # Prepend the last chunk that had no speech, for a more fluent boundary self.audio_buffer = chunk + + async def handle_message(self, msg: InternalMessage): + """ + Handle incoming messages. + + Expects messages to pause or resume the VAD processing from User Interrupt Agent. + + :param msg: The received internal message. + """ + sender = msg.sender + + if sender == settings.agent_settings.user_interrupt_name: + if msg.body == "PAUSE": + self.logger.info("Pausing VAD processing.") + self._paused.clear() + self._reset_needed = True + elif msg.body == "RESUME": + self.logger.info("Resuming VAD processing.") + self._paused.set() + else: + self.logger.warning(f"Unknown command from User Interrupt Agent: {msg.body}") + else: + self.logger.debug(f"Ignoring message from unknown sender: {sender}") diff --git a/src/control_backend/core/config.py b/src/control_backend/core/config.py index c8edfd7..8a7267c 100644 --- a/src/control_backend/core/config.py +++ b/src/control_backend/core/config.py @@ -100,14 +100,10 @@ class LLMSettings(BaseModel): """ local_llm_url: str = "http://localhost:1234/v1/chat/completions" -<<<<<<< HEAD - local_llm_model: str = "google/gemma-3-1b" -======= local_llm_model: str = "gpt-oss" chat_temperature: float = 1.0 code_temperature: float = 0.3 n_parallel: int = 4 ->>>>>>> feat/semantic-beliefs class VADSettings(BaseModel):