feat: implement pause functionality in VAD agent

The pause is implemented by gating the _streaming_loop coroutine on an asyncio.Event: while paused, the loop stops polling audio, and on resume the stream and buffers are reset (see the sketch below).

ref: N25B-350
Author: Storm
Date:   2025-12-30 15:58:18 +02:00
parent 9adeb1efff
commit 867837dcc4

2 changed files with 40 additions and 4 deletions
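The pause/resume mechanism is the usual asyncio.Event gating pattern. The following is a minimal, self-contained sketch of that pattern only; the attribute names mirror the diff, but the work inside the loop is a hypothetical stand-in, not the agent's real polling and VAD logic:

import asyncio


class PausableLoop:
    """Minimal sketch of the Event-based pause gate used by the VAD agent."""

    def __init__(self):
        self._running = True
        self._paused = asyncio.Event()
        self._paused.set()           # not paused at start
        self._reset_needed = False

    def pause(self):
        self._paused.clear()         # the loop blocks at the gate on its next iteration
        self._reset_needed = True    # stale audio must be discarded on resume

    def resume(self):
        self._paused.set()           # releases the gate

    async def run(self):
        while self._running:
            await self._paused.wait()    # returns immediately while unpaused
            if self._reset_needed:
                # the real agent resets its audio stream and buffers here
                self._reset_needed = False
            await asyncio.sleep(0.1)     # hypothetical stand-in for polling + VAD work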


@@ -7,6 +7,7 @@ import zmq.asyncio as azmq
 from control_backend.agents import BaseAgent
 from control_backend.core.config import settings
+from control_backend.schemas.internal_message import InternalMessage
 from ...schemas.program_status import PROGRAM_STATUS, ProgramStatus
 from .transcription_agent.transcription_agent import TranscriptionAgent
@@ -86,6 +87,12 @@ class VADAgent(BaseAgent):
         self.audio_buffer = np.array([], dtype=np.float32)
         self.i_since_speech = settings.behaviour_settings.vad_initial_since_speech
         self._ready = asyncio.Event()
+
+        # Pause control
+        self._reset_needed = False
+        self._paused = asyncio.Event()
+        self._paused.set()  # Not paused at start
+
         self.model = None

     async def setup(self):
@@ -213,6 +220,16 @@ class VADAgent(BaseAgent):
         """
         await self._ready.wait()
         while self._running:
+            await self._paused.wait()
+            # After being unpaused, reset stream and buffers
+            if self._reset_needed:
+                self.logger.debug("Resuming: resetting stream and buffers.")
+                await self._reset_stream()
+                self.audio_buffer = np.array([], dtype=np.float32)
+                self.i_since_speech = settings.behaviour_settings.vad_initial_since_speech
+                self._reset_needed = False
+
             assert self.audio_in_poller is not None
             data = await self.audio_in_poller.poll()
             if data is None:
@@ -254,3 +271,26 @@ class VADAgent(BaseAgent):
             # At this point, we know that the speech has ended.
             # Prepend the last chunk that had no speech, for a more fluent boundary
             self.audio_buffer = chunk
+
+    async def handle_message(self, msg: InternalMessage):
+        """
+        Handle incoming messages.
+
+        Expects messages to pause or resume VAD processing from the User Interrupt Agent.
+
+        :param msg: The received internal message.
+        """
+        sender = msg.sender
+        if sender == settings.agent_settings.user_interrupt_name:
+            if msg.body == "PAUSE":
+                self.logger.info("Pausing VAD processing.")
+                self._paused.clear()
+                self._reset_needed = True
+            elif msg.body == "RESUME":
+                self.logger.info("Resuming VAD processing.")
+                self._paused.set()
+            else:
+                self.logger.warning(f"Unknown command from User Interrupt Agent: {msg.body}")
+        else:
+            self.logger.debug(f"Ignoring message from unknown sender: {sender}")


@@ -100,14 +100,10 @@ class LLMSettings(BaseModel):
     """
     local_llm_url: str = "http://localhost:1234/v1/chat/completions"
-<<<<<<< HEAD
-    local_llm_model: str = "google/gemma-3-1b"
-=======
     local_llm_model: str = "gpt-oss"
     chat_temperature: float = 1.0
     code_temperature: float = 0.3
     n_parallel: int = 4
->>>>>>> feat/semantic-beliefs


 class VADSettings(BaseModel):