fix: move VAD agent creation to RI communication agent
Previously, the VAD agent was started in main, but it should be created by the RI communication agent so that it uses the values negotiated there. ref: N25B-356
This commit is contained in:
@@ -8,6 +8,7 @@ import zmq.asyncio as azmq
|
||||
from control_backend.agents import BaseAgent
|
||||
from control_backend.core.config import settings
|
||||
|
||||
from ...schemas.program_status import PROGRAM_STATUS, ProgramStatus
|
||||
from .transcription_agent.transcription_agent import TranscriptionAgent
|
||||
|
||||
|
||||
@@ -61,6 +62,7 @@ class VADAgent(BaseAgent):
|
||||
:ivar audio_in_address: Address of the input audio stream.
|
||||
:ivar audio_in_bind: Whether to bind or connect to the input address.
|
||||
:ivar audio_out_socket: ZMQ PUB socket for sending speech fragments.
|
||||
:ivar program_sub_socket: ZMQ SUB socket for receiving program status updates.
|
||||
"""
|
||||
|
||||
def __init__(self, audio_in_address: str, audio_in_bind: bool):
|
||||
@@ -79,6 +81,8 @@ class VADAgent(BaseAgent):
|
||||
self.audio_out_socket: azmq.Socket | None = None
|
||||
self.audio_in_poller: SocketPoller | None = None
|
||||
|
||||
self.program_sub_socket: azmq.Socket | None = None
|
||||
|
||||
self.audio_buffer = np.array([], dtype=np.float32)
|
||||
self.i_since_speech = settings.behaviour_settings.vad_initial_since_speech
|
||||
self._ready = asyncio.Event()
|
||||
@@ -90,9 +94,10 @@ class VADAgent(BaseAgent):
|
||||
|
||||
1. Connects audio input socket.
|
||||
2. Binds audio output socket (random port).
|
||||
3. Loads VAD model from Torch Hub.
|
||||
4. Starts the streaming loop.
|
||||
5. Instantiates and starts the :class:`TranscriptionAgent` with the output address.
|
||||
3. Connects to program communication socket.
|
||||
4. Loads VAD model from Torch Hub.
|
||||
5. Starts the streaming loop.
|
||||
6. Instantiates and starts the :class:`TranscriptionAgent` with the output address.
|
||||
"""
|
||||
self.logger.info("Setting up %s", self.name)
|
||||
|
||||
@@ -105,6 +110,11 @@ class VADAgent(BaseAgent):
|
||||
return
|
||||
audio_out_address = f"tcp://localhost:{audio_out_port}"
|
||||
|
||||
# Connect to internal communication socket
|
||||
self.program_sub_socket = azmq.Context.instance().socket(zmq.SUB)
|
||||
self.program_sub_socket.connect(settings.zmq_settings.internal_sub_address)
|
||||
self.program_sub_socket.subscribe(PROGRAM_STATUS)
|
||||
|
||||
# Initialize VAD model
|
||||
try:
|
||||
self.model, _ = torch.hub.load(
|
||||
@@ -117,10 +127,8 @@ class VADAgent(BaseAgent):
|
||||
await self.stop()
|
||||
return
|
||||
|
||||
# Warmup/reset
|
||||
await self.reset_stream()
|
||||
|
||||
self.add_behavior(self._streaming_loop())
|
||||
self.add_behavior(self._status_loop())
|
||||
|
||||
# Start agents dependent on the output audio fragments here
|
||||
transcriber = TranscriptionAgent(audio_out_address)
|
||||
@@ -165,7 +173,7 @@ class VADAgent(BaseAgent):
|
||||
self.audio_out_socket = None
|
||||
return None
|
||||
|
||||
async def reset_stream(self):
|
||||
async def _reset_stream(self):
|
||||
"""
|
||||
Clears the ZeroMQ queue and sets ready state.
|
||||
"""
|
||||
@@ -176,6 +184,23 @@ class VADAgent(BaseAgent):
|
||||
self.logger.info(f"Discarded {discarded} audio packets before starting.")
|
||||
self._ready.set()
|
||||
|
||||
async def _status_loop(self):
    """
    Wait until the program reports RUNNING, then start the audio stream.

    Reads ``(topic, body)`` messages from ``program_sub_socket`` for as long as the
    agent is running. The first message whose topic equals ``PROGRAM_STATUS`` and whose
    body equals ``ProgramStatus.RUNNING.value`` triggers :meth:`_reset_stream`; the
    socket is then closed and the loop ends. Every other message is ignored.
    """
    while self._running:
        topic, body = await self.program_sub_socket.recv_multipart()

        program_started = topic == PROGRAM_STATUS and body == ProgramStatus.RUNNING.value
        if program_started:
            # The program is running now, so the stream can be (re)started.
            await self._reset_stream()

            # Later status updates are irrelevant; release the subscription.
            self.program_sub_socket.close()
            break
|
||||
|
||||
async def _streaming_loop(self):
|
||||
"""
|
||||
Main loop for processing audio stream.
|
||||
|
||||
Reference in New Issue
Block a user