docs: add docs to CB

Pretty much every class and method should have documentation now. ref: N25B-295
2025-11-24 21:58:22 +01:00
parent 54502e441c
commit 129d3c4420
26 changed files with 757 additions and 80 deletions
--- a/src/control_backend/agents/perception/vad_agent.py
+++ b/src/control_backend/agents/perception/vad_agent.py
@@ -15,6 +15,8 @@ class SocketPoller[T]:
    """
    Convenience class for polling a socket for data with a timeout, persisting a zmq.Poller for
    multiple usages.
+
+    :param T: The type of data returned by the socket.
    """

    def __init__(
@@ -35,7 +37,7 @@ class SocketPoller[T]:
        """
        Get data from the socket, or None if the timeout is reached.

-        :param timeout_ms: If given, the timeout. Otherwise, `self.timeout_ms` is used.
+        :param timeout_ms: If given, the timeout. Otherwise, ``self.timeout_ms`` is used.
        :return: Data from the socket or None.
        """
        timeout_ms = timeout_ms or self.timeout_ms
@@ -47,11 +49,27 @@ class SocketPoller[T]:

 class VADAgent(BaseAgent):
    """
-    An agent which listens to an audio stream, does Voice Activity Detection (VAD), and sends
-    fragments with detected speech to other agents over ZeroMQ.
+    Voice Activity Detection (VAD) Agent.
+
+    This agent:
+    1.  Receives an audio stream (via ZMQ).
+    2.  Processes the audio using the Silero VAD model to detect speech.
+    3.  Buffers potential speech segments.
+    4.  Publishes valid speech fragments (speech plus a small buffer) to a ZMQ PUB socket.
+    5.  Instantiates and starts agents (like :class:`TranscriptionAgent`) that use this output.
+
+    :ivar audio_in_address: Address of the input audio stream.
+    :ivar audio_in_bind: Whether to bind or connect to the input address.
+    :ivar audio_out_socket: ZMQ PUB socket for sending speech fragments.
    """

    def __init__(self, audio_in_address: str, audio_in_bind: bool):
+        """
+        Initialize the VAD Agent.
+
+        :param audio_in_address: ZMQ address for input audio.
+        :param audio_in_bind: True if this agent should bind to the input address, False to connect.
+        """
        super().__init__(settings.agent_settings.vad_name)

        self.audio_in_address = audio_in_address
@@ -67,6 +85,15 @@ class VADAgent(BaseAgent):
        self.model = None

    async def setup(self):
+        """
+        Initialize resources.
+
+        1. Connects audio input socket.
+        2. Binds audio output socket (random port).
+        3. Loads VAD model from Torch Hub.
+        4. Starts the streaming loop.
+        5. Instantiates and starts the :class:`TranscriptionAgent` with the output address.
+        """
        self.logger.info("Setting up %s", self.name)

        self._connect_audio_in_socket()
@@ -123,7 +150,9 @@ class VADAgent(BaseAgent):
        self.audio_in_poller = SocketPoller[bytes](self.audio_in_socket)

    def _connect_audio_out_socket(self) -> int | None:
-        """Returns the port bound, or None if binding failed."""
+        """
+        Returns the port bound, or None if binding failed.
+        """
        try:
            self.audio_out_socket = azmq.Context.instance().socket(zmq.PUB)
            return self.audio_out_socket.bind_to_random_port("tcp://localhost", max_tries=100)
@@ -144,6 +173,15 @@ class VADAgent(BaseAgent):
        self._ready.set()

    async def _streaming_loop(self):
+        """
+        Main loop for processing audio stream.
+
+        1. Polls for new audio chunks.
+        2. Passes chunk to VAD model.
+        3. Manages ``i_since_speech`` counter to determine start/end of speech.
+        4. Buffers speech + context.
+        5. Sends complete speech segment to output socket when silence is detected.
+        """
        await self._ready.wait()
        while self._running:
            assert self.audio_in_poller is not None