# pepperplus-cb/test/unit/agents/perception/vad_agent/test_vad_streaming.py

from unittest.mock import AsyncMock, MagicMock

import numpy as np
import pytest

from control_backend.agents.perception.vad_agent import VADAgent


@pytest.fixture
def audio_out_socket():
    """
    Provide an ``AsyncMock`` that the tests attach to the agent as its outgoing audio socket.

    :return: A fresh ``AsyncMock`` whose ``send`` calls can be inspected by the test.
    """
    socket_mock = AsyncMock()
    return socket_mock


@pytest.fixture
def vad_agent(audio_out_socket):
    """
    Build a ``VADAgent`` with the mocked ``audio_out_socket`` already attached.

    :param audio_out_socket: The mocked outgoing socket fixture.
    :return: The constructed agent, ready for a test to configure and drive.
    """
    agent = VADAgent("tcp://localhost:5555", False)
    # The socket fixture is requested here, so actually wire it in instead of leaving the
    # parameter unused; tests that assign the same mock again are unaffected.
    agent.audio_out_socket = audio_out_socket
    return agent


@pytest.fixture(autouse=True)
def patch_settings(monkeypatch):
    """
    Pin the VAD-related settings to fixed values for every test in this module.

    The overrides are applied with ``raising=False`` so a missing attribute is created rather
    than causing an error; ``monkeypatch`` undoes them after each test.

    :param monkeypatch: pytest's built-in ``monkeypatch`` fixture.
    """
    # Imported locally: this is the module whose ``settings`` object gets patched.
    from control_backend.agents.perception import vad_agent as vad_module

    behaviour = vad_module.settings.behaviour_settings
    vad = vad_module.settings.vad_settings

    overrides = (
        (behaviour, "vad_prob_threshold", 0.5),
        (behaviour, "vad_non_speech_patience_chunks", 2),
        (behaviour, "vad_initial_since_speech", 0),
        (vad, "sample_rate_hz", 16_000),
    )
    for target, attribute, value in overrides:
        monkeypatch.setattr(target, attribute, value, raising=False)


async def simulate_streaming_with_probabilities(streaming, probabilities: list[float]):
    """
    Simulates a streaming scenario with given VAD model probabilities for testing purposes.

    The component's model is replaced by a mock whose ``.item()`` yields ``probabilities`` in
    order, and its audio poller by a stub that serves one silent chunk per probability and then
    clears ``streaming._running`` so the streaming loop can stop.

    :param streaming: The streaming component to be tested.
    :param probabilities: A list of probabilities representing the outputs of the VAD model.
    """
    model_item = MagicMock()
    model_item.item.side_effect = probabilities
    streaming.model = MagicMock(return_value=model_item)

    # Prepare deterministic audio chunks: np.zeros rather than np.empty, because np.empty
    # returns uninitialised memory whose contents differ between runs.
    chunk_bytes = np.zeros(shape=512, dtype=np.float32).tobytes()
    chunks = [chunk_bytes] * len(probabilities)

    class DummyPoller:
        """Poller stub that hands out the prepared chunks, then stops the agent's loop."""

        def __init__(self, data, agent):
            # Iterate instead of popping from the front: O(1) per poll and the caller's
            # list is left untouched.
            self.data = iter(data)
            self.agent = agent

        async def poll(self, timeout_ms=None):
            chunk = next(self.data, None)
            if chunk is None:
                # Stop the loop cleanly once we've consumed all chunks
                self.agent._running = False
            return chunk

    streaming.audio_in_poller = DummyPoller(chunks, streaming)
    streaming._ready = True
    streaming._running = True

    await streaming._streaming_loop()


@pytest.mark.asyncio
async def test_voice_activity_detected(audio_out_socket, vad_agent):
    """
    A single run of speech chunks surrounded by silence must be sent out exactly once.
    """
    speech_chunk_count = 5
    silence = [0.0] * 5
    speech = [1.0] * speech_chunk_count
    # Each simulated chunk holds 512 float32 samples, i.e. 4 bytes per sample
    chunk_size_bytes = 512 * 4

    vad_agent.audio_out_socket = audio_out_socket
    await simulate_streaming_with_probabilities(vad_agent, silence + speech + silence)

    audio_out_socket.send.assert_called_once()
    payload = audio_out_socket.send.call_args.args[0]
    assert isinstance(payload, bytes)
    # The payload is expected to hold the speech chunks plus one extra chunk
    assert len(payload) == chunk_size_bytes * (speech_chunk_count + 1)


@pytest.mark.asyncio
async def test_voice_activity_short_pause(audio_out_socket, vad_agent):
    """
    Test a scenario where there is a short pause between speech, checking whether it ignores the
    short pause.
    """
    speech_chunk_count = 5
    silence = [0.0] * 5
    speech = [1.0] * speech_chunk_count
    short_pause = [0.0]
    probabilities = silence + speech + short_pause + speech + silence

    vad_agent.audio_out_socket = audio_out_socket
    await simulate_streaming_with_probabilities(vad_agent, probabilities)

    audio_out_socket.send.assert_called_once()
    data = audio_out_socket.send.call_args[0][0]
    assert isinstance(data, bytes)
    # Expecting 12 chunks (2*5 with speech, 1 pause between, 1 as padding)
    expected_chunks = speech_chunk_count * 2 + len(short_pause) + 1
    # Each simulated chunk holds 512 float32 samples, i.e. 4 bytes per sample
    assert len(data) == 512 * 4 * expected_chunks


@pytest.mark.asyncio
async def test_no_data(audio_out_socket, vad_agent):
    """
    With a poller that never yields audio, the streaming loop must finish without errors,
    send nothing and leave the audio buffer empty.
    """

    class EmptyPoller:
        """Poller stub that returns no data and stops the agent on its first poll."""

        async def poll(self, timeout_ms=None):
            # Clear the running flag so the loop under test does not spin forever
            vad_agent._running = False
            return None

    silent_poller = EmptyPoller()
    vad_agent.audio_out_socket = audio_out_socket
    vad_agent.audio_in_poller = silent_poller
    vad_agent._ready = True
    vad_agent._running = True

    await vad_agent._streaming_loop()

    audio_out_socket.send.assert_not_called()
    assert len(vad_agent.audio_buffer) == 0