"""Tests for ``Streaming`` from ``control_backend.agents.vad_agent``."""

from unittest.mock import AsyncMock, MagicMock

import numpy as np
import pytest

from control_backend.agents.vad_agent import Streaming


@pytest.fixture
def audio_in_socket():
    """Provide an ``AsyncMock`` standing in for the audio input socket."""
    return AsyncMock()


@pytest.fixture
def audio_out_socket():
    """Provide an ``AsyncMock`` standing in for the audio output socket."""
    return AsyncMock()


@pytest.fixture
def streaming(audio_in_socket, audio_out_socket):
    """Build a ``Streaming`` instance wired to the two mocked sockets."""
    return Streaming(audio_in_socket, audio_out_socket)


@pytest.mark.asyncio
async def test_voice_activity_detected(audio_in_socket, audio_out_socket, streaming):
    """Speech followed by silence yields exactly one outgoing audio message.

    Three chunks with a speech probability of 1.0 followed by four chunks with
    a probability of 0.0 should result in a single ``send`` on the audio out
    socket.
    """
    speech_probabilities = [1.0] * 3 + [0.0] * 4

    # Stub the model: every call returns the same result object, whose
    # ``.item()`` hands back the next scripted probability.
    model_output = MagicMock()
    model_output.item.side_effect = speech_probabilities
    streaming.model = MagicMock(return_value=model_output)

    # Every poll returns one chunk of 512 float32 frames; the contents are
    # uninitialised, which is fine because the model is mocked.
    poller = AsyncMock()
    poller.poll.return_value = np.empty(512, dtype=np.float32)
    streaming.audio_in_poller = poller

    # One ``run()`` per scripted probability.
    for _ in speech_probabilities:
        await streaming.run()

    send = audio_out_socket.send
    send.assert_called_once()
    payload = send.call_args.args[0]
    assert isinstance(payload, bytes)

    # Each sample has 512 frames of 4 bytes, expecting 5 chunks
    # (3 with speech, 2 as padding).
    frames_per_chunk, bytes_per_frame, expected_chunks = 512, 4, 5
    assert len(payload) == frames_per_chunk * bytes_per_frame * expected_chunks