import os
from unittest.mock import AsyncMock, MagicMock

import pytest
import soundfile as sf
import zmq

from control_backend.agents.vad_agent import Streaming


def get_audio_chunks() -> list[bytes]:
    """
    Load the speech fixture stored next to this module as raw byte chunks.

    The recording ``speech_with_pauses_16k_1c_float32.wav`` is checked to be
    16 kHz, single channel, with subtype ``FLOAT``. It is then read in blocks
    of 512 frames as ``float32``, and each block is serialized with
    ``tobytes()``. Reading stops at the first block shorter than 512 frames,
    so a trailing partial block is not included.

    :return: The full-size chunks, in file order.
    """
    curr_dir = os.path.dirname(os.path.realpath(__file__))
    wav_path = f"{curr_dir}/speech_with_pauses_16k_1c_float32.wav"

    frames_per_chunk = 512
    collected = []

    with sf.SoundFile(wav_path, "r") as wav:
        # Guard against the fixture being swapped for a differently encoded file.
        assert wav.samplerate == 16000
        assert wav.channels == 1
        assert wav.subtype == "FLOAT"

        # A short read marks the end of the usable audio.
        while len(frames := wav.read(frames_per_chunk, dtype="float32")) == frames_per_chunk:
            collected.append(frames.tobytes())

    return collected


@pytest.mark.asyncio
async def test_real_audio(mocker):
    """
    Run the VAD agent on real audio with only its input and output mocked.

    The real model is used. The test passes when the agent sends at least one
    fragment and every fragment sent is a ``bytes`` object of at least the
    minimum expected size.
    """
    audio_chunks = get_audio_chunks()

    # Input socket: each recv() hands back the next chunk of the recording.
    audio_in_socket = AsyncMock()
    audio_in_socket.recv.side_effect = audio_chunks

    # The patched poller always reports the input socket as readable.
    mock_poller: MagicMock = mocker.patch("control_backend.agents.vad_agent.zmq.Poller")
    mock_poller.return_value.poll.return_value = [(audio_in_socket, zmq.POLLIN)]

    audio_out_socket = AsyncMock()

    vad_streamer = Streaming(audio_in_socket, audio_out_socket)
    # Set the private ready flag by hand.
    vad_streamer._ready = True

    # One run() call per available chunk.
    for _ in audio_chunks:
        await vad_streamer.run()

    audio_out_socket.send.assert_called()

    # Should be at least 3 chunks of audio
    # (512 frames per chunk, 4 bytes per float32 sample).
    min_fragment_bytes = 512 * 4 * 3
    for sent in audio_out_socket.send.call_args_list:
        payload = sent.args[0]
        assert isinstance(payload, bytes)
        assert len(payload) >= min_fragment_bytes