Create transcriber agent #15

Merged
0950726 merged 10 commits from feat/transcription-agent into dev 2025-10-29 15:51:44 +00:00
6 changed files with 24 additions and 3 deletions
Showing only changes of commit 4d6bac7e2b - Show all commits

View File

@@ -33,7 +33,7 @@ uv run --only-group test pytest test/unit
Or for integration tests:
```bash
uv run --only-group integration-test pytest test/integration
uv run --group integration-test pytest test/integration
```
## GitHooks

View File

@@ -7,6 +7,7 @@ requires-python = ">=3.13"
dependencies = [
"fastapi[all]>=0.115.6",
"mlx-whisper>=0.4.3 ; sys_platform == 'darwin'",
"numpy>=2.3.3",
"openai-whisper>=20250625",
"pyaudio>=0.2.14",
"pydantic>=2.12.0",
@@ -33,6 +34,7 @@ integration-test = [
"soundfile>=0.13.1",
]
test = [
"numpy>=2.3.3",
"pytest>=8.4.2",
"pytest-asyncio>=1.2.0",
"pytest-cov>=7.0.0",

View File

@@ -59,8 +59,10 @@ class Streaming(CyclicBehaviour):
async def run(self) -> None:
data = await self.audio_in_poller.poll()
if data is None:
logger.debug("No audio data received. Discarding buffer until new data arrives.")
self.audio_buffer = np.array([], dtype=np.float32)
if len(self.audio_buffer) > 0:
logger.debug("No audio data received. Discarding buffer until new data arrives.")
self.audio_buffer = np.array([], dtype=np.float32)
self.i_since_speech = 100
return
# Copy the data; otherwise Torch complains that the buffer is immutable

View File

@@ -18,6 +18,9 @@ def audio_out_socket():
@pytest.fixture
def streaming(audio_in_socket, audio_out_socket):
    """Provide a ``Streaming`` instance built from the two socket fixtures.

    Before construction, the return value of ``torch.hub.load`` is set to a
    two-element placeholder tuple.

    NOTE(review): assigning ``return_value`` assumes ``torch`` has already
    been replaced by a mock object in the test configuration -- confirm.
    """
    import torch

    # Stub the hub loader so it yields a placeholder pair.
    hub_load = torch.hub.load
    hub_load.return_value = (..., ...)  # Mock

    behaviour = Streaming(audio_in_socket, audio_out_socket)
    return behaviour

View File

@@ -33,3 +33,13 @@ def pytest_configure(config):
mock_config_module.settings = MagicMock()
sys.modules["control_backend.core.config"] = mock_config_module
# --- Mock torch and zmq for VAD ---
mock_torch = MagicMock()
mock_zmq = MagicMock()
mock_zmq.asyncio = mock_zmq
# Individual tests can import these mocked modules and override their return values
sys.modules["torch"] = mock_torch
sys.modules["zmq"] = mock_zmq
sys.modules["zmq.asyncio"] = mock_zmq.asyncio

4
uv.lock generated
View File

@@ -1332,6 +1332,7 @@ source = { virtual = "." }
dependencies = [
{ name = "fastapi", extra = ["all"] },
{ name = "mlx-whisper", marker = "sys_platform == 'darwin'" },
{ name = "numpy" },
{ name = "openai-whisper" },
{ name = "pyaudio" },
{ name = "pydantic" },
@@ -1358,6 +1359,7 @@ integration-test = [
{ name = "soundfile" },
]
test = [
{ name = "numpy" },
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "pytest-cov" },
@@ -1368,6 +1370,7 @@ test = [
requires-dist = [
{ name = "fastapi", extras = ["all"], specifier = ">=0.115.6" },
{ name = "mlx-whisper", marker = "sys_platform == 'darwin'", specifier = ">=0.4.3" },
{ name = "numpy", specifier = ">=2.3.3" },
{ name = "openai-whisper", specifier = ">=20250625" },
{ name = "pyaudio", specifier = ">=0.2.14" },
{ name = "pydantic", specifier = ">=2.12.0" },
@@ -1392,6 +1395,7 @@ dev = [
]
integration-test = [{ name = "soundfile", specifier = ">=0.13.1" }]
test = [
{ name = "numpy", specifier = ">=2.3.3" },
{ name = "pytest", specifier = ">=8.4.2" },
{ name = "pytest-asyncio", specifier = ">=1.2.0" },
{ name = "pytest-cov", specifier = ">=7.0.0" },