Merge remote-tracking branch 'origin/dev' into refactor/config-file

# Conflicts: # src/control_backend/agents/ri_communication_agent.py # src/control_backend/core/config.py # src/control_backend/main.py
2025-11-19 17:30:48 +01:00
parent 93b8db03e7 9963134262
commit 64db25c974
46 changed files with 1207 additions and 651 deletions
--- a/test/unit/agents/perception/transcription_agent/test_speech_recognizer.py
+++ b/test/unit/agents/perception/transcription_agent/test_speech_recognizer.py
@@ -0,0 +1,58 @@
+import numpy as np
+import pytest
+
+from control_backend.agents.perception.transcription_agent.speech_recognizer import (
+    OpenAIWhisperSpeechRecognizer,
+    SpeechRecognizer,
+)
+
+
+@pytest.fixture(autouse=True)
+def patch_sr_settings(monkeypatch):
+    """Give the speech recognizer module concrete numeric settings for every test.
+
+    The overrides are applied to the settings object referenced by the speech_recognizer
+    module itself, and cover the values that _estimate_max_tokens() reads.
+    """
+    from control_backend.agents.perception.transcription_agent import speech_recognizer as sr
+
+    overrides = (
+        ("vad_settings", "sample_rate_hz", 16_000),
+        ("behaviour_settings", "transcription_words_per_minute", 450),
+        ("behaviour_settings", "transcription_words_per_token", 0.75),
+        ("behaviour_settings", "transcription_token_buffer", 10),
+    )
+
+    # raising=False: set the attribute even if the target does not define it yet
+    for group, attribute, value in overrides:
+        monkeypatch.setattr(getattr(sr.settings, group), attribute, value, raising=False)
+
+
+def test_estimate_max_tokens():
+    """One minute of audio at 450 words per minute, plus a 10 token buffer, should be
+    estimated at 610 tokens."""
+    one_minute_of_samples = 60 * 16_000
+    audio = np.empty(one_minute_of_samples, dtype=np.float32)
+
+    estimate = SpeechRecognizer._estimate_max_tokens(audio)
+
+    # 610 is consistent with 450 / 0.75 = 600 tokens plus the buffer of 10
+    assert estimate == 610
+    assert isinstance(estimate, int)
+
+
+def test_get_decode_options():
+    """Check that ``sample_len`` appears in the decode options only when output limiting is on.
+
+    Three scenarios are covered: the default constructor, limiting explicitly enabled, and
+    limiting explicitly disabled.
+    """
+    audio = np.empty(shape=(60 * 16_000), dtype=np.float32)
+
+    # With the defaults, it should limit output length based on input size
+    recognizer = OpenAIWhisperSpeechRecognizer()
+    options = recognizer._get_decode_options(audio)
+
+    assert "sample_len" in options
+    assert isinstance(options["sample_len"], int)
+
+    # When explicitly enabled, it should limit output length based on input size
+    recognizer = OpenAIWhisperSpeechRecognizer(limit_output_length=True)
+    options = recognizer._get_decode_options(audio)
+
+    assert "sample_len" in options
+    assert isinstance(options["sample_len"], int)
+
+    # When disabled, it should not limit output length based on input size.
+    # A fresh recognizer is required here: reusing the options from the enabled
+    # recognizer would never exercise the disabled code path.
+    recognizer = OpenAIWhisperSpeechRecognizer(limit_output_length=False)
+    options = recognizer._get_decode_options(audio)
+
+    assert "sample_len" not in options