Merge remote-tracking branch 'origin/dev' into refactor/config-file
# Conflicts: # src/control_backend/agents/ri_communication_agent.py # src/control_backend/core/config.py # src/control_backend/main.py
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from control_backend.agents.perception.transcription_agent.speech_recognizer import (
|
||||
OpenAIWhisperSpeechRecognizer,
|
||||
SpeechRecognizer,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def patch_sr_settings(monkeypatch):
    """Pin the settings values the speech recognizer reads, for every test in this module.

    The fixture is ``autouse`` so each test runs against the same known numbers;
    ``monkeypatch`` restores the previous values when the test finishes.
    """
    # Import here so we patch the settings object held by the recognizer module itself.
    from control_backend.agents.perception.transcription_agent import speech_recognizer as sr

    # (settings group, attribute, value) -- concrete numbers for everything
    # _estimate_max_tokens() reads.
    overrides = (
        ("vad_settings", "sample_rate_hz", 16_000),
        ("behaviour_settings", "transcription_words_per_minute", 450),
        ("behaviour_settings", "transcription_words_per_token", 0.75),
        ("behaviour_settings", "transcription_token_buffer", 10),
    )
    for group_name, attribute, value in overrides:
        # raising=False: set the attribute even if the settings group does not define it yet.
        group = getattr(sr.settings, group_name)
        monkeypatch.setattr(group, attribute, value, raising=False)
|
||||
|
||||
|
||||
def test_estimate_max_tokens():
    """One minute of audio should yield an integer estimate of 610 tokens.

    With the patched settings (16 kHz sample rate, 450 words per minute,
    0.75 words per token, 10 token buffer): 450 / 0.75 + 10 = 610.
    """
    sample_count = 60 * 16_000  # 60 seconds at 16 kHz
    one_minute_of_audio = np.empty(sample_count, dtype=np.float32)

    estimate = SpeechRecognizer._estimate_max_tokens(one_minute_of_audio)

    assert estimate == 610
    assert isinstance(estimate, int)
|
||||
|
||||
|
||||
def test_get_decode_options():
    """Check whether the right decode options are given under different scenarios.

    Three recognizer configurations are exercised against one minute of audio:

    * default construction      -> ``sample_len`` is present and is an ``int``
    * ``limit_output_length=True``  -> ``sample_len`` is present and is an ``int``
    * ``limit_output_length=False`` -> ``sample_len`` is absent
    """
    audio = np.empty(shape=(60 * 16_000), dtype=np.float32)

    # With the defaults, it should limit output length based on input size
    recognizer = OpenAIWhisperSpeechRecognizer()
    options = recognizer._get_decode_options(audio)

    assert "sample_len" in options
    assert isinstance(options["sample_len"], int)

    # When explicitly enabled, it should limit output length based on input size
    recognizer = OpenAIWhisperSpeechRecognizer(limit_output_length=True)
    options = recognizer._get_decode_options(audio)

    assert "sample_len" in options
    assert isinstance(options["sample_len"], int)

    # When disabled, it should not limit output length based on input size.
    # A fresh recognizer and fresh options are required here: reusing the
    # options from the enabled case would make the check meaningless.
    recognizer = OpenAIWhisperSpeechRecognizer(limit_output_length=False)
    options = recognizer._get_decode_options(audio)

    # "sample_len" is the key that carries the output-length limit (see the
    # assertions above), so that is the key that must be missing.
    assert "sample_len" not in options
|
||||
Reference in New Issue
Block a user