Files
pepperplus-cb/test/unit/agents/transcription/test_speech_recognizer.py
Twirre Meulenbelt bec3e57658 feat: limit transcription output length based on input
Using heuristics. Also adds documentation and initial unit tests.

ref: N25B-209
2025-10-29 12:49:24 +01:00

37 lines
1.3 KiB
Python

import numpy as np
from control_backend.agents.transcription import SpeechRecognizer
from control_backend.agents.transcription.speech_recognizer import OpenAIWhisperSpeechRecognizer
def test_estimate_max_tokens():
    """The token estimate for a 60 * 16_000-sample clip is the integer 400.

    The original description of this test characterises the input as one
    minute of audio and the expectation as based on 300 words per minute.
    """
    # Only the buffer's size matters here, so its contents are left
    # uninitialised.
    sample_count = 60 * 16_000
    one_minute_clip = np.empty(shape=sample_count, dtype=np.float32)

    estimate = SpeechRecognizer._estimate_max_tokens(one_minute_clip)

    # The result must be a real int, not merely something equal to 400.
    assert isinstance(estimate, int)
    assert estimate == 400
def test_get_decode_options():
    """Check which decode options are produced under different settings.

    Three scenarios are covered:

    * default construction      -> an integer ``sample_len`` is present;
    * ``limit_output_length=True``  -> an integer ``sample_len`` is present;
    * ``limit_output_length=False`` -> ``sample_len`` is absent.
    """
    # Buffer contents are irrelevant to these checks, so they are left
    # uninitialised.
    audio = np.empty(shape=(60 * 16_000), dtype=np.float32)

    # With the defaults, it should limit output length based on input size.
    recognizer = OpenAIWhisperSpeechRecognizer()
    options = recognizer._get_decode_options(audio)
    assert "sample_len" in options
    assert isinstance(options["sample_len"], int)

    # When explicitly enabled, it should limit output length based on input size.
    recognizer = OpenAIWhisperSpeechRecognizer(limit_output_length=True)
    options = recognizer._get_decode_options(audio)
    assert "sample_len" in options
    assert isinstance(options["sample_len"], int)

    # When disabled, it should not limit output length based on input size.
    # A fresh recognizer and a fresh options dict are required here; checking
    # the options of the enabled recognizer (or a key other than
    # "sample_len") would make this assertion pass vacuously.
    recognizer = OpenAIWhisperSpeechRecognizer(limit_output_length=False)
    options = recognizer._get_decode_options(audio)
    assert "sample_len" not in options