40 lines
1.3 KiB
Python
40 lines
1.3 KiB
Python
import numpy as np
|
|
|
|
from control_backend.agents.transcription.speech_recognizer import (
|
|
OpenAIWhisperSpeechRecognizer,
|
|
SpeechRecognizer,
|
|
)
|
|
|
|
|
|
def test_estimate_max_tokens():
    """One minute of 16 kHz audio should map to a 610-token budget.

    The estimator assumes 450 words per minute and adds a 10-token
    padding, so a 60-second clip yields 600 + 10 = 610 tokens.
    """
    one_minute_samples = 60 * 16_000
    one_minute_audio = np.empty(shape=(one_minute_samples,), dtype=np.float32)

    estimate = SpeechRecognizer._estimate_max_tokens(one_minute_audio)

    assert isinstance(estimate, int)
    assert estimate == 610
|
|
|
|
|
|
def test_get_decode_options():
    """Check whether the right decode options are given under different scenarios.

    Covers three cases: the default configuration, explicitly enabling
    output-length limiting, and explicitly disabling it.
    """
    # One minute of 16 kHz audio, used for every scenario.
    audio = np.empty(shape=(60 * 16_000), dtype=np.float32)

    # With the defaults, it should limit output length based on input size
    recognizer = OpenAIWhisperSpeechRecognizer()
    options = recognizer._get_decode_options(audio)

    assert "sample_len" in options
    assert isinstance(options["sample_len"], int)

    # When explicitly enabled, it should limit output length based on input size
    recognizer = OpenAIWhisperSpeechRecognizer(limit_output_length=True)
    options = recognizer._get_decode_options(audio)

    assert "sample_len" in options
    assert isinstance(options["sample_len"], int)

    # When disabled, it should not limit output length based on input size.
    # BUGFIX: the original never built a disabled recognizer — it asserted
    # on the stale options from the enabled case, and checked the wrong key
    # ("sample_rate" instead of "sample_len"), so the scenario was untested.
    recognizer = OpenAIWhisperSpeechRecognizer(limit_output_length=False)
    options = recognizer._get_decode_options(audio)

    assert "sample_len" not in options
|