import numpy as np

from control_backend.agents.transcription import SpeechRecognizer
from control_backend.agents.transcription.speech_recognizer import OpenAIWhisperSpeechRecognizer


def test_estimate_max_tokens():
    """Inputting one minute of audio, assuming 300 words per minute, expecting 400 tokens."""
    # 60 * 16_000 samples; the contents are irrelevant here, so an
    # uninitialised buffer is sufficient.
    audio = np.empty(shape=(60 * 16_000), dtype=np.float32)

    actual = SpeechRecognizer._estimate_max_tokens(audio)

    assert actual == 400
    assert isinstance(actual, int)


def test_get_decode_options():
    """Check whether the right decode options are given under different scenarios.

    Three scenarios are covered: the default configuration, output-length
    limiting explicitly enabled, and output-length limiting explicitly
    disabled. The limit is expected to show up as an integer ``sample_len``
    entry in the decode options.
    """
    audio = np.empty(shape=(60 * 16_000), dtype=np.float32)

    # With the defaults, it should limit output length based on input size
    recognizer = OpenAIWhisperSpeechRecognizer()
    options = recognizer._get_decode_options(audio)

    assert "sample_len" in options
    assert isinstance(options["sample_len"], int)

    # When explicitly enabled, it should limit output length based on input size
    recognizer = OpenAIWhisperSpeechRecognizer(limit_output_length=True)
    options = recognizer._get_decode_options(audio)

    assert "sample_len" in options
    assert isinstance(options["sample_len"], int)

    # When disabled, it should not limit output length based on input size.
    # A fresh recognizer and freshly computed options are required; otherwise
    # the assertion would inspect the options from the "enabled" case above.
    recognizer = OpenAIWhisperSpeechRecognizer(limit_output_length=False)
    options = recognizer._get_decode_options(audio)

    assert "sample_len" not in options