Merge branch 'dev' into refactor/logging

This commit is contained in:
2025-11-04 20:54:03 +01:00
22 changed files with 359 additions and 136 deletions

View File

@@ -75,7 +75,8 @@ class MLXWhisperSpeechRecognizer(SpeechRecognizer):
self.model_name = "mlx-community/whisper-small.en-mlx"
def load_model(self):
if self.was_loaded: return
if self.was_loaded:
return
# There appears to be no dedicated mechanism to preload a model, but this `get_model` does
# store it in memory for later usage
ModelHolder.get_model(self.model_name, mx.float16)
@@ -83,9 +84,9 @@ class MLXWhisperSpeechRecognizer(SpeechRecognizer):
def recognize_speech(self, audio: np.ndarray) -> str:
self.load_model()
return mlx_whisper.transcribe(audio,
path_or_hf_repo=self.model_name,
decode_options=self._get_decode_options(audio))["text"]
return mlx_whisper.transcribe(
audio, path_or_hf_repo=self.model_name, decode_options=self._get_decode_options(audio)
)["text"]
return mlx_whisper.transcribe(audio, path_or_hf_repo=self.model_name)["text"].strip()
@@ -95,12 +96,13 @@ class OpenAIWhisperSpeechRecognizer(SpeechRecognizer):
self.model = None
def load_model(self):
if self.model is not None: return
if self.model is not None:
return
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
self.model = whisper.load_model("small.en", device=device)
def recognize_speech(self, audio: np.ndarray) -> str:
self.load_model()
return whisper.transcribe(self.model,
audio,
decode_options=self._get_decode_options(audio))["text"]
return whisper.transcribe(
self.model, audio, decode_options=self._get_decode_options(audio)
)["text"]