feat: prepend more silence before speech audio for better transcription beginnings

ref: N25B-429
2026-01-08 10:49:13 +01:00
parent 5a61225c6f
commit 45719c580b
3 changed files with 10 additions and 6 deletions
--- a/.env.example
+++ b/.env.example
@@ -10,7 +10,7 @@ LLM_SETTINGS__LOCAL_LLM_URL="http://localhost:1234/v1/chat/completions"
 LLM_SETTINGS__LOCAL_LLM_MODEL="gpt-oss"

 # Number of non-speech chunks to wait before considering the speech to have ended. A chunk is approximately 31 ms. Increasing this number allows longer pauses in speech, but also increases response time.
-BEHAVIOUR_SETTINGS__VAD_NON_SPEECH_PATIENCE_CHUNKS=3
+BEHAVIOUR_SETTINGS__VAD_NON_SPEECH_PATIENCE_CHUNKS=15

 # Timeout in milliseconds for socket polling. Increase this number (for example, to 500 ms) if network latency/jitter is high, which is often the case when using Wi-Fi. A symptom of this issue is transcriptions getting cut off.
 BEHAVIOUR_SETTINGS__SOCKET_POLLER_TIMEOUT_MS=100