feat: prepend more silence before speech audio for better transcription beginnings
ref: N25B-429
This commit is contained in:
@@ -73,6 +73,7 @@ class BehaviourSettings(BaseModel):
|
||||
:ivar vad_prob_threshold: Probability threshold for Voice Activity Detection.
|
||||
:ivar vad_initial_since_speech: Initial value for 'since speech' counter in VAD.
|
||||
:ivar vad_non_speech_patience_chunks: Number of non-speech chunks to wait before considering speech to have ended.
|
||||
:ivar vad_begin_silence_chunks: The number of chunks of silence to prepend to speech chunks.
|
||||
:ivar transcription_max_concurrent_tasks: Maximum number of concurrent transcription tasks.
|
||||
:ivar transcription_words_per_minute: Estimated words per minute for transcription timing.
|
||||
:ivar transcription_words_per_token: Estimated words per token for transcription timing.
|
||||
@@ -90,6 +91,7 @@ class BehaviourSettings(BaseModel):
|
||||
vad_prob_threshold: float = 0.5
|
||||
vad_initial_since_speech: int = 100
|
||||
vad_non_speech_patience_chunks: int = 15
|
||||
vad_begin_silence_chunks: int = 3
|
||||
|
||||
# transcription behaviour
|
||||
transcription_max_concurrent_tasks: int = 3
|
||||
|
||||
Reference in New Issue
Block a user