From 9e926178da32c423fd12b2118698ec52c7714305 Mon Sep 17 00:00:00 2001
From: Pim Hutting
Date: Wed, 5 Nov 2025 13:43:57 +0100
Subject: [PATCH 1/9] refactor: move constants into config file
Moved all constants out of the individual modules and into src/control_backend/core/config.py.
Also removed some old mock agents that are no longer used.
ref: N25B-236
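For reference, the receive-timeout pattern this patch applies across the behaviours looks roughly like this (sketch only; ExampleBehaviour is an illustrative name, the settings path mirrors the diffs below):

    from spade.behaviour import CyclicBehaviour

    from control_backend.core.config import settings


    class ExampleBehaviour(CyclicBehaviour):
        async def run(self):
            # Read the timeout from the shared config instead of a hard-coded literal.
            t = settings.behaviour_settings.default_rcv_timeout
            msg = await self.receive(timeout=t)
            if msg:
                ...  # handle the message as before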
---
et --hard f8dee6d | 41 ++++++++++++++++
.../agents/bdi/behaviours/belief_setter.py | 3 +-
.../behaviours/receive_llm_resp_behaviour.py | 3 +-
.../bdi/behaviours/text_belief_extractor.py | 3 +-
.../behaviours/continuous_collect.py | 3 +-
src/control_backend/agents/llm/llm.py | 6 ++-
.../agents/mock_agents/__init__.py | 0
.../agents/mock_agents/belief_text_mock.py | 44 -----------------
.../agents/ri_command_agent.py | 4 +-
.../agents/ri_communication_agent.py | 8 ++--
.../agents/transcription/speech_recognizer.py | 14 ++++--
.../transcription/transcription_agent.py | 3 +-
src/control_backend/agents/vad_agent.py | 26 ++++++----
src/control_backend/core/config.py | 47 ++++++++++++++++++-
src/control_backend/main.py | 4 +-
15 files changed, 136 insertions(+), 73 deletions(-)
create mode 100644 et --hard f8dee6d
delete mode 100644 src/control_backend/agents/mock_agents/__init__.py
delete mode 100644 src/control_backend/agents/mock_agents/belief_text_mock.py
diff --git a/et --hard f8dee6d b/et --hard f8dee6d
new file mode 100644
index 0000000..663bfc7
--- /dev/null
+++ b/et --hard f8dee6d
@@ -0,0 +1,41 @@
+bcbfc26 (HEAD -> feat/belief-collector, origin/feat/add-end-of-utterance-detection) HEAD@{0}: reset: moving to ORIG_HEAD
+e48096f HEAD@{1}: checkout: moving from feat/add-end-of-utterance-detection to feat/belief-collector
+ab94c2e (feat/add-end-of-utterance-detection) HEAD@{2}: commit (merge): Merge remote-tracking branch 'origin/dev' into feat/add-end-of-utterance-detection
+bcbfc26 (HEAD -> feat/belief-collector, origin/feat/add-end-of-utterance-detection) HEAD@{3}: checkout: moving from feat/belief-collector to feat/add-end-of-utterance-detection
+e48096f HEAD@{4}: checkout: moving from feat/add-end-of-utterance-detection to feat/belief-collector
+bcbfc26 (HEAD -> feat/belief-collector, origin/feat/add-end-of-utterance-detection) HEAD@{5}: checkout: moving from feat/belief-collector to feat/add-end-of-utterance-detection
+e48096f HEAD@{6}: reset: moving to HEAD
+e48096f HEAD@{7}: commit (merge): Merge remote-tracking branch 'origin/dev' into feat/belief-collector
+f8dee6d (origin/feat/belief-collector) HEAD@{8}: commit: test: added tests
+2efce93 HEAD@{9}: checkout: moving from dev to feat/belief-collector
+e36f5fc (origin/dev, dev) HEAD@{10}: pull: Fast-forward
+9b36982 HEAD@{11}: checkout: moving from feat/belief-collector to dev
+2efce93 HEAD@{12}: checkout: moving from feat/vad-agent to feat/belief-collector
+f73f510 (origin/feat/vad-agent, feat/vad-agent) HEAD@{13}: checkout: moving from feat/vad-agent to feat/vad-agent
+f73f510 (origin/feat/vad-agent, feat/vad-agent) HEAD@{14}: pull: Fast-forward
+fd1face HEAD@{15}: checkout: moving from feat/belief-collector to feat/vad-agent
+2efce93 HEAD@{16}: reset: moving to HEAD
+2efce93 HEAD@{17}: commit: fix: made beliefs a dict of lists
+1f34b14 HEAD@{18}: commit: Feat: Implement belief collector
+9b36982 HEAD@{19}: checkout: moving from style/fix-style to feat/belief-collector
+65cfdda (origin/style/fix-style, style/fix-style) HEAD@{20}: checkout: moving from feat/belief-collector to style/fix-style
+9b36982 HEAD@{21}: reset: moving to HEAD
+9b36982 HEAD@{22}: checkout: moving from dev to feat/belief-collector
+9b36982 HEAD@{23}: checkout: moving from feat/belief-collector to dev
+9b36982 HEAD@{24}: reset: moving to HEAD
+9b36982 HEAD@{25}: checkout: moving from feat/belief-from-text to feat/belief-collector
+bece44b (feat/belief-from-text) HEAD@{26}: checkout: moving from feat/belief-collector to feat/belief-from-text
+9b36982 HEAD@{27}: reset: moving to HEAD
+9b36982 HEAD@{28}: checkout: moving from dev to feat/belief-collector
+9b36982 HEAD@{29}: pull: Fast-forward
+71ddb50 HEAD@{30}: checkout: moving from feat/add-end-of-utterance-detection to dev
+bcbfc26 (HEAD -> feat/belief-collector, origin/feat/add-end-of-utterance-detection) HEAD@{31}: commit: feat: prototype end-of-utterance scorer over text input
+379e04a (origin/feat/add-speech-recognition) HEAD@{32}: checkout: moving from feat/add-end-of-utterance-detection to feat/add-end-of-utterance-detection
+379e04a (origin/feat/add-speech-recognition) HEAD@{33}: rebase (abort): updating HEAD
+71ddb50 HEAD@{34}: rebase (start): checkout dev
+379e04a (origin/feat/add-speech-recognition) HEAD@{35}: checkout: moving from dev to feat/add-end-of-utterance-detection
+71ddb50 HEAD@{36}: checkout: moving from feat/add-end-of-utterance-detection to dev
+379e04a (origin/feat/add-speech-recognition) HEAD@{37}: checkout: moving from feat/add-end-of-utterance-detection to feat/add-end-of-utterance-detection
+379e04a (origin/feat/add-speech-recognition) HEAD@{38}: checkout: moving from feat/add-end-of-utterance-detection to feat/add-end-of-utterance-detection
+379e04a (origin/feat/add-speech-recognition) HEAD@{39}: checkout: moving from main to feat/add-end-of-utterance-detection
+54b22d8 (origin/main, origin/HEAD, main) HEAD@{40}: clone: from git.science.uu.nl:ics/sp/2025/n25b/pepperplus-cb.git
diff --git a/src/control_backend/agents/bdi/behaviours/belief_setter.py b/src/control_backend/agents/bdi/behaviours/belief_setter.py
index 2f64036..69950b6 100644
--- a/src/control_backend/agents/bdi/behaviours/belief_setter.py
+++ b/src/control_backend/agents/bdi/behaviours/belief_setter.py
@@ -18,7 +18,8 @@ class BeliefSetterBehaviour(CyclicBehaviour):
logger = logging.getLogger("BDI/Belief Setter")
async def run(self):
- msg = await self.receive(timeout=0.1)
+ t = settings.behaviour_settings.default_rcv_timeout
+ msg = await self.receive(timeout=t)
if msg:
self.logger.info(f"Received message {msg.body}")
self._process_message(msg)
diff --git a/src/control_backend/agents/bdi/behaviours/receive_llm_resp_behaviour.py b/src/control_backend/agents/bdi/behaviours/receive_llm_resp_behaviour.py
index dc6e862..0d4788e 100644
--- a/src/control_backend/agents/bdi/behaviours/receive_llm_resp_behaviour.py
+++ b/src/control_backend/agents/bdi/behaviours/receive_llm_resp_behaviour.py
@@ -13,7 +13,8 @@ class ReceiveLLMResponseBehaviour(CyclicBehaviour):
logger = logging.getLogger("BDI/LLM Reciever")
async def run(self):
- msg = await self.receive(timeout=2)
+ t = settings.llm_settings.llm_response_rcv_timeout
+ msg = await self.receive(timeout=t)
if not msg:
return
diff --git a/src/control_backend/agents/bdi/behaviours/text_belief_extractor.py b/src/control_backend/agents/bdi/behaviours/text_belief_extractor.py
index ed06463..9f10f1c 100644
--- a/src/control_backend/agents/bdi/behaviours/text_belief_extractor.py
+++ b/src/control_backend/agents/bdi/behaviours/text_belief_extractor.py
@@ -39,7 +39,8 @@ class BeliefFromText(CyclicBehaviour):
beliefs = {"mood": ["X"], "car": ["Y"]}
async def run(self):
- msg = await self.receive(timeout=0.1)
+ t = settings.behaviour_settings.default_rcv_timeout
+ msg = await self.receive(timeout=t)
if msg:
sender = msg.sender.node
match sender:
diff --git a/src/control_backend/agents/belief_collector/behaviours/continuous_collect.py b/src/control_backend/agents/belief_collector/behaviours/continuous_collect.py
index eb3ee5d..fb0a5af 100644
--- a/src/control_backend/agents/belief_collector/behaviours/continuous_collect.py
+++ b/src/control_backend/agents/belief_collector/behaviours/continuous_collect.py
@@ -16,7 +16,8 @@ class ContinuousBeliefCollector(CyclicBehaviour):
"""
async def run(self):
- msg = await self.receive(timeout=0.1) # Wait for 0.1s
+ t = settings.behaviour_settings.default_rcv_timeout
+ msg = await self.receive(timeout=t)
if msg:
await self._process_message(msg)
diff --git a/src/control_backend/agents/llm/llm.py b/src/control_backend/agents/llm/llm.py
index c3c17ab..6944180 100644
--- a/src/control_backend/agents/llm/llm.py
+++ b/src/control_backend/agents/llm/llm.py
@@ -35,7 +35,8 @@ class LLMAgent(Agent):
Receives SPADE messages and processes only those originating from the
configured BDI agent.
"""
- msg = await self.receive(timeout=1)
+ t = settings.behaviour_settings.llm_response_rcv_timeout
+ msg = await self.receive(timeout=t)
if not msg:
return
@@ -78,7 +79,8 @@ class LLMAgent(Agent):
:param prompt: Input text prompt to pass to the LLM.
:return: LLM-generated content or fallback message.
"""
- async with httpx.AsyncClient(timeout=120.0) as client:
+ t = settings.llm_settings.request_timeout_s
+ async with httpx.AsyncClient(timeout=t) as client:
# Example dynamic content for future (optional)
instructions = LLMInstructions()
diff --git a/src/control_backend/agents/mock_agents/__init__.py b/src/control_backend/agents/mock_agents/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/control_backend/agents/mock_agents/belief_text_mock.py b/src/control_backend/agents/mock_agents/belief_text_mock.py
deleted file mode 100644
index 27c5e49..0000000
--- a/src/control_backend/agents/mock_agents/belief_text_mock.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import json
-
-from spade.agent import Agent
-from spade.behaviour import OneShotBehaviour
-from spade.message import Message
-
-from control_backend.core.config import settings
-
-
-class BeliefTextAgent(Agent):
- class SendOnceBehaviourBlfText(OneShotBehaviour):
- async def run(self):
- to_jid = (
- settings.agent_settings.belief_collector_agent_name
- + "@"
- + settings.agent_settings.host
- )
-
- # Send multiple beliefs in one JSON payload
- payload = {
- "type": "belief_extraction_text",
- "beliefs": {
- "user_said": [
- "hello test",
- "Can you help me?",
- "stop talking to me",
- "No",
- "Pepper do a dance",
- ]
- },
- }
-
- msg = Message(to=to_jid)
- msg.body = json.dumps(payload)
- await self.send(msg)
- print(f"Beliefs sent to {to_jid}!")
-
- self.exit_code = "Job Finished!"
- await self.agent.stop()
-
- async def setup(self):
- print("BeliefTextAgent started")
- self.b = self.SendOnceBehaviourBlfText()
- self.add_behaviour(self.b)
diff --git a/src/control_backend/agents/ri_command_agent.py b/src/control_backend/agents/ri_command_agent.py
index 51b8064..fc238f5 100644
--- a/src/control_backend/agents/ri_command_agent.py
+++ b/src/control_backend/agents/ri_command_agent.py
@@ -22,9 +22,9 @@ class RICommandAgent(Agent):
self,
jid: str,
password: str,
- port: int = 5222,
+ port: int = settings.agent_settings.default_spade_port,
verify_security: bool = False,
- address="tcp://localhost:0000",
+ address=settings.zmq_settings.ri_command_address,
bind=False,
):
super().__init__(jid, password, port, verify_security)
diff --git a/src/control_backend/agents/ri_communication_agent.py b/src/control_backend/agents/ri_communication_agent.py
index 8d56b09..c2340a6 100644
--- a/src/control_backend/agents/ri_communication_agent.py
+++ b/src/control_backend/agents/ri_communication_agent.py
@@ -21,9 +21,9 @@ class RICommunicationAgent(Agent):
self,
jid: str,
password: str,
- port: int = 5222,
+ port: int = settings.agent_settings.default_spade_port,
verify_security: bool = False,
- address="tcp://localhost:0000",
+ address=settings.zmq_settings.ri_command_address,
bind=False,
):
super().__init__(jid, password, port, verify_security)
@@ -58,13 +58,13 @@ class RICommunicationAgent(Agent):
# See what endpoint we received
match message["endpoint"]:
case "ping":
- await asyncio.sleep(1)
+ await asyncio.sleep(settings.agent_settings.behaviour_settings.ping_sleep_s)
case _:
logger.info(
"Received message with topic different than ping, while ping expected."
)
- async def setup(self, max_retries: int = 5):
+ async def setup(self, max_retries: int = settings.behaviour_settings.comm_setup_max_retries):
"""
Try to setup the communication agent, we have 5 retries in case we dont have a response yet.
"""
diff --git a/src/control_backend/agents/transcription/speech_recognizer.py b/src/control_backend/agents/transcription/speech_recognizer.py
index 19d82ff..40d9215 100644
--- a/src/control_backend/agents/transcription/speech_recognizer.py
+++ b/src/control_backend/agents/transcription/speech_recognizer.py
@@ -10,6 +10,8 @@ import numpy as np
import torch
import whisper
+from control_backend.core.config import settings
+
class SpeechRecognizer(abc.ABC):
def __init__(self, limit_output_length=True):
@@ -41,10 +43,10 @@ class SpeechRecognizer(abc.ABC):
:param audio: The audio sample (16 kHz) to use for length estimation.
:return: The estimated length of the transcribed audio in tokens.
"""
- length_seconds = len(audio) / 16_000
+ length_seconds = len(audio) / settings.vad_settings.sample_rate_hz
length_minutes = length_seconds / 60
- word_count = length_minutes * 300
- token_count = word_count / 3 * 4
+ word_count = length_minutes * settings.behaviour_settings.transcription_words_per_minute
+ token_count = word_count / settings.behaviour_settings.transcription_words_per_token
return int(token_count)
def _get_decode_options(self, audio: np.ndarray) -> dict:
@@ -72,7 +74,7 @@ class MLXWhisperSpeechRecognizer(SpeechRecognizer):
def __init__(self, limit_output_length=True):
super().__init__(limit_output_length)
self.was_loaded = False
- self.model_name = "mlx-community/whisper-small.en-mlx"
+ self.model_name = settings.speech_model_settings.mlx_model_name
def load_model(self):
if self.was_loaded:
@@ -99,7 +101,9 @@ class OpenAIWhisperSpeechRecognizer(SpeechRecognizer):
if self.model is not None:
return
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
- self.model = whisper.load_model("small.en", device=device)
+ self.model = whisper.load_model(
+ settings.speech_model_settings.openai_model_name, device=device
+ )
def recognize_speech(self, audio: np.ndarray) -> str:
self.load_model()
diff --git a/src/control_backend/agents/transcription/transcription_agent.py b/src/control_backend/agents/transcription/transcription_agent.py
index 2d936c4..52c0056 100644
--- a/src/control_backend/agents/transcription/transcription_agent.py
+++ b/src/control_backend/agents/transcription/transcription_agent.py
@@ -31,9 +31,10 @@ class TranscriptionAgent(Agent):
class Transcribing(CyclicBehaviour):
def __init__(self, audio_in_socket: azmq.Socket):
super().__init__()
+ max_concurrent_tasks = settings.transcription_settings.max_concurrent_transcriptions
self.audio_in_socket = audio_in_socket
self.speech_recognizer = SpeechRecognizer.best_type()
- self._concurrency = asyncio.Semaphore(3)
+ self._concurrency = asyncio.Semaphore(max_concurrent_tasks)
def warmup(self):
"""Load the transcription model into memory to speed up the first transcription."""
diff --git a/src/control_backend/agents/vad_agent.py b/src/control_backend/agents/vad_agent.py
index a228135..42c26ef 100644
--- a/src/control_backend/agents/vad_agent.py
+++ b/src/control_backend/agents/vad_agent.py
@@ -20,7 +20,11 @@ class SocketPoller[T]:
multiple usages.
"""
- def __init__(self, socket: azmq.Socket, timeout_ms: int = 100):
+ def __init__(
+ self,
+ socket: azmq.Socket,
+ timeout_ms: int = settings.behaviour_settings.socket_poller_timeout_ms,
+ ):
"""
:param socket: The socket to poll and get data from.
:param timeout_ms: A timeout in milliseconds to wait for data.
@@ -49,12 +53,16 @@ class Streaming(CyclicBehaviour):
super().__init__()
self.audio_in_poller = SocketPoller[bytes](audio_in_socket)
self.model, _ = torch.hub.load(
- repo_or_dir="snakers4/silero-vad", model="silero_vad", force_reload=False
+ repo_or_dir=settings.vad_settings.repo_or_dir,
+ model=settings.vad_settings.model_name,
+ force_reload=False,
)
self.audio_out_socket = audio_out_socket
self.audio_buffer = np.array([], dtype=np.float32)
- self.i_since_speech = 100 # Used to allow small pauses in speech
+ self.i_since_speech = (
+ settings.behaviour_settings.vad_initial_since_speech
+ ) # Used to allow small pauses in speech
async def run(self) -> None:
data = await self.audio_in_poller.poll()
@@ -62,15 +70,17 @@ class Streaming(CyclicBehaviour):
if len(self.audio_buffer) > 0:
logger.debug("No audio data received. Discarding buffer until new data arrives.")
self.audio_buffer = np.array([], dtype=np.float32)
- self.i_since_speech = 100
+ self.i_since_speech = settings.behaviour_settings.vad_initial_since_speech
return
# copy otherwise Torch will be sad that it's immutable
chunk = np.frombuffer(data, dtype=np.float32).copy()
- prob = self.model(torch.from_numpy(chunk), 16000).item()
+ prob = self.model(torch.from_numpy(chunk), settings.vad_settings.sample_rate_hz).item()
+ non_speech_patience = settings.behaviour_settings.vad_non_speech_patience_chunks
+ prob_threshold = settings.behaviour_settings.vad_prob_threshold
- if prob > 0.5:
- if self.i_since_speech > 3:
+ if prob > prob_threshold:
+ if self.i_since_speech > non_speech_patience:
logger.debug("Speech started.")
self.audio_buffer = np.append(self.audio_buffer, chunk)
self.i_since_speech = 0
@@ -78,7 +88,7 @@ class Streaming(CyclicBehaviour):
self.i_since_speech += 1
# prob < 0.5, so speech maybe ended. Wait a bit more before to be more certain
- if self.i_since_speech <= 3:
+ if self.i_since_speech <= non_speech_patience:
self.audio_buffer = np.append(self.audio_buffer, chunk)
return
diff --git a/src/control_backend/core/config.py b/src/control_backend/core/config.py
index 2fd16b8..826d972 100644
--- a/src/control_backend/core/config.py
+++ b/src/control_backend/core/config.py
@@ -4,10 +4,16 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
class ZMQSettings(BaseModel):
internal_comm_address: str = "tcp://localhost:5560"
+ ri_command_address: str = "tcp://localhost:0000"
+ ri_communication_address: str = "tcp://*:5555"
+ vad_agent_address: str = "tcp://localhost:5558"
class AgentSettings(BaseModel):
+ # connection settings
host: str = "localhost"
+
+ # agent names
bdi_core_agent_name: str = "bdi_core"
belief_collector_agent_name: str = "belief_collector"
text_belief_extractor_agent_name: str = "text_belief_extractor"
@@ -15,14 +21,47 @@ class AgentSettings(BaseModel):
llm_agent_name: str = "llm_agent"
test_agent_name: str = "test_agent"
transcription_agent_name: str = "transcription_agent"
-
ri_communication_agent_name: str = "ri_communication_agent"
ri_command_agent_name: str = "ri_command_agent"
+ # default SPADE port
+ default_spade_port: int = 5222
+
+
+class BehaviourSettings(BaseModel):
+ default_rcv_timeout: float = 0.1
+ llm_response_rcv_timeout: float = 1.0
+ ping_sleep_s: float = 1.0
+ comm_setup_max_retries: int = 5
+ socket_poller_timeout_ms: int = 100
+
+ # VAD settings
+ vad_prob_threshold: float = 0.5
+ vad_initial_since_speech: int = 100
+ vad_non_speech_patience_chunks: int = 3
+
+ # transcription behaviour
+ transcription_max_concurrent_tasks: int = 3
+ transcription_words_per_minute: int = 300
+ transcription_words_per_token: float = 0.75 # (3 words = 4 tokens)
+
class LLMSettings(BaseModel):
local_llm_url: str = "http://localhost:1234/v1/chat/completions"
local_llm_model: str = "openai/gpt-oss-20b"
+ request_timeout_s: int = 120
+
+
+class VADSettings(BaseModel):
+ repo_or_dir: str = "snakers4/silero-vad"
+ model_name: str = "silero_vad"
+ sample_rate_hz: int = 16000
+
+
+class SpeechModelSettings(BaseModel):
+ # model identifiers for speech recognition
+ mlx_model_name: str = "mlx-community/whisper-small.en-mlx"
+ openai_model_name: str = "small.en"
class Settings(BaseSettings):
@@ -34,6 +73,12 @@ class Settings(BaseSettings):
agent_settings: AgentSettings = AgentSettings()
+ behaviour_settings: BehaviourSettings = BehaviourSettings()
+
+ vad_settings: VADSettings = VADSettings()
+
+ speech_model_settings: SpeechModelSettings = SpeechModelSettings()
+
llm_settings: LLMSettings = LLMSettings()
model_config = SettingsConfigDict(env_file=".env")
diff --git a/src/control_backend/main.py b/src/control_backend/main.py
index 138957c..a2cc7f6 100644
--- a/src/control_backend/main.py
+++ b/src/control_backend/main.py
@@ -39,7 +39,7 @@ async def lifespan(app: FastAPI):
ri_communication_agent = RICommunicationAgent(
settings.agent_settings.ri_communication_agent_name + "@" + settings.agent_settings.host,
settings.agent_settings.ri_communication_agent_name,
- address="tcp://*:5555",
+ address=settings.zmq_settings.ri_communication_address,
bind=True,
)
await ri_communication_agent.start()
@@ -71,7 +71,7 @@ async def lifespan(app: FastAPI):
)
await text_belief_extractor.start()
- _temp_vad_agent = VADAgent("tcp://localhost:5558", False)
+ _temp_vad_agent = VADAgent(settings.zmq_settings.vad_agent_address, False)
await _temp_vad_agent.start()
yield
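For reference, because the new groups hang off the existing pydantic Settings class (which keeps env_file=".env"), individual values can still be overridden without code changes. A minimal sketch of both override paths, assuming pydantic-settings v2 behaviour; the concrete timeout values are illustrative:

    import os

    from control_backend.core.config import LLMSettings, Settings

    # Programmatic override, e.g. in tests: build a Settings instance with a custom group.
    test_settings = Settings(llm_settings=LLMSettings(request_timeout_s=30))
    assert test_settings.llm_settings.request_timeout_s == 30

    # Environment override: without env_nested_delimiter, a nested group is supplied as a
    # JSON object for the whole sub-model (via the environment or the .env file).
    os.environ["LLM_SETTINGS"] = '{"request_timeout_s": 60}'
    assert Settings().llm_settings.request_timeout_s == 60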
From 594ad91b6d14a957a23c4dd231f2a199c6d63ead Mon Sep 17 00:00:00 2001
From: Pim Hutting
Date: Wed, 5 Nov 2025 17:32:26 +0100
Subject: [PATCH 2/9] fix: removed unused values from config
ref: N25B-236
---
src/control_backend/core/config.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/src/control_backend/core/config.py b/src/control_backend/core/config.py
index a9435b7..c1c5dd1 100644
--- a/src/control_backend/core/config.py
+++ b/src/control_backend/core/config.py
@@ -30,8 +30,6 @@ class AgentSettings(BaseModel):
class BehaviourSettings(BaseModel):
- default_rcv_timeout: float = 0.1
- llm_response_rcv_timeout: float = 1.0
ping_sleep_s: float = 1.0
comm_setup_max_retries: int = 5
socket_poller_timeout_ms: int = 100
From d60df2174cbcabb510e621c581082350d58cbf03 Mon Sep 17 00:00:00 2001
From: Pim Hutting
Date: Wed, 19 Nov 2025 11:48:02 +0000
Subject: [PATCH 3/9] Apply 1 suggestion(s) to 1 file(s)
Co-authored-by: Twirre
---
src/control_backend/agents/ri_communication_agent.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/control_backend/agents/ri_communication_agent.py b/src/control_backend/agents/ri_communication_agent.py
index 7c8ec5b..a73d3a1 100644
--- a/src/control_backend/agents/ri_communication_agent.py
+++ b/src/control_backend/agents/ri_communication_agent.py
@@ -56,7 +56,7 @@ class RICommunicationAgent(BaseAgent):
# See what endpoint we received
match message["endpoint"]:
case "ping":
- await asyncio.sleep(settings.agent_settings.behaviour_settings.ping_sleep_s)
+ await asyncio.sleep(settings.behaviour_settings.ping_sleep_s)
case _:
self.agent.logger.info(
"Received message with topic different than ping, while ping expected."
From 7120a7a8aa5a9c43cb2d0ee19fa6232d2fbd3eba Mon Sep 17 00:00:00 2001
From: Pim Hutting
Date: Wed, 19 Nov 2025 11:48:11 +0000
Subject: [PATCH 4/9] Apply 1 suggestion(s) to 1 file(s)
Co-authored-by: Twirre
---
src/control_backend/agents/transcription/transcription_agent.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/control_backend/agents/transcription/transcription_agent.py b/src/control_backend/agents/transcription/transcription_agent.py
index cb9e5b4..25fb785 100644
--- a/src/control_backend/agents/transcription/transcription_agent.py
+++ b/src/control_backend/agents/transcription/transcription_agent.py
@@ -28,7 +28,7 @@ class TranscriptionAgent(BaseAgent):
class Transcribing(CyclicBehaviour):
def __init__(self, audio_in_socket: azmq.Socket):
super().__init__()
- max_concurrent_tasks = settings.transcription_settings.max_concurrent_transcriptions
+ max_concurrent_tasks = settings.behaviour_settings.transcription_max_concurrent_tasks
self.audio_in_socket = audio_in_socket
self.speech_recognizer = SpeechRecognizer.best_type()
self._concurrency = asyncio.Semaphore(max_concurrent_tasks)
From f74efba5119fb07b0487e31d974d1ddb9bd158a1 Mon Sep 17 00:00:00 2001
From: Pim Hutting
Date: Wed, 19 Nov 2025 11:48:31 +0000
Subject: [PATCH 5/9] Apply 1 suggestion(s) to 1 file(s)
Co-authored-by: Twirre
---
src/control_backend/agents/vad_agent.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/control_backend/agents/vad_agent.py b/src/control_backend/agents/vad_agent.py
index 860547d..dcc628f 100644
--- a/src/control_backend/agents/vad_agent.py
+++ b/src/control_backend/agents/vad_agent.py
@@ -64,8 +64,8 @@ class Streaming(CyclicBehaviour):
async def reset(self):
"""Clears the ZeroMQ queue and tells this behavior to start."""
discarded = 0
- poll_time = settings.behaviour_settings.vad_poll_time
- while await self.audio_in_poller.poll(poll_time) is not None:
+ # Poll for the shortest amount of time possible to clear the queue
+ while await self.audio_in_poller.poll(1) is not None:
discarded += 1
self.agent.logger.info(f"Discarded {discarded} audio packets before starting.")
self._ready = True
From 6436fc12c8df9d491e422d0b200733397a8252e7 Mon Sep 17 00:00:00 2001
From: Pim Hutting
Date: Wed, 19 Nov 2025 11:48:49 +0000
Subject: [PATCH 6/9] Apply 1 suggestion(s) to 1 file(s)
Co-authored-by: Twirre
---
src/control_backend/core/config.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/src/control_backend/core/config.py b/src/control_backend/core/config.py
index c1c5dd1..808dd4a 100644
--- a/src/control_backend/core/config.py
+++ b/src/control_backend/core/config.py
@@ -38,7 +38,6 @@ class BehaviourSettings(BaseModel):
vad_prob_threshold: float = 0.5
vad_initial_since_speech: int = 100
vad_non_speech_patience_chunks: int = 3
- vad_poll_time: int = 1
# transcription behaviour
transcription_max_concurrent_tasks: int = 3
From 1372fe89f63b6c4d3a7aadd118d306cb37a2c83b Mon Sep 17 00:00:00 2001
From: Pim Hutting
Date: Wed, 19 Nov 2025 11:50:07 +0000
Subject: [PATCH 7/9] Apply 1 suggestion(s) to 1 file(s)
Co-authored-by: Twirre
---
test/unit/agents/test_vad_streaming.py | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/test/unit/agents/test_vad_streaming.py b/test/unit/agents/test_vad_streaming.py
index 8a0e072..47e8332 100644
--- a/test/unit/agents/test_vad_streaming.py
+++ b/test/unit/agents/test_vad_streaming.py
@@ -53,6 +53,12 @@ def patch_settings(monkeypatch):
async def simulate_streaming_with_probabilities(streaming, probabilities: list[float]):
+ """
+ Simulates a streaming scenario with given VAD model probabilities for testing purposes.
+
+ :param streaming: The streaming component to be tested.
+ :param probabilities: A list of probabilities representing the outputs of the VAD model.
+ """
model_item = MagicMock()
model_item.item.side_effect = probabilities
streaming.model = MagicMock()
From 98dd2637c0b867e08d04ffef821cb57a05151b4d Mon Sep 17 00:00:00 2001
From: Pim Hutting
Date: Wed, 19 Nov 2025 11:50:17 +0000
Subject: [PATCH 8/9] Apply 1 suggestion(s) to 1 file(s)
Co-authored-by: Twirre
---
test/unit/agents/test_vad_streaming.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/test/unit/agents/test_vad_streaming.py b/test/unit/agents/test_vad_streaming.py
index 47e8332..45ed77e 100644
--- a/test/unit/agents/test_vad_streaming.py
+++ b/test/unit/agents/test_vad_streaming.py
@@ -74,6 +74,9 @@ async def simulate_streaming_with_probabilities(streaming, probabilities: list[f
@pytest.mark.asyncio
async def test_voice_activity_detected(audio_in_socket, audio_out_socket, streaming):
+ """
+ Test a scenario where there is voice activity detected between silences.
+ """
speech_chunk_count = 5
probabilities = [0.0] * 5 + [1.0] * speech_chunk_count + [0.0] * 5
await simulate_streaming_with_probabilities(streaming, probabilities)
From 93b8db03e76c864012a6d226fc5c2a970f9c48a5 Mon Sep 17 00:00:00 2001
From: Pim Hutting
Date: Wed, 19 Nov 2025 12:58:50 +0100
Subject: [PATCH 9/9] fix: delete personal git history file
Accidentally committed a file containing personal git reflog output; this patch removes it.
close: N25B-236
---
et --hard f8dee6d | 41 -----------------------------------------
1 file changed, 41 deletions(-)
delete mode 100644 et --hard f8dee6d
diff --git a/et --hard f8dee6d b/et --hard f8dee6d
deleted file mode 100644
index 663bfc7..0000000
--- a/et --hard f8dee6d
+++ /dev/null
@@ -1,41 +0,0 @@
-bcbfc26 (HEAD -> feat/belief-collector, origin/feat/add-end-of-utterance-detection) HEAD@{0}: reset: moving to ORIG_HEAD
-e48096f HEAD@{1}: checkout: moving from feat/add-end-of-utterance-detection to feat/belief-collector
-ab94c2e (feat/add-end-of-utterance-detection) HEAD@{2}: commit (merge): Merge remote-tracking branch 'origin/dev' into feat/add-end-of-utterance-detection
-bcbfc26 (HEAD -> feat/belief-collector, origin/feat/add-end-of-utterance-detection) HEAD@{3}: checkout: moving from feat/belief-collector to feat/add-end-of-utterance-detection
-e48096f HEAD@{4}: checkout: moving from feat/add-end-of-utterance-detection to feat/belief-collector
-bcbfc26 (HEAD -> feat/belief-collector, origin/feat/add-end-of-utterance-detection) HEAD@{5}: checkout: moving from feat/belief-collector to feat/add-end-of-utterance-detection
-e48096f HEAD@{6}: reset: moving to HEAD
-e48096f HEAD@{7}: commit (merge): Merge remote-tracking branch 'origin/dev' into feat/belief-collector
-f8dee6d (origin/feat/belief-collector) HEAD@{8}: commit: test: added tests
-2efce93 HEAD@{9}: checkout: moving from dev to feat/belief-collector
-e36f5fc (origin/dev, dev) HEAD@{10}: pull: Fast-forward
-9b36982 HEAD@{11}: checkout: moving from feat/belief-collector to dev
-2efce93 HEAD@{12}: checkout: moving from feat/vad-agent to feat/belief-collector
-f73f510 (origin/feat/vad-agent, feat/vad-agent) HEAD@{13}: checkout: moving from feat/vad-agent to feat/vad-agent
-f73f510 (origin/feat/vad-agent, feat/vad-agent) HEAD@{14}: pull: Fast-forward
-fd1face HEAD@{15}: checkout: moving from feat/belief-collector to feat/vad-agent
-2efce93 HEAD@{16}: reset: moving to HEAD
-2efce93 HEAD@{17}: commit: fix: made beliefs a dict of lists
-1f34b14 HEAD@{18}: commit: Feat: Implement belief collector
-9b36982 HEAD@{19}: checkout: moving from style/fix-style to feat/belief-collector
-65cfdda (origin/style/fix-style, style/fix-style) HEAD@{20}: checkout: moving from feat/belief-collector to style/fix-style
-9b36982 HEAD@{21}: reset: moving to HEAD
-9b36982 HEAD@{22}: checkout: moving from dev to feat/belief-collector
-9b36982 HEAD@{23}: checkout: moving from feat/belief-collector to dev
-9b36982 HEAD@{24}: reset: moving to HEAD
-9b36982 HEAD@{25}: checkout: moving from feat/belief-from-text to feat/belief-collector
-bece44b (feat/belief-from-text) HEAD@{26}: checkout: moving from feat/belief-collector to feat/belief-from-text
-9b36982 HEAD@{27}: reset: moving to HEAD
-9b36982 HEAD@{28}: checkout: moving from dev to feat/belief-collector
-9b36982 HEAD@{29}: pull: Fast-forward
-71ddb50 HEAD@{30}: checkout: moving from feat/add-end-of-utterance-detection to dev
-bcbfc26 (HEAD -> feat/belief-collector, origin/feat/add-end-of-utterance-detection) HEAD@{31}: commit: feat: prototype end-of-utterance scorer over text input
-379e04a (origin/feat/add-speech-recognition) HEAD@{32}: checkout: moving from feat/add-end-of-utterance-detection to feat/add-end-of-utterance-detection
-379e04a (origin/feat/add-speech-recognition) HEAD@{33}: rebase (abort): updating HEAD
-71ddb50 HEAD@{34}: rebase (start): checkout dev
-379e04a (origin/feat/add-speech-recognition) HEAD@{35}: checkout: moving from dev to feat/add-end-of-utterance-detection
-71ddb50 HEAD@{36}: checkout: moving from feat/add-end-of-utterance-detection to dev
-379e04a (origin/feat/add-speech-recognition) HEAD@{37}: checkout: moving from feat/add-end-of-utterance-detection to feat/add-end-of-utterance-detection
-379e04a (origin/feat/add-speech-recognition) HEAD@{38}: checkout: moving from feat/add-end-of-utterance-detection to feat/add-end-of-utterance-detection
-379e04a (origin/feat/add-speech-recognition) HEAD@{39}: checkout: moving from main to feat/add-end-of-utterance-detection
-54b22d8 (origin/main, origin/HEAD, main) HEAD@{40}: clone: from git.science.uu.nl:ics/sp/2025/n25b/pepperplus-cb.git