diff --git a/src/control_backend/agents/bdi/text_belief_extractor_agent.py b/src/control_backend/agents/bdi/text_belief_extractor_agent.py index 9ea6b9a..fc244b9 100644 --- a/src/control_backend/agents/bdi/text_belief_extractor_agent.py +++ b/src/control_backend/agents/bdi/text_belief_extractor_agent.py @@ -150,6 +150,9 @@ class TextBeliefExtractorAgent(BaseAgent): return available_beliefs = [b for b in belief_list.beliefs if isinstance(b, SemanticBelief)] + self._current_beliefs = BeliefState( + false={InternalBelief(name=b.name, arguments=None) for b in available_beliefs}, + ) self.belief_inferrer.available_beliefs = available_beliefs self.logger.debug( "Received %d semantic beliefs from the program manager: %s", @@ -170,6 +173,9 @@ class TextBeliefExtractorAgent(BaseAgent): available_goals = {g for g in goals_list.goals if g.can_fail} available_goals -= self._force_completed_goals self.goal_inferrer.goals = available_goals + self._current_goal_completions = { + f"achieved_{AgentSpeakGenerator.slugify(goal)}": False for goal in available_goals + } self.logger.debug( "Received %d failable goals from the program manager: %s", len(available_goals), diff --git a/src/control_backend/agents/perception/vad_agent.py b/src/control_backend/agents/perception/vad_agent.py index 920c3ab..f397563 100644 --- a/src/control_backend/agents/perception/vad_agent.py +++ b/src/control_backend/agents/perception/vad_agent.py @@ -285,9 +285,10 @@ class VADAgent(BaseAgent): assert self.audio_out_socket is not None await self.audio_out_socket.send(self.audio_buffer[: -2 * len(chunk)].tobytes()) - # At this point, we know that the speech has ended. - # Prepend the last chunk that had no speech, for a more fluent boundary - self.audio_buffer = chunk + # At this point, we know that there is no speech. + # Prepend the last few chunks that had no speech, for a more fluent boundary. + self.audio_buffer = np.append(self.audio_buffer, chunk) + self.audio_buffer = self.audio_buffer[-begin_silence_length * len(chunk) :] async def handle_message(self, msg: InternalMessage): """ diff --git a/src/control_backend/agents/perception/visual_emotion_recognition_agent/visual_emotion_recognition_agent.py b/src/control_backend/agents/perception/visual_emotion_recognition_agent/visual_emotion_recognition_agent.py index 5344b9b..7f21d21 100644 --- a/src/control_backend/agents/perception/visual_emotion_recognition_agent/visual_emotion_recognition_agent.py +++ b/src/control_backend/agents/perception/visual_emotion_recognition_agent/visual_emotion_recognition_agent.py @@ -7,7 +7,6 @@ import numpy as np import zmq import zmq.asyncio as azmq from pydantic_core import ValidationError -import struct from control_backend.agents import BaseAgent from control_backend.agents.perception.visual_emotion_recognition_agent.visual_emotion_recognizer import ( # noqa @@ -89,7 +88,7 @@ class VisualEmotionRecognitionAgent(BaseAgent): while self._running: try: frame_bytes = await self.video_in_socket.recv() - + # Convert bytes to a numpy buffer nparr = np.frombuffer(frame_bytes, np.uint8) @@ -126,7 +125,6 @@ class VisualEmotionRecognitionAgent(BaseAgent): except zmq.Again: self.logger.warning("No video frame received within timeout.") - async def update_emotions(self, prev_emotions: set[str], emotions: set[str]): """ Compare emotions from previous window and current emotions, diff --git a/src/control_backend/agents/user_interrupt/user_interrupt_agent.py b/src/control_backend/agents/user_interrupt/user_interrupt_agent.py index 7320896..b1ce8b7 100644 --- a/src/control_backend/agents/user_interrupt/user_interrupt_agent.py +++ b/src/control_backend/agents/user_interrupt/user_interrupt_agent.py @@ -300,7 +300,7 @@ class UserInterruptAgent(BaseAgent): :param text_to_say: The string that the robot has to say. """ - experiment_logger.chat(text_to_say, extra={"role": "user"}) + experiment_logger.chat(text_to_say, extra={"role": "assistant"}) cmd = SpeechCommand(data=text_to_say, is_priority=True) out_msg = InternalMessage( to=settings.agent_settings.robot_speech_name,