Compare commits


4 Commits

dfd2c3a0a1  2026-01-30 20:39:10 +01:00
    fix: reset counter after each loop
    ref: N25B-395

3efe8a7b06  2026-01-30 20:34:16 +01:00
    chore: change emo loop frequency

3a5c27e01f  2026-01-30 20:33:16 +01:00
    fix: update face detected at same time as emotions
    ref: N25B-395

1f799299b9  2026-01-30 20:12:31 +01:00
    feat: (hopefully) face detection
    Simplified implementation, relying on the already-present VED Agent.
    ref: N25B-395
12 changed files with 135 additions and 166 deletions

View File

@@ -3,9 +3,6 @@
 # The hostname of the Robot Interface. Change if the Control Backend and Robot Interface are running on different computers.
 RI_HOST="localhost"
-# The hostname of the User Interface. This is what the browser displays in the URL bar. Strangely, even if the UI is running on a different host than the backend, if the computer with the browser is also hosting the UI itself, this value should be http://localhost.
-UI_HOST="http://localhost:5173"
 # URL for the local LLM API. Must be an API that implements the OpenAI Chat Completions API, but most do.
 LLM_SETTINGS__LOCAL_LLM_URL="http://localhost:1234/v1/chat/completions"
@@ -15,8 +12,8 @@ LLM_SETTINGS__LOCAL_LLM_MODEL="gpt-oss"
 # Number of non-speech chunks to wait before speech ended. A chunk is approximately 31 ms. Increasing this number allows longer pauses in speech, but also increases response time.
 BEHAVIOUR_SETTINGS__VAD_NON_SPEECH_PATIENCE_CHUNKS=15
-# Timeout in milliseconds for socket polling. Increase this number if network latency/jitter is high, often the case when using Wi-Fi. Perhaps 500 ms or more. A symptom of this issue is transcriptions getting cut off.
-BEHAVIOUR_SETTINGS__SOCKET_POLLER_TIMEOUT_MS=400
+# Timeout in milliseconds for socket polling. Increase this number if network latency/jitter is high, often the case when using Wi-Fi. Perhaps 500 ms. A symptom of this issue is transcriptions getting cut off.
+BEHAVIOUR_SETTINGS__SOCKET_POLLER_TIMEOUT_MS=100
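For reference, the comments above imply the patience window in real time: 15 non-speech chunks at roughly 31 ms each tolerate a pause of about 465 ms before speech is considered ended. A quick sanity check (hypothetical helper, not part of this change):

    # Hypothetical helper to sanity-check the VAD patience setting above.
    CHUNK_MS = 31  # one chunk is approximately 31 ms, per the comment above

    def patience_ms(non_speech_patience_chunks: int) -> int:
        """Longest pause (ms) tolerated before speech counts as ended."""
        return non_speech_patience_chunks * CHUNK_MS

    print(patience_ms(15))  # 465 ms with the default of 15 chunks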

View File

@@ -24,7 +24,6 @@ dependencies = [
     "sphinx-rtd-theme>=3.0.2",
     "tf-keras>=2.20.1",
     "torch>=2.8.0",
-    "tornado ; sys_platform == 'win32'",
     "uvicorn>=0.37.0",
 ]

View File

@@ -4,7 +4,6 @@ University within the Software Project course.
 © Copyright Utrecht University (Department of Information and Computing Sciences)
 """
-import logging
 from functools import singledispatchmethod
 from slugify import slugify
@@ -31,6 +30,7 @@ from control_backend.schemas.program import (
     BasicNorm,
     ConditionalNorm,
     EmotionBelief,
+    FaceBelief,
     GestureAction,
     Goal,
     InferredBelief,
@@ -67,7 +67,6 @@ class AgentSpeakGenerator:
     """
     _asp: AstProgram
-    logger = logging.getLogger(__name__)
     def generate(self, program: Program) -> str:
         """
@@ -107,7 +106,7 @@ class AgentSpeakGenerator:
        check if a keyword is a substring of the user's message.
        The generated rule has the form:
-       keyword_said(Keyword) :- user_said(Message) & .substring_case_insensitive(Keyword, Message, Pos) & Pos >= 0
+       keyword_said(Keyword) :- user_said(Message) & .substring(Keyword, Message, Pos) & Pos >= 0
        This enables the system to trigger behaviors based on keyword detection.
        """
@@ -119,7 +118,7 @@ class AgentSpeakGenerator:
            AstRule(
                AstLiteral("keyword_said", [keyword]),
                AstLiteral("user_said", [message])
-               & AstLiteral(".substring_case_insensitive", [keyword, message, position])
+               & AstLiteral(".substring", [keyword, message, position])
                & (position >= 0),
            )
        )
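Concretely, for a program keyword such as "hello", the AstRule built above should render to roughly the rule quoted in the docstring (a sketch; exact formatting depends on the Ast* serializers):

    # Rough rendering of the rule built above for the keyword "hello":
    #
    #   keyword_said("hello") :- user_said(Message)
    #       & .substring("hello", Message, Pos) & Pos >= 0
    #
    # Note: the library's .substring presumably matches case sensitively;
    # the removed .substring_case_insensitive existed to lowercase both sides.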
@@ -135,6 +134,7 @@ class AgentSpeakGenerator:
        """
        self._add_reply_with_goal_plan()
        self._add_say_plan()
+       self._add_reply_plan()
        self._add_notify_cycle_plan()
    def _add_reply_with_goal_plan(self):
@@ -198,6 +198,40 @@
            )
        )
+    def _add_reply_plan(self):
+        """
+        Adds a plan for general reply actions.
+        This plan handles general reply actions where the agent needs to respond
+        to user input without a specific conversational goal. It:
+        1. Marks that the agent has responded this turn
+        2. Gathers all active norms
+        3. Generates a reply based on the user message and norms
+        Trigger: +!reply
+        Context: user_said(Message)
+        """
+        self._asp.plans.append(
+            AstPlan(
+                TriggerType.ADDED_GOAL,
+                AstLiteral("reply"),
+                [AstLiteral("user_said", [AstVar("Message")])],
+                [
+                    AstStatement(StatementType.ADD_BELIEF, AstLiteral("responded_this_turn")),
+                    AstStatement(
+                        StatementType.DO_ACTION,
+                        AstLiteral(
+                            "findall",
+                            [AstVar("Norm"), AstLiteral("norm", [AstVar("Norm")]), AstVar("Norms")],
+                        ),
+                    ),
+                    AstStatement(
+                        StatementType.DO_ACTION,
+                        AstLiteral("reply", [AstVar("Message"), AstVar("Norms")]),
+                    ),
+                ],
+            )
+        )
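Per its docstring, the plan above should come out as roughly the following AgentSpeak (a sketch; whether findall and reply are emitted with a leading dot depends on how DO_ACTION statements are serialized, though `.reply` is registered with a dot in the BDI core agent below):

    # Rough AgentSpeak rendering of the reply plan built above (sketch):
    #
    #   +!reply : user_said(Message) <-
    #       +responded_this_turn;
    #       .findall(Norm, norm(Norm), Norms);
    #       .reply(Message, Norms).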
    def _add_notify_cycle_plan(self):
        """
@@ -235,39 +269,6 @@
            )
        )
-    def _add_stop_plan(self, phase: Phase):
-        """
-        Adds a plan to stop the program. This just skips to the end phase,
-        where there is no behavior defined.
-        """
-        self._asp.plans.append(
-            AstPlan(
-                TriggerType.ADDED_GOAL,
-                AstLiteral("stop"),
-                [AstLiteral("phase", [AstString(phase.id)])],
-                [
-                    AstStatement(
-                        StatementType.DO_ACTION,
-                        AstLiteral(
-                            "notify_transition_phase",
-                            [
-                                AstString(phase.id),
-                                AstString("end")
-                            ]
-                        )
-                    ),
-                    AstStatement(
-                        StatementType.REMOVE_BELIEF,
-                        AstLiteral("phase", [AstVar("Phase")]),
-                    ),
-                    AstStatement(
-                        StatementType.ADD_BELIEF,
-                        AstLiteral("phase", [AstString("end")])
-                    )
-                ]
-            )
-        )
    def _process_phases(self, phases: list[Phase]) -> None:
        """
        Processes all phases in the program and their transitions.
@@ -284,6 +285,21 @@
        self._process_phase(curr_phase)
        self._add_phase_transition(curr_phase, next_phase)
+        # End phase behavior
+        # When deleting this, the entire `reply` plan and action can be deleted
+        self._asp.plans.append(
+            AstPlan(
+                type=TriggerType.ADDED_BELIEF,
+                trigger_literal=AstLiteral("user_said", [AstVar("Message")]),
+                context=[AstLiteral("phase", [AstString("end")])],
+                body=[
+                    AstStatement(
+                        StatementType.DO_ACTION, AstLiteral("notify_user_said", [AstVar("Message")])
+                    ),
+                    AstStatement(StatementType.ACHIEVE_GOAL, AstLiteral("reply")),
+                ],
+            )
+        )
    def _process_phase(self, phase: Phase) -> None:
        """
@@ -310,9 +326,6 @@
        for trigger in phase.triggers:
            self._process_trigger(trigger, phase)
-        # Add force transition to end phase
-        self._add_stop_plan(phase)
    def _add_phase_transition(self, from_phase: Phase | None, to_phase: Phase | None) -> None:
        """
        Adds plans for transitioning between phases.
@@ -488,13 +501,9 @@
            if isinstance(step, Goal):
                subgoals.append(step)
-       if not goal.can_fail:
+       if not goal.can_fail and not continues_response:
            body.append(AstStatement(StatementType.ADD_BELIEF, self._astify(goal, achieved=True)))
-       if len(body) == 0:
-           self.logger.warning("Goal with no plan detected: %s", goal.name)
-           body.append(AstStatement(StatementType.EMPTY, AstLiteral("true")))
        self._asp.plans.append(AstPlan(TriggerType.ADDED_GOAL, self._astify(goal), context, body))
        self._asp.plans.append(
@@ -555,10 +564,10 @@
            )
        )
        for step in trigger.plan.steps:
+           if isinstance(step, Goal):
+               new_step = step.model_copy(update={"can_fail": False})  # triggers are sequence
+               subgoals.append(new_step)
            body.append(self._step_to_statement(step))
-           if isinstance(step, Goal):
-               step.can_fail = False  # triggers are continuous sequence
-               subgoals.append(step)
        # Arbitrary wait for UI to display nicely
        body.append(
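The switch from `step.can_fail = False` to `model_copy` follows from `ProgramElement` being frozen (see the schema file below): frozen pydantic v2 models reject attribute assignment. A minimal standalone illustration (hypothetical `Step` model, not the repo's):

    # Minimal pydantic v2 illustration of why mutation became model_copy.
    from pydantic import BaseModel

    class Step(BaseModel):  # hypothetical stand-in for the repo's Goal
        model_config = {"frozen": True}
        can_fail: bool = True

    step = Step()
    # step.can_fail = False              # raises: the instance is frozen
    new_step = step.model_copy(update={"can_fail": False})
    print(new_step.can_fail, step.can_fail)  # False True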
@@ -602,7 +611,6 @@
        - check_triggers: When no triggers are applicable
        - transition_phase: When phase transition conditions aren't met
        - force_transition_phase: When forced transitions aren't possible
-       - stop: When we are already in the end phase
        """
        # Trigger fallback
        self._asp.plans.append(
@@ -634,16 +642,6 @@
            )
        )
-        # Stop fallback
-        self._asp.plans.append(
-            AstPlan(
-                TriggerType.ADDED_GOAL,
-                AstLiteral("stop"),
-                [],
-                [AstStatement(StatementType.EMPTY, AstLiteral("true"))],
-            )
-        )
    @singledispatchmethod
    def _astify(self, element: ProgramElement) -> AstExpression:
        """
@@ -690,6 +688,10 @@
    def _(self, eb: EmotionBelief) -> AstExpression:
        return AstLiteral("emotion_detected", [AstAtom(eb.emotion)])
+    @_astify.register
+    def _(self, fb: FaceBelief) -> AstExpression:
+        return AstLiteral("face_present")
    @_astify.register
    def _(self, ib: InferredBelief) -> AstExpression:
        """

View File

@@ -176,8 +176,6 @@ class BDICoreAgent(BaseAgent):
                self._force_norm(msg.body)
            case "force_next_phase":
                self._force_next_phase()
-           case "stop":
-               self._stop()
            case _:
                self.logger.warning("Received unknown user interruption: %s", msg)
@@ -337,11 +335,6 @@ class BDICoreAgent(BaseAgent):
        self.logger.info("Manually forced phase transition.")
-    def _stop(self):
-        self._set_goal("stop")
-        self.logger.info("Stopped the program (skipped to end phase).")
    def _add_custom_actions(self) -> None:
        """
        Add any custom actions here. Inside `@self.actions.add()`, the first argument is
@@ -349,28 +342,6 @@
        the function expects (which will be located in `term.args`).
        """
-        @self.actions.add(".substring_case_insensitive", 3)
-        @agentspeak.optimizer.function_like
-        def _substring(agent, term, intention):
-            """
-            Find out if a string is a substring of another (case insensitive). Copied mostly from
-            the agentspeak library method .substring.
-            """
-            needle = agentspeak.asl_str(agentspeak.grounded(term.args[0], intention.scope)).lower()
-            haystack = agentspeak.asl_str(agentspeak.grounded(term.args[1], intention.scope)).lower()
-            choicepoint = object()
-            pos = haystack.find(needle)
-            while pos != -1:
-                intention.stack.append(choicepoint)
-                if agentspeak.unify(term.args[2], pos, intention.scope, intention.stack):
-                    yield
-                agentspeak.reroll(intention.scope, intention.stack, choicepoint)
-                pos = haystack.find(needle, pos + 1)
        @self.actions.add(".reply", 2)
        def _reply(agent, term, intention):
            """
@@ -496,6 +467,7 @@ class BDICoreAgent(BaseAgent):
                body=str(trigger_name),
            )
+           # TODO: check with Pim
            self.add_behavior(self.send(msg))
            yield
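The `_add_custom_actions` docstring above describes the registration pattern: the first argument to `@self.actions.add()` is the action name, the second its arity, and ground arguments arrive via `term.args`. A hedged sketch of a custom action in that style (hypothetical `.string_length`, modeled on the removed `.substring_case_insensitive`):

    # Hypothetical custom action following the documented pattern; the
    # agentspeak helpers used here are the same ones the removed action used.
    @self.actions.add(".string_length", 2)
    def _string_length(agent, term, intention):
        string = agentspeak.asl_str(agentspeak.grounded(term.args[0], intention.scope))
        if agentspeak.unify(term.args[1], len(string), intention.scope, intention.stack):
            yield  # succeed once, binding the second argument to the length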

View File

@@ -538,9 +538,10 @@ class GoalAchievementInferrer(SemanticBeliefInferrer):
    async def _infer_goal(self, conversation: ChatHistory, goal: BaseGoal) -> bool:
        prompt = f"""{self._format_conversation(conversation)}
-Given the above conversation, has the following goal been achieved?
-Description of the goal: {goal.description or goal.name}
+Given the above conversation, what has the following goal been achieved?
+The name of the goal: {goal.name}
+Description of the goal: {goal.description}
Answer with literally only `true` or `false` (without backticks)."""
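The prompt demands a bare `true` or `false`; a defensive parse of the model's reply might look like this (hypothetical helper, not part of this change):

    # Hypothetical helper for the true/false protocol used by _infer_goal.
    def parse_goal_achieved(llm_output: str) -> bool:
        answer = llm_output.strip().strip("`").lower()
        return answer == "true"  # anything malformed counts as not achieved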

View File

@@ -241,23 +241,12 @@ class VADAgent(BaseAgent):
            self._reset_needed = False
        assert self.audio_in_poller is not None
-       non_speech_patience = settings.behaviour_settings.vad_non_speech_patience_chunks
-       begin_silence_length = settings.behaviour_settings.vad_begin_silence_chunks
-       prob_threshold = settings.behaviour_settings.vad_prob_threshold
        data = await self.audio_in_poller.poll()
        if data is None:
            if len(self.audio_buffer) > 0:
-               # Failed to receive new audio. Send remaining buffer to be transcribed.
-               if len(self.audio_buffer) > begin_silence_length * 512:
-                   self.logger.debug("Speech ended.")
-                   assert self.audio_out_socket is not None
-                   await self.audio_out_socket.send(self.audio_buffer[: -2 * 512].tobytes())
-               else:
-                   self.logger.debug(
-                       "No audio data received. Discarding buffer until new data arrives."
-                   )
+               self.logger.debug(
+                   "No audio data received. Discarding buffer until new data arrives."
+               )
            self.audio_buffer = np.array([], dtype=np.float32)
            self.i_since_speech = settings.behaviour_settings.vad_initial_since_speech
            continue
@@ -266,6 +255,9 @@ class VADAgent(BaseAgent):
        chunk = np.frombuffer(data, dtype=np.float32).copy()
        assert self.model is not None
        prob = self.model(torch.from_numpy(chunk), settings.vad_settings.sample_rate_hz).item()
+       non_speech_patience = settings.behaviour_settings.vad_non_speech_patience_chunks
+       begin_silence_length = settings.behaviour_settings.vad_begin_silence_chunks
+       prob_threshold = settings.behaviour_settings.vad_prob_threshold
        if prob > prob_threshold:
            if self.i_since_speech > non_speech_patience + begin_silence_length:
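For orientation, the settings read above feed a simple hangover scheme: a counter of consecutive non-speech chunks compared against the patience threshold. A reduced sketch of that logic (assumed shape; the real loop also buffers audio and manages sockets):

    # Reduced sketch of the silence-patience logic (assumed shape only).
    def step(i_since_speech: int, prob: float, prob_threshold: float,
             patience_chunks: int) -> tuple[bool, int]:
        """Process one ~31 ms chunk; return (speech_just_ended, new_counter)."""
        if prob > prob_threshold:      # speech: reset the silence counter
            return False, 0
        i_since_speech += 1            # silence: one more chunk of patience used
        return i_since_speech == patience_chunks, i_since_speech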

View File

@@ -14,7 +14,7 @@ from control_backend.agents.perception.visual_emotion_recognition_agent.visual_e
 )
 from control_backend.core.agent_system import InternalMessage
 from control_backend.core.config import settings
-from control_backend.schemas.belief_message import Belief
+from control_backend.schemas.belief_message import Belief, BeliefMessage
 class VisualEmotionRecognitionAgent(BaseAgent):
@@ -44,6 +44,7 @@ class VisualEmotionRecognitionAgent(BaseAgent):
        self.timeout_ms = timeout_ms
        self.window_duration = window_duration
        self.min_frames_required = min_frames_required
+       self._face_detected = False
        # Pause functionality
        # NOTE: flag is set when running, cleared when paused
@@ -89,6 +90,9 @@ class VisualEmotionRecognitionAgent(BaseAgent):
        # Tracks counts of detected emotions per face index
        face_stats = defaultdict(Counter)
+       # How many times a face has been detected
+       face_detection_yes_no = [0, 0]
        prev_dominant_emotions = set()
        while self._running:
@@ -97,8 +101,8 @@ class VisualEmotionRecognitionAgent(BaseAgent):
            width, height, image_bytes = await self.video_in_socket.recv_multipart()
-           width = int.from_bytes(width, 'little')
-           height = int.from_bytes(height, 'little')
+           width = int.from_bytes(width, "little")
+           height = int.from_bytes(height, "little")
            # Convert bytes to a numpy buffer
            image_array = np.frombuffer(image_bytes, np.uint8)
@@ -107,6 +111,13 @@ class VisualEmotionRecognitionAgent(BaseAgent):
            # Get the dominant emotion from each face
            current_emotions = self.emotion_recognizer.sorted_dominant_emotions(frame)
+           # Update face face_detection_yes_no
+           if len(current_emotions) > 0:
+               face_detection_yes_no[0] += 1
+           else:
+               face_detection_yes_no[1] += 1
            # Update emotion counts for each detected face
            for i, emotion in enumerate(current_emotions):
                face_stats[i][emotion] += 1
@@ -122,18 +133,31 @@ class VisualEmotionRecognitionAgent(BaseAgent):
                    dominant_emotion = counter.most_common(1)[0][0]
                    window_dominant_emotions.add(dominant_emotion)
+               if (
+                   face_detection_yes_no[0] > face_detection_yes_no[1]
+                   and not self._face_detected
+               ):
+                   self._face_detected = True
+                   await self._inform_face_detected()
+               elif (
+                   face_detection_yes_no[0] <= face_detection_yes_no[1] and self._face_detected
+               ):
+                   self._face_detected = False
+                   await self._inform_face_detected()
+               face_detection_yes_no = [0, 0]
                await self.update_emotions(prev_dominant_emotions, window_dominant_emotions)
                prev_dominant_emotions = window_dominant_emotions
                face_stats.clear()
                next_window_time = time.time() + self.window_duration
        except zmq.Again:
-           pass
+           self.logger.warning("No video frame received within timeout.")
        except Exception as e:
            self.logger.error(f"Error in emotion recognition loop: {e}")
    async def update_emotions(self, prev_emotions: set[str], emotions: set[str]):
        """
        Compare emotions from previous window and current emotions,
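The window logic above is edge-triggered: the face belief is only (re)sent when the per-window majority vote flips the `_face_detected` state. The rule in isolation (hypothetical standalone function):

    # Standalone sketch of the edge-triggered majority vote above.
    def next_face_state(yes: int, no: int, detected: bool) -> tuple[bool, bool]:
        """Return (new_state, should_notify) for one aggregation window."""
        majority_present = yes > no
        if majority_present != detected:
            return majority_present, True   # state flipped: notify the BDI core
        return detected, False              # unchanged: send nothing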
@@ -149,9 +173,7 @@ class VisualEmotionRecognitionAgent(BaseAgent):
        for emotion in emotions_to_remove:
            self.logger.info(f"Emotion '{emotion}' has disappeared.")
            try:
-               emotion_beliefs_remove.append(
-                   Belief(name="emotion_detected", arguments=[emotion], remove=True)
-               )
+               emotion_beliefs_remove.append(Belief(name="emotion_detected", arguments=[emotion]))
            except ValidationError:
                self.logger.warning("Invalid belief for emotion removal: %s", emotion)
@@ -175,11 +197,25 @@ class VisualEmotionRecognitionAgent(BaseAgent):
            )
            await self.send(message)
+    async def _inform_face_detected(self):
+        if self._face_detected:
+            belief_message = BeliefMessage(create=[Belief(name="face_present")])
+        else:
+            belief_message = BeliefMessage(delete=[Belief(name="face_present")])
+        msg = InternalMessage(
+            to=settings.agent_settings.bdi_core_name,
+            thread="beliefs",
+            body=belief_message.model_dump_json(),
+        )
+        await self.send(msg)
    async def handle_message(self, msg: InternalMessage):
        """
        Handle incoming messages.
        Expects messages to pause or resume the Visual Emotion Recognition
        processing from User Interrupt Agent.
        :param msg: The received internal message.
@@ -204,4 +240,3 @@ class VisualEmotionRecognitionAgent(BaseAgent):
        """
        self.video_in_socket.close()
        await super().stop()
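Assuming `BeliefMessage` is a pydantic model with `create` and `delete` lists, as the constructor calls in `_inform_face_detected` suggest, the serialized body sent on the `beliefs` thread would look roughly like:

    # Approximate message bodies produced by _inform_face_detected (field
    # names beyond create/delete/name are assumptions):
    #
    #   face appeared:    {"create": [{"name": "face_present", "arguments": []}]}
    #   face disappeared: {"delete": [{"name": "face_present", "arguments": []}]}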

View File

@@ -164,12 +164,6 @@ class UserInterruptAgent(BaseAgent):
            else:
                self.logger.info("Sent resume command.")
-           case "stop":
-               self.logger.debug(
-                   "Received stop command."
-               )
-               await self._send_stop_command()
            case "next_phase" | "reset_phase":
                await self._send_experiment_control_to_bdi_core(event_type)
            case _:
@@ -428,16 +422,4 @@ class UserInterruptAgent(BaseAgent):
        )
        await self.send(vad_message)
        # Voice Activity Detection and Visual Emotion Recognition agents
        self.logger.info("Sent resume command to VAD and VED agents.")
-    async def _send_stop_command(self):
-        """
-        Send a command to the BDI to stop the program (i.e., skip to end phase).
-        """
-        msg = InternalMessage(
-            to=settings.agent_settings.bdi_core_name,
-            body="",
-            thread="stop"
-        )
-        await self.send(msg)

View File

@@ -123,7 +123,7 @@ async def ping_stream(request: Request):
    sub_socket.setsockopt(zmq.SUBSCRIBE, b"ping")
    connected = False
-   ping_frequency = settings.behaviour_settings.sleep_s + 1
+   ping_frequency = 2
    # Even though its most likely the updates should alternate
    # (So, True - False - True - False for connectivity),

View File

@@ -82,7 +82,7 @@ class BehaviourSettings(BaseModel):
    :ivar transcription_words_per_token: Estimated words per token for transcription timing.
    :ivar transcription_token_buffer: Buffer for transcription tokens.
    :ivar conversation_history_length_limit: The maximum amount of messages to extract beliefs from.
    :ivar visual_emotion_recognition_window_duration_s: Duration in seconds over which to aggregate
        emotions and update emotion beliefs.
    :ivar visual_emotion_recognition_min_frames_per_face: Minimum number of frames per face required
        to consider a face valid.
@@ -112,7 +112,7 @@ class BehaviourSettings(BaseModel):
    conversation_history_length_limit: int = 10
    # Visual Emotion Recognition settings
-   visual_emotion_recognition_window_duration_s: int = 5
+   visual_emotion_recognition_window_duration_s: int = 3
    visual_emotion_recognition_min_frames_per_face: int = 3
    # AgentSpeak related settings
    trigger_time_to_wait: int = 2000
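These fields follow the `BEHAVIOUR_SETTINGS__` environment-prefix convention visible in the .env diff at the top of this compare, so the shortened 3-second window can presumably be restored per deployment with an override like:

    # Assumed .env override, following the BEHAVIOUR_SETTINGS__ prefix
    # convention shown earlier (not itself part of this change):
    BEHAVIOUR_SETTINGS__VISUAL_EMOTION_RECOGNITION_WINDOW_DURATION_S=5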

View File

@@ -7,7 +7,7 @@ University within the Software Project course.
 from enum import Enum
 from typing import Literal
-from pydantic import UUID4, BaseModel, field_validator
+from pydantic import UUID4, BaseModel
 class ProgramElement(BaseModel):
@@ -24,13 +24,6 @@ class ProgramElement(BaseModel):
    # To make program elements hashable
    model_config = {"frozen": True}
-    @field_validator("name")
-    @classmethod
-    def name_must_not_start_with_number(cls, v: str) -> str:
-        if v and v[0].isdigit():
-            raise ValueError('Field "name" must not start with a number.')
-        return v
 class LogicalOperator(Enum):
    """
@@ -48,8 +41,8 @@ class LogicalOperator(Enum):
    OR = "OR"
-type Belief = KeywordBelief | SemanticBelief | InferredBelief | EmotionBelief
-type BasicBelief = KeywordBelief | SemanticBelief | EmotionBelief
+type Belief = KeywordBelief | SemanticBelief | InferredBelief | EmotionBelief | FaceBelief
+type BasicBelief = KeywordBelief | SemanticBelief | EmotionBelief | FaceBelief
class KeywordBelief(ProgramElement):
@@ -124,6 +117,15 @@ class EmotionBelief(ProgramElement):
    emotion: str
+class FaceBelief(ProgramElement):
+    """
+    Represents the belief that at least one face is currently in view.
+    """
+    name: str = ""
+    face_present: bool
class Norm(ProgramElement):
    """
    Base class for behavioral norms that guide the robot's interactions.
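Tying this back to the generator change above: `_astify` maps a `FaceBelief` to the bare `face_present` literal regardless of the `face_present` flag, which is the same literal the Visual Emotion Recognition agent asserts and retracts. Sketched as a comment (constructing a `FaceBelief` likely requires the other `ProgramElement` fields, e.g. an id):

    # Sketch of the round trip added in this diff:
    #
    #   FaceBelief(face_present=True)  --_astify-->  AstLiteral("face_present")
    #
    # i.e. programs condition on the literal `face_present`, which the VED
    # agent creates/deletes via its belief messages.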

uv.lock (generated)
View File

@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.13"
 resolution-markers = [
     "python_full_version >= '3.14' and sys_platform == 'darwin'",
@@ -1524,7 +1524,6 @@ dependencies = [
    { name = "sphinx-rtd-theme" },
    { name = "tf-keras" },
    { name = "torch" },
-   { name = "tornado", marker = "sys_platform == 'win32'" },
    { name = "uvicorn" },
]
@@ -1580,7 +1579,6 @@ requires-dist = [
    { name = "sphinx-rtd-theme", specifier = ">=3.0.2" },
    { name = "tf-keras", specifier = ">=2.20.1" },
    { name = "torch", specifier = ">=2.8.0" },
-   { name = "tornado", marker = "sys_platform == 'win32'" },
    { name = "uvicorn", specifier = ">=0.37.0" },
]
@@ -2726,17 +2724,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/52/27/7fc2d7435af044ffbe0b9b8e98d99eac096d43f128a5cde23c04825d5dcf/torchaudio-2.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d4a715d09ac28c920d031ee1e60ecbc91e8a5079ad8c61c0277e658436c821a6", size = 2549553, upload-time = "2025-08-06T14:59:00.019Z" },
]
-[[package]]
-name = "tornado"
-version = "6.5.4"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/37/1d/0a336abf618272d53f62ebe274f712e213f5a03c0b2339575430b8362ef2/tornado-6.5.4.tar.gz", hash = "sha256:a22fa9047405d03260b483980635f0b041989d8bcc9a313f8fe18b411d84b1d7", size = 513632, upload-time = "2025-12-15T19:21:03.836Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/0c/1a/d7592328d037d36f2d2462f4bc1fbb383eec9278bc786c1b111cbbd44cfa/tornado-6.5.4-cp39-abi3-win32.whl", hash = "sha256:1768110f2411d5cd281bac0a090f707223ce77fd110424361092859e089b38d1", size = 446481, upload-time = "2025-12-15T19:21:00.008Z" },
-    { url = "https://files.pythonhosted.org/packages/d6/6d/c69be695a0a64fd37a97db12355a035a6d90f79067a3cf936ec2b1dc38cd/tornado-6.5.4-cp39-abi3-win_amd64.whl", hash = "sha256:fa07d31e0cd85c60713f2b995da613588aa03e1303d75705dca6af8babc18ddc", size = 446886, upload-time = "2025-12-15T19:21:01.287Z" },
-    { url = "https://files.pythonhosted.org/packages/50/49/8dc3fd90902f70084bd2cd059d576ddb4f8bb44c2c7c0e33a11422acb17e/tornado-6.5.4-cp39-abi3-win_arm64.whl", hash = "sha256:053e6e16701eb6cbe641f308f4c1a9541f91b6261991160391bfc342e8a551a1", size = 445910, upload-time = "2025-12-15T19:21:02.571Z" },
-]
[[package]]
name = "tqdm"
version = "4.67.1"