Compare commits

...

4 Commits

Author SHA1 Message Date
dfd2c3a0a1 fix: reset counter after each loop
ref: N25B-395
2026-01-30 20:39:10 +01:00
3efe8a7b06 chore: change emo loop frequency 2026-01-30 20:34:16 +01:00
3a5c27e01f fix: update face detected at same time as emotions
ref: N25B-395
2026-01-30 20:33:16 +01:00
1f799299b9 feat: (hopefully) face detection
Simplified implementation, relying on the already-present VED Agent.

ref: N25B-395
2026-01-30 20:12:31 +01:00
4 changed files with 66 additions and 15 deletions

View File

@@ -30,6 +30,7 @@ from control_backend.schemas.program import (
BasicNorm, BasicNorm,
ConditionalNorm, ConditionalNorm,
EmotionBelief, EmotionBelief,
FaceBelief,
GestureAction, GestureAction,
Goal, Goal,
InferredBelief, InferredBelief,
@@ -682,11 +683,15 @@ class AgentSpeakGenerator:
:return: An AstLiteral representing the semantic belief. :return: An AstLiteral representing the semantic belief.
""" """
return AstLiteral(self.slugify(sb)) return AstLiteral(self.slugify(sb))
@_astify.register @_astify.register
def _(self, eb: EmotionBelief) -> AstExpression: def _(self, eb: EmotionBelief) -> AstExpression:
return AstLiteral("emotion_detected", [AstAtom(eb.emotion)]) return AstLiteral("emotion_detected", [AstAtom(eb.emotion)])
@_astify.register
def _(self, fb: FaceBelief) -> AstExpression:
    """
    Convert a face belief to its AgentSpeak representation.

    :param fb: The face belief to convert.
    :return: An AstLiteral built from the name ``face_present`` only.
    """
    # NOTE(review): ``fb.face_present`` is not consulted here, so the same literal is
    # produced for either value of the flag — confirm this is intended.
    face_literal = AstLiteral("face_present")
    return face_literal
@_astify.register @_astify.register
def _(self, ib: InferredBelief) -> AstExpression: def _(self, ib: InferredBelief) -> AstExpression:
""" """

View File

@@ -14,7 +14,7 @@ from control_backend.agents.perception.visual_emotion_recognition_agent.visual_e
) )
from control_backend.core.agent_system import InternalMessage from control_backend.core.agent_system import InternalMessage
from control_backend.core.config import settings from control_backend.core.config import settings
from control_backend.schemas.belief_message import Belief from control_backend.schemas.belief_message import Belief, BeliefMessage
class VisualEmotionRecognitionAgent(BaseAgent): class VisualEmotionRecognitionAgent(BaseAgent):
@@ -44,6 +44,7 @@ class VisualEmotionRecognitionAgent(BaseAgent):
self.timeout_ms = timeout_ms self.timeout_ms = timeout_ms
self.window_duration = window_duration self.window_duration = window_duration
self.min_frames_required = min_frames_required self.min_frames_required = min_frames_required
self._face_detected = False
# Pause functionality # Pause functionality
# NOTE: flag is set when running, cleared when paused # NOTE: flag is set when running, cleared when paused
@@ -89,6 +90,9 @@ class VisualEmotionRecognitionAgent(BaseAgent):
# Tracks counts of detected emotions per face index # Tracks counts of detected emotions per face index
face_stats = defaultdict(Counter) face_stats = defaultdict(Counter)
# Per-window tally of frames: [with at least one face, with no face]
face_detection_yes_no = [0, 0]
prev_dominant_emotions = set() prev_dominant_emotions = set()
while self._running: while self._running:
@@ -97,8 +101,8 @@ class VisualEmotionRecognitionAgent(BaseAgent):
width, height, image_bytes = await self.video_in_socket.recv_multipart() width, height, image_bytes = await self.video_in_socket.recv_multipart()
width = int.from_bytes(width, 'little') width = int.from_bytes(width, "little")
height = int.from_bytes(height, 'little') height = int.from_bytes(height, "little")
# Convert bytes to a numpy buffer # Convert bytes to a numpy buffer
image_array = np.frombuffer(image_bytes, np.uint8) image_array = np.frombuffer(image_bytes, np.uint8)
@@ -107,6 +111,13 @@ class VisualEmotionRecognitionAgent(BaseAgent):
# Get the dominant emotion from each face # Get the dominant emotion from each face
current_emotions = self.emotion_recognizer.sorted_dominant_emotions(frame) current_emotions = self.emotion_recognizer.sorted_dominant_emotions(frame)
# Update the face detection tally for this frame
if len(current_emotions) > 0:
face_detection_yes_no[0] += 1
else:
face_detection_yes_no[1] += 1
# Update emotion counts for each detected face # Update emotion counts for each detected face
for i, emotion in enumerate(current_emotions): for i, emotion in enumerate(current_emotions):
face_stats[i][emotion] += 1 face_stats[i][emotion] += 1
@@ -122,6 +133,20 @@ class VisualEmotionRecognitionAgent(BaseAgent):
dominant_emotion = counter.most_common(1)[0][0] dominant_emotion = counter.most_common(1)[0][0]
window_dominant_emotions.add(dominant_emotion) window_dominant_emotions.add(dominant_emotion)
if (
face_detection_yes_no[0] > face_detection_yes_no[1]
and not self._face_detected
):
self._face_detected = True
await self._inform_face_detected()
elif (
face_detection_yes_no[0] <= face_detection_yes_no[1] and self._face_detected
):
self._face_detected = False
await self._inform_face_detected()
face_detection_yes_no = [0, 0]
await self.update_emotions(prev_dominant_emotions, window_dominant_emotions) await self.update_emotions(prev_dominant_emotions, window_dominant_emotions)
prev_dominant_emotions = window_dominant_emotions prev_dominant_emotions = window_dominant_emotions
face_stats.clear() face_stats.clear()
@@ -133,7 +158,6 @@ class VisualEmotionRecognitionAgent(BaseAgent):
except Exception as e: except Exception as e:
self.logger.error(f"Error in emotion recognition loop: {e}") self.logger.error(f"Error in emotion recognition loop: {e}")
async def update_emotions(self, prev_emotions: set[str], emotions: set[str]): async def update_emotions(self, prev_emotions: set[str], emotions: set[str]):
""" """
Compare emotions from previous window and current emotions, Compare emotions from previous window and current emotions,
@@ -149,9 +173,7 @@ class VisualEmotionRecognitionAgent(BaseAgent):
for emotion in emotions_to_remove: for emotion in emotions_to_remove:
self.logger.info(f"Emotion '{emotion}' has disappeared.") self.logger.info(f"Emotion '{emotion}' has disappeared.")
try: try:
emotion_beliefs_remove.append( emotion_beliefs_remove.append(Belief(name="emotion_detected", arguments=[emotion]))
Belief(name="emotion_detected", arguments=[emotion], remove=True)
)
except ValidationError: except ValidationError:
self.logger.warning("Invalid belief for emotion removal: %s", emotion) self.logger.warning("Invalid belief for emotion removal: %s", emotion)
@@ -175,11 +197,25 @@ class VisualEmotionRecognitionAgent(BaseAgent):
) )
await self.send(message) await self.send(message)
async def _inform_face_detected(self):
    """
    Send the current face-detection state as a belief update.

    Builds a ``BeliefMessage`` that creates the ``face_present`` belief when
    ``self._face_detected`` is true and deletes it otherwise, then sends it on the
    ``beliefs`` thread to the agent named by ``settings.agent_settings.bdi_core_name``.
    """
    face_belief = Belief(name="face_present")
    belief_message = (
        BeliefMessage(create=[face_belief])
        if self._face_detected
        else BeliefMessage(delete=[face_belief])
    )

    await self.send(
        InternalMessage(
            to=settings.agent_settings.bdi_core_name,
            thread="beliefs",
            body=belief_message.model_dump_json(),
        )
    )
async def handle_message(self, msg: InternalMessage): async def handle_message(self, msg: InternalMessage):
""" """
Handle incoming messages. Handle incoming messages.
Expects messages to pause or resume the Visual Emotion Recognition Expects messages to pause or resume the Visual Emotion Recognition
processing from User Interrupt Agent. processing from User Interrupt Agent.
:param msg: The received internal message. :param msg: The received internal message.
@@ -204,4 +240,3 @@ class VisualEmotionRecognitionAgent(BaseAgent):
""" """
self.video_in_socket.close() self.video_in_socket.close()
await super().stop() await super().stop()

View File

@@ -82,7 +82,7 @@ class BehaviourSettings(BaseModel):
:ivar transcription_words_per_token: Estimated words per token for transcription timing. :ivar transcription_words_per_token: Estimated words per token for transcription timing.
:ivar transcription_token_buffer: Buffer for transcription tokens. :ivar transcription_token_buffer: Buffer for transcription tokens.
:ivar conversation_history_length_limit: The maximum amount of messages to extract beliefs from. :ivar conversation_history_length_limit: The maximum amount of messages to extract beliefs from.
:ivar visual_emotion_recognition_window_duration_s: Duration in seconds over which to aggregate :ivar visual_emotion_recognition_window_duration_s: Duration in seconds over which to aggregate
emotions and update emotion beliefs. emotions and update emotion beliefs.
:ivar visual_emotion_recognition_min_frames_per_face: Minimum number of frames per face required :ivar visual_emotion_recognition_min_frames_per_face: Minimum number of frames per face required
to consider a face valid. to consider a face valid.
@@ -112,7 +112,7 @@ class BehaviourSettings(BaseModel):
conversation_history_length_limit: int = 10 conversation_history_length_limit: int = 10
# Visual Emotion Recognition settings # Visual Emotion Recognition settings
visual_emotion_recognition_window_duration_s: int = 5 visual_emotion_recognition_window_duration_s: int = 3
visual_emotion_recognition_min_frames_per_face: int = 3 visual_emotion_recognition_min_frames_per_face: int = 3
# AgentSpeak related settings # AgentSpeak related settings
trigger_time_to_wait: int = 2000 trigger_time_to_wait: int = 2000

View File

@@ -41,8 +41,8 @@ class LogicalOperator(Enum):
OR = "OR" OR = "OR"
type Belief = KeywordBelief | SemanticBelief | InferredBelief | EmotionBelief type Belief = KeywordBelief | SemanticBelief | InferredBelief | EmotionBelief | FaceBelief
type BasicBelief = KeywordBelief | SemanticBelief | EmotionBelief type BasicBelief = KeywordBelief | SemanticBelief | EmotionBelief | FaceBelief
class KeywordBelief(ProgramElement): class KeywordBelief(ProgramElement):
@@ -105,6 +105,7 @@ class InferredBelief(ProgramElement):
left: Belief left: Belief
right: Belief right: Belief
class EmotionBelief(ProgramElement): class EmotionBelief(ProgramElement):
""" """
Represents a belief that is set when a certain emotion is detected. Represents a belief that is set when a certain emotion is detected.
@@ -115,6 +116,16 @@ class EmotionBelief(ProgramElement):
name: str = "" name: str = ""
emotion: str emotion: str
class FaceBelief(ProgramElement):
    """
    Represents the belief that at least one face is currently in view.

    :ivar name: Name of the belief; defaults to an empty string.
    :ivar face_present: Boolean flag carried by the belief. NOTE(review): presumably
        whether a face is expected to be present — confirm against the program schema.
    """

    name: str = ""
    face_present: bool
class Norm(ProgramElement): class Norm(ProgramElement):
""" """
Base class for behavioral norms that guide the robot's interactions. Base class for behavioral norms that guide the robot's interactions.
@@ -329,4 +340,4 @@ class Program(BaseModel):
if __name__ == "__main__":
    # Manual check: read a program as JSON from stdin, validate it, and print the result.
    # A distinct variable name is used so the builtin ``input`` is not shadowed.
    program_json = input("Enter program JSON: ")
    program = Program.model_validate_json(program_json)
    print(program)