feat: fully implement visual emotion recognition agent in pipeline
ref: N25B-393
```diff
@@ -158,6 +158,9 @@ class BDICoreAgent(BaseAgent):
         for belief in beliefs:
             if belief.replace:
                 self._remove_all_with_name(belief.name)
+            elif belief.remove:
+                self._remove_belief(belief.name, belief.arguments)
+                continue
             self._add_belief(belief.name, belief.arguments)

     def _add_belief(self, name: str, args: Iterable[str] = []):
```
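This hunk gives the BDI core three belief operations: `replace` drops every belief sharing the functor before re-adding, the new `remove` deletes one specific instance and adds nothing (hence the `continue`), and the default path simply adds. A minimal self-contained sketch of that dispatch, with a plain list of `(name, arguments)` tuples standing in for the agent's belief base:

```python
# Standalone sketch of the dispatch above; a list of (name, args) tuples
# stands in for the agent's belief base and its Belief objects.
belief_base: list[tuple[str, tuple[str, ...]]] = []

def apply_belief(name: str, arguments: list[str],
                 replace: bool = False, remove: bool = False) -> None:
    global belief_base
    if replace:
        # Drop every belief with this functor, then fall through to add.
        belief_base = [b for b in belief_base if b[0] != name]
    elif remove:
        # Remove only the exact (name, arguments) instance; add nothing.
        belief_base = [b for b in belief_base if b != (name, tuple(arguments))]
        return
    belief_base.append((name, tuple(arguments)))

apply_belief("emotion", ["happy"])
apply_belief("emotion", ["sad"])
apply_belief("emotion", ["happy"], remove=True)
assert belief_base == [("emotion", ("sad",))]
```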
```diff
@@ -7,6 +7,9 @@ from zmq.asyncio import Context

 from control_backend.agents import BaseAgent
 from control_backend.agents.actuation.robot_gesture_agent import RobotGestureAgent
+from control_backend.agents.perception.visual_emotion_detection_agent.visual_emotion_recognition_agent import (
+    VisualEmotionRecognitionAgent,
+)
 from control_backend.core.config import settings

 from ..actuation.robot_speech_agent import RobotSpeechAgent
@@ -201,6 +204,13 @@ class RICommunicationAgent(BaseAgent):
             case "audio":
                 vad_agent = VADAgent(audio_in_address=addr, audio_in_bind=bind)
                 await vad_agent.start()
+            case "video":
+                visual_emotion_agent = VisualEmotionRecognitionAgent(
+                    settings.agent_settings.visual_emotion_recognition_name,
+                    socket_address=addr,
+                    bind=bind,
+                )
+                await visual_emotion_agent.start()
             case _:
                 self.logger.warning("Unhandled negotiation id: %s", id)
```
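The `video` case mirrors the existing `audio` one: once a stream is negotiated, a dedicated agent is spawned on the negotiated endpoint. The diff does not show how the spawned agent opens that endpoint; a sketch under the assumption of a ZMQ PUB/SUB video stream, where `bind` decides which side binds:

```python
import zmq
import zmq.asyncio as azmq

async def open_video_socket(addr: str, bind: bool) -> azmq.Socket:
    # Assumed PUB/SUB transport: the diff only threads addr/bind through,
    # so the socket type and roles here are illustrative.
    ctx = azmq.Context.instance()
    sock = ctx.socket(zmq.SUB)
    sock.setsockopt_string(zmq.SUBSCRIBE, "")  # subscribe to all frames
    if bind:
        sock.bind(addr)
    else:
        sock.connect(addr)
    return sock
```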
```diff
@@ -1,23 +1,28 @@
-import asyncio
+import json
+import time
+from collections import Counter, defaultdict
+
+import cv2
+import numpy as np
+from pydantic_core import ValidationError
 import zmq
 import zmq.asyncio as azmq
-import numpy as np
-import cv2
-from collections import defaultdict, Counter
-import time

 from control_backend.agents import BaseAgent
-from control_backend.agents.perception.visual_emotion_detection_agent.visual_emotion_recognizer import DeepFaceEmotionRecognizer
+from control_backend.agents.perception.visual_emotion_detection_agent.visual_emotion_recognizer import (
+    DeepFaceEmotionRecognizer,
+)
 from control_backend.core.agent_system import InternalMessage
 from control_backend.core.config import settings
+from control_backend.schemas.belief_message import Belief

 # START FROM RI COMMUNICATION AGENT?

 class VisualEmotionRecognitionAgent(BaseAgent):
-    def __init__(self, socket_address: str, socket_bind: bool = False, timeout_ms: int = 1000):
-        super().__init__(settings.agent_settings.visual_emotion_recognition_name)
+    def __init__(self, name, socket_address: str, bind: bool = False, timeout_ms: int = 1000):
+        super().__init__(name)
         self.socket_address = socket_address
-        self.socket_bind = socket_bind
+        self.socket_bind = bind
         self.timeout_ms = timeout_ms

     async def setup(self):
@@ -41,8 +46,6 @@ class VisualEmotionRecognitionAgent(BaseAgent):
     async def emotion_update_loop(self):
         """
         Retrieve a video frame from the input socket.
-
-        :return: The received video frame, or None if timeout occurs.
         """
         window_duration = 1  # seconds
         next_window_time = time.time() + window_duration
@@ -70,7 +73,7 @@ class VisualEmotionRecognitionAgent(BaseAgent):
                 if frame_image is None:
                     # Could not decode image, skip this frame
                     continue

                 # Get the dominant emotion from each face
                 current_emotions = self.emotion_recognizer.sorted_dominant_emotions(frame_image)
                 # Update emotion counts for each detected face
```
```diff
@@ -90,7 +93,6 @@ class VisualEmotionRecognitionAgent(BaseAgent):
                     window_dominant_emotions.add(dominant_emotion)

                 await self.update_emotions(prev_dominant_emotions, window_dominant_emotions)
-
                 prev_dominant_emotions = window_dominant_emotions
                 face_stats.clear()
                 next_window_time = time.time() + window_duration
```
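Taken together, the loop decodes each frame, tallies per-face emotions, and once per one-second window promotes each face's most frequent emotion into the window's dominant set before the counters roll over. An offline sketch of that windowing, assuming `face_stats` maps a face index to a `Counter` of emotion labels (the hunks only show the rollover):

```python
import time
from collections import Counter, defaultdict

def replay_windows(observations: list[tuple[int, str]]) -> set[str]:
    """Replay (face_id, emotion) observations through the 1 s window scheme."""
    window_duration = 1  # seconds, as in emotion_update_loop
    next_window_time = time.time() + window_duration
    face_stats: defaultdict[int, Counter] = defaultdict(Counter)
    prev_dominant: set[str] = set()
    for face_id, emotion in observations:
        face_stats[face_id][emotion] += 1
        if time.time() >= next_window_time:
            # Promote each face's most frequent emotion this window.
            window_dominant = {stats.most_common(1)[0][0] for stats in face_stats.values()}
            # update_emotions(prev_dominant, window_dominant) would be awaited here.
            prev_dominant = window_dominant
            face_stats.clear()
            next_window_time = time.time() + window_duration
    return prev_dominant
```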
```diff
@@ -98,14 +100,40 @@ class VisualEmotionRecognitionAgent(BaseAgent):
             except zmq.Again:
                 self.logger.warning("No video frame received within timeout.")

-    async def update_emotions(self, prev_emotions, emotions):
+    async def update_emotions(self, prev_emotions: set[str], emotions: set[str]):
+        """
+        Compare emotions from previous window and current emotions,
+        send updates to BDI Core Agent.
+        """
         # Remove emotions that are no longer present
         emotions_to_remove = prev_emotions - emotions
+        new_emotions = emotions - prev_emotions
+
+        if not new_emotions and not emotions_to_remove:
+            return
+
+        emotion_beliefs = []
+        # Remove emotions that have disappeared
         for emotion in emotions_to_remove:
             self.logger.info(f"Emotion '{emotion}' has disappeared.")
+            try:
+                emotion_beliefs.append(Belief(name="emotion", arguments=[emotion], remove=True))
+            except ValidationError:
+                self.logger.warning("Invalid belief for emotion removal: %s", emotion)

         # Add new emotions that have appeared
-        new_emotions = emotions - prev_emotions
         for emotion in new_emotions:
             self.logger.info(f"New emotion detected: '{emotion}'")
+            try:
+                emotion_beliefs.append(Belief(name="emotion", arguments=[emotion]))
+            except ValidationError:
+                self.logger.warning("Invalid belief for new emotion: %s", emotion)

+        message = InternalMessage(
+            to=settings.agent_settings.bdi_core_name,
+            sender=self.name,
+            body=json.dumps(emotion_beliefs),
+            thread="beliefs",
+        )
+        self.logger.debug("Sending emotion beliefs update: %s", emotion_beliefs)
+        await self.send(message)
```
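`update_emotions` reduces the two windows to set differences and ships one batch of `Belief` updates on the `beliefs` thread. Note that pydantic models are not directly `json.dumps`-serializable, so the `body=json.dumps(emotion_beliefs)` line implies a dump step somewhere; the sketch below makes that step explicit with `model_dump`, which is an assumption rather than the committed code:

```python
import json
from pydantic import BaseModel

class Belief(BaseModel):  # mirrors control_backend.schemas.belief_message.Belief
    name: str
    arguments: list[str]
    replace: bool = False
    remove: bool = False

prev_window = {"happy", "neutral"}
curr_window = {"neutral", "surprise"}

beliefs = [Belief(name="emotion", arguments=[e], remove=True)
           for e in prev_window - curr_window]   # disappeared: happy
beliefs += [Belief(name="emotion", arguments=[e])
            for e in curr_window - prev_window]  # appeared: surprise

body = json.dumps([b.model_dump() for b in beliefs])  # explicit serialization
print(body)
```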
```diff
@@ -1,7 +1,8 @@
 import abc
-from deepface import DeepFace
+
 import numpy as np
-from collections import Counter
+from deepface import DeepFace


 class VisualEmotionRecognizer(abc.ABC):
     @abc.abstractmethod
@@ -42,7 +43,6 @@ class DeepFaceEmotionRecognizer(VisualEmotionRecognizer):

         analysis = [face for face in analysis if face['face_confidence'] >= 0.90]

-        # Return list of (dominant_emotion, face_confidence) tuples
         dominant_emotions = [face['dominant_emotion'] for face in analysis]
         return dominant_emotions
```
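The recognizer keeps only faces DeepFace detected with at least 0.90 confidence and returns their dominant emotions. A minimal sketch of the surrounding method, assuming the analysis comes from `DeepFace.analyze` with `actions=["emotion"]` and `enforce_detection=False` (the call itself is outside this hunk):

```python
import numpy as np
from deepface import DeepFace

def dominant_emotions_for_frame(frame: np.ndarray) -> list[str]:
    # enforce_detection=False keeps analyze from raising when no face is
    # visible; whether the committed code passes it is an assumption.
    analysis = DeepFace.analyze(frame, actions=["emotion"], enforce_detection=False)
    confident = [face for face in analysis if face["face_confidence"] >= 0.90]
    return [face["dominant_emotion"] for face in confident]
```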
```diff
@@ -40,7 +40,9 @@ from control_backend.agents.communication import RICommunicationAgent
 from control_backend.agents.llm import LLMAgent

 # User Interrupt Agent
-from control_backend.agents.perception.visual_emotion_detection_agent.visual_emotion_recognition_agent import VisualEmotionRecognitionAgent
+from control_backend.agents.perception.visual_emotion_detection_agent.visual_emotion_recognition_agent import (
+    VisualEmotionRecognitionAgent,
+)
 from control_backend.agents.user_interrupt.user_interrupt_agent import UserInterruptAgent

 # Other backend imports
@@ -148,13 +150,6 @@ async def lifespan(app: FastAPI):
                 "name": settings.agent_settings.user_interrupt_name,
             },
         ),
-        # TODO: Spawn agent from RI Communication Agent
-        "VisualEmotionRecognitionAgent": (
-            VisualEmotionRecognitionAgent,
-            {
-                "socket_address": "tcp://localhost:5556",  # TODO: move to settings
-            },
-        ),
     }

     agents = []
```
```diff
@@ -8,11 +8,13 @@ class Belief(BaseModel):
     :ivar name: The functor or name of the belief (e.g., 'user_said').
     :ivar arguments: A list of string arguments for the belief.
     :ivar replace: If True, existing beliefs with this name should be replaced by this one.
+    :ivar remove: If True, this belief should be removed from the belief base.
     """

     name: str
     arguments: list[str]
     replace: bool = False
+    remove: bool = False


 class BeliefMessage(BaseModel):
```
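On the receiving side, the new `remove` flag round-trips through plain JSON; a quick parsing sketch assuming pydantic v2's `model_validate_json`:

```python
from pydantic import BaseModel

class Belief(BaseModel):  # as defined above
    name: str
    arguments: list[str]
    replace: bool = False
    remove: bool = False

raw = '{"name": "emotion", "arguments": ["happy"], "remove": true}'
belief = Belief.model_validate_json(raw)
assert belief.remove is True and belief.replace is False
```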