feat: visual emotion recognition agent #53

control_backend/agents/perception/visual_emotion_detection_agent/visual_emotion_recognition_agent.py

@@ -1,13 +1,17 @@
 import asyncio
 import zmq
 import zmq.asyncio as azmq
+import numpy as np
+import cv2
+from collections import defaultdict, Counter
+import time
 
 from control_backend.agents import BaseAgent
 from control_backend.agents.perception.visual_emotion_detection_agent.visual_emotion_recognizer import DeepFaceEmotionRecognizer
 from control_backend.core.agent_system import InternalMessage
 from control_backend.core.config import settings
 
-# START FROM RI?
+# START FROM RI COMMUNICATION AGENT?
 
 class VisualEmotionRecognitionAgent(BaseAgent):
     def __init__(self, socket_address: str, socket_bind: bool = False, timeout_ms: int = 1000):
@@ -32,19 +36,69 @@ class VisualEmotionRecognitionAgent(BaseAgent):
         self.video_in_socket.setsockopt(zmq.RCVTIMEO, self.timeout_ms)
         self.video_in_socket.setsockopt(zmq.CONFLATE, 1)
 
-        self.add_behavior(self.retrieve_frame())
+        self.add_behavior(self.emotion_update_loop())
 
-    async def retrieve_frame(self):
+    async def emotion_update_loop(self):
         """
-        Retrieve a video frame from the input socket.
-
-        :return: The received video frame, or None if timeout occurs.
+        Continuously receive video frames, tally the dominant emotion of each
+        face, and report emotion changes at the end of every time window.
         """
-        await asyncio.sleep(1)  # Yield control to the event loop
-        try:
-            frame = await self.video_in_socket.recv()
-
-            # detected_emotions contains a list of dictionaries as follows:
-            detected_emotions = self.emotion_recognizer.detect(frame)
-        except zmq.Again:
-            self.logger.debug("No video frame received within timeout.")
-            return None
+        window_duration = 1  # seconds
+        next_window_time = time.time() + window_duration
+
+        # To reduce false positives, a face must be detected in a minimum
+        # number of frames before it is considered valid; the recognizer
+        # additionally thresholds on face confidence. Ignoring faces that
+        # are too small could help further, but is not implemented.
+        min_frames_required = 2
+
+        face_stats = defaultdict(Counter)
+        prev_dominant_emotions = set()
+
+        while self._running:
+            try:
+                frame_bytes = await self.video_in_socket.recv()
+
+                # Convert the bytes to a numpy buffer and decode it into the
+                # generic numpy array DeepFace expects
+                nparr = np.frombuffer(frame_bytes, np.uint8)
+                frame_image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+                if frame_image is None:
+                    # Could not decode the image, skip this frame
+                    continue
+
+                # Get the dominant emotion of each face and update the
+                # per-face emotion counts
+                current_emotions = self.emotion_recognizer.sorted_dominant_emotions(frame_image)
+                for i, emotion in enumerate(current_emotions):
+                    face_stats[i][emotion] += 1
+
+                # Once the window has elapsed, determine the dominant emotion
+                # of each face that was seen often enough to be valid
+                if time.time() >= next_window_time:
+                    window_dominant_emotions = set()
+                    for counter in face_stats.values():
+                        total_detections = sum(counter.values())
+                        if total_detections >= min_frames_required:
+                            dominant_emotion = counter.most_common(1)[0][0]
+                            window_dominant_emotions.add(dominant_emotion)
+
+                    await self.update_emotions(prev_dominant_emotions, window_dominant_emotions)
+
+                    prev_dominant_emotions = window_dominant_emotions
+                    face_stats.clear()
+                    next_window_time = time.time() + window_duration
+
+            except zmq.Again:
+                self.logger.warning("No video frame received within timeout.")
+
+    async def update_emotions(self, prev_emotions, emotions):
+        # Remove emotions that are no longer present
+        emotions_to_remove = prev_emotions - emotions
+        for emotion in emotions_to_remove:
+            self.logger.info(f"Emotion '{emotion}' has disappeared.")
+
+        # Add new emotions that have appeared
+        new_emotions = emotions - prev_emotions
+        for emotion in new_emotions:
+            self.logger.info(f"New emotion detected: '{emotion}'")
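
Taken together, emotion_update_loop is a per-face majority vote over a one-second window. The sketch below reproduces just that aggregation step with plain data structures so it can be sanity-checked without ZMQ or DeepFace; the helper name and the hard-coded frame data are illustrative, not part of this PR.

from collections import defaultdict, Counter

def window_dominant_emotions(frames, min_frames_required=2):
    # frames: one list per frame, holding the dominant emotion of each
    # detected face, already sorted left to right.
    face_stats = defaultdict(Counter)
    for emotions in frames:
        for i, emotion in enumerate(emotions):
            face_stats[i][emotion] += 1

    # Keep only faces seen often enough, then take each face's most
    # frequent emotion -- the same filtering the agent applies per window.
    dominant = set()
    for counter in face_stats.values():
        if sum(counter.values()) >= min_frames_required:
            dominant.add(counter.most_common(1)[0][0])
    return dominant

# A face that flickers into view for a single frame ("surprise") is
# rejected, while the two stable faces survive.
frames = [["happy", "neutral"],
          ["happy", "neutral", "surprise"],
          ["happy", "neutral"]]
assert window_dominant_emotions(frames) == {"happy", "neutral"}

Note that keying face_stats by list index assumes the recognizer returns faces in a stable order from frame to frame; that is exactly what the left-to-right sort in the recognizer below provides, although two faces swapping positions mid-window would still mix their counters.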
control_backend/agents/perception/visual_emotion_detection_agent/visual_emotion_recognizer.py

@@ -1,6 +1,7 @@
 import abc
 from deepface import DeepFace
 import numpy as np
+from collections import Counter
 
 class VisualEmotionRecognizer(abc.ABC):
     @abc.abstractmethod
@@ -9,7 +10,7 @@ class VisualEmotionRecognizer(abc.ABC):
         pass
 
     @abc.abstractmethod
-    def detect(self, image):
-        """Recognize emotion from the given image.
+    def sorted_dominant_emotions(self, image):
+        """Recognize the dominant emotion of each face in the given image.
 
         :param image: The input image for emotion recognition.
@@ -30,6 +31,17 @@ class DeepFaceEmotionRecognizer(VisualEmotionRecognizer):
         DeepFace.analyze(dummy_img, actions=['emotion'], enforce_detection=False)
         print("Deepface Emotion Model loaded.")
 
-    def detect(self, image):
-        analysis = DeepFace.analyze(image, actions=['emotion'], enforce_detection=False)
-        return analysis['dominant_emotion']
+    def sorted_dominant_emotions(self, image):
+        analysis = DeepFace.analyze(image,
+                                    actions=['emotion'],
+                                    enforce_detection=False)
+
+        # Sort faces by x coordinate to maintain a left-to-right order
+        analysis.sort(key=lambda face: face['region']['x'])
+
+        # Drop low-confidence detections
+        analysis = [face for face in analysis if face['face_confidence'] >= 0.90]
+
+        # Return the dominant emotion of each remaining face
+        dominant_emotions = [face['dominant_emotion'] for face in analysis]
+        return dominant_emotions
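
DeepFace.analyze returns a list with one dict per detected face; the dominant_emotion, face_confidence, and region keys used above match that structure. The stub below exercises the new sort-and-filter logic without loading any model; every numeric value is fabricated for illustration.

# Fabricated stand-in for a DeepFace.analyze result (three faces).
stub_analysis = [
    {"dominant_emotion": "sad", "face_confidence": 0.97,
     "region": {"x": 310, "y": 40, "w": 120, "h": 120}},
    {"dominant_emotion": "happy", "face_confidence": 0.99,
     "region": {"x": 15, "y": 52, "w": 118, "h": 118}},
    {"dominant_emotion": "fear", "face_confidence": 0.42,
     "region": {"x": 600, "y": 48, "w": 30, "h": 30}},
]

# Same steps as sorted_dominant_emotions: order left to right, then
# drop detections below the 0.90 confidence threshold.
stub_analysis.sort(key=lambda face: face["region"]["x"])
kept = [f for f in stub_analysis if f["face_confidence"] >= 0.90]
print([f["dominant_emotion"] for f in kept])  # ['happy', 'sad']

The sort is what keeps the agent's index-keyed counters attached to the same physical face across frames. The remaining hunks register the agent in the application's lifespan.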
@@ -40,6 +40,9 @@ from control_backend.agents.communication import RICommunicationAgent
 from control_backend.agents.llm import LLMAgent
 
+# Visual Emotion Recognition Agent
+from control_backend.agents.perception.visual_emotion_detection_agent.visual_emotion_recognition_agent import VisualEmotionRecognitionAgent
+
 # User Interrupt Agent
 from control_backend.agents.user_interrupt.user_interrupt_agent import UserInterruptAgent
 
 # Other backend imports
@@ -147,6 +150,13 @@ async def lifespan(app: FastAPI):
                 "name": settings.agent_settings.user_interrupt_name,
             },
         ),
+        # TODO: Spawn this agent from the RI Communication Agent instead
+        "VisualEmotionRecognitionAgent": (
+            VisualEmotionRecognitionAgent,
+            {
+                "socket_address": "tcp://localhost:5556",  # TODO: move to settings
+            },
+        ),
     }
 
     agents = []
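
Nothing in this PR produces the frames themselves. For local testing, a stand-alone publisher along these lines can feed the agent; this is a sketch that assumes the agent's input socket is a SUB- or PULL-type socket connecting to tcp://localhost:5556 (the socket creation is outside these hunks) and that frames arrive as single-part JPEG bytes, matching the cv2.imdecode call above.

import time
import cv2
import zmq

ctx = zmq.Context()
pub = ctx.socket(zmq.PUB)
pub.bind("tcp://*:5556")  # the agent connects (socket_bind defaults to False)

cap = cv2.VideoCapture(0)  # default webcam
try:
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        encoded_ok, buf = cv2.imencode(".jpg", frame)
        if encoded_ok:
            # Single-part message: required for CONFLATE on the receiver,
            # which then keeps only the newest frame.
            pub.send(buf.tobytes())
        time.sleep(1 / 15)  # ~15 fps is ample for one-second windows
finally:
    cap.release()
    pub.close()
    ctx.term()

Because the receiving socket sets zmq.CONFLATE, a fast producer cannot back the agent up: stale frames are discarded rather than queued. The hard-coded tcp://localhost:5556 is already flagged with a TODO to move into settings.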