feat: face recognition agent #53

Open
2584433 wants to merge 31 commits from feat/face-recognition into main
5 changed files with 923 additions and 30 deletions
Showing only changes of commit 1c88ae6078 - Show all commits

View File

@@ -7,6 +7,7 @@ requires-python = ">=3.13"
dependencies = [
    "agentspeak>=0.2.2",
    "colorlog>=6.10.1",
    "deepface>=0.0.96",
    "fastapi[all]>=0.115.6",
    "mlx-whisper>=0.4.3 ; sys_platform == 'darwin'",
    "numpy>=2.3.3",

View File

@@ -0,0 +1,50 @@
import asyncio
import zmq
import zmq.asyncio as azmq
from control_backend.agents import BaseAgent
from control_backend.agents.perception.visual_emotion_detection_agent.visual_emotion_recognizer import DeepFaceEmotionRecognizer
from control_backend.core.agent_system import InternalMessage
from control_backend.core.config import settings
# START FROM RI?
class VisualEmotionRecognitionAgent(BaseAgent):
    """Agent that subscribes to a video stream over ZeroMQ and runs visual
    emotion recognition (via :class:`DeepFaceEmotionRecognizer`) on the
    frames it receives.
    """

    def __init__(self, socket_address: str, socket_bind: bool = False, timeout_ms: int = 1000):
        """
        :param socket_address: ZeroMQ endpoint for the video input socket.
        :param socket_bind: If True, bind the SUB socket to the address;
            otherwise connect to it.
        :param timeout_ms: Receive timeout for the SUB socket, in milliseconds.
        """
        super().__init__(settings.agent_settings.visual_emotion_recognition_name)
        self.socket_address = socket_address
        self.socket_bind = socket_bind
        self.timeout_ms = timeout_ms

    async def setup(self):
        """Create the emotion recognizer and the video SUB socket, then
        register the frame-retrieval behavior."""
        self.logger.info("Setting up %s.", self.name)
        # Constructing the recognizer eagerly loads the DeepFace model
        # (its __init__ calls load_model), so the first frame is not delayed.
        self.emotion_recognizer = DeepFaceEmotionRecognizer()
        self.video_in_socket = azmq.Context.instance().socket(zmq.SUB)
        if self.socket_bind:
            self.video_in_socket.bind(self.socket_address)
        else:
            self.video_in_socket.connect(self.socket_address)
        # Empty subscription string: receive all published messages.
        self.video_in_socket.setsockopt_string(zmq.SUBSCRIBE, "")
        self.video_in_socket.setsockopt(zmq.RCVTIMEO, self.timeout_ms)
        # CONFLATE keeps only the most recent frame in the receive queue.
        # NOTE(review): CONFLATE is usually set before bind/connect to be
        # effective on the connection — confirm ordering is intentional.
        self.video_in_socket.setsockopt(zmq.CONFLATE, 1)
        self.add_behavior(self.retrieve_frame())

    async def retrieve_frame(self):
        """
        Retrieve a video frame from the input socket and run emotion
        detection on it.

        :return: None — both on timeout and (in the visible code) after a
            successful detection.
        """
        await asyncio.sleep(1)  # Yield control to the event loop
        try:
            frame = await self.video_in_socket.recv()
            # detected_emotions contains a list of dictionaries as follows:
            # NOTE(review): detected_emotions is never used in the visible
            # hunk (the diff header says +50 lines but fewer are shown) —
            # presumably a later commit publishes it; confirm.
            detected_emotions = self.emotion_recognizer.detect(frame)
        except zmq.Again:
            # RCVTIMEO expired without a frame arriving.
            self.logger.debug("No video frame received within timeout.")
            return None

View File

@@ -0,0 +1,35 @@
import abc
import logging

import numpy as np
from deepface import DeepFace
class VisualEmotionRecognizer(abc.ABC):
    """Interface for visual (image-based) emotion recognition backends.

    Concrete implementations must load their model and turn an input image
    into an emotion label.
    """

    @abc.abstractmethod
    def load_model(self):
        """Load the visual emotion recognition model into memory."""
        ...

    @abc.abstractmethod
    def detect(self, image):
        """Recognize emotion from the given image.

        :param image: The input image for emotion recognition.
        :return: Detected emotion label.
        """
        ...
class DeepFaceEmotionRecognizer(VisualEmotionRecognizer):
    """Emotion recognizer backed by the DeepFace library."""

    def __init__(self):
        # Load the model eagerly so the first real frame is not delayed.
        self.load_model()

    def load_model(self):
        """Warm up DeepFace's emotion model.

        DeepFace exposes no explicit model-loading API for analyze(); the
        first call lazily builds and caches the model, so we trigger it
        here with a blank dummy image.
        """
        logger = logging.getLogger(__name__)
        logger.info("Loading DeepFace emotion model...")
        dummy_img = np.zeros((224, 224, 3), dtype=np.uint8)
        # enforce_detection=False: the blank image contains no face; we only
        # want the model weights loaded, not a real detection.
        DeepFace.analyze(dummy_img, actions=['emotion'], enforce_detection=False)
        logger.info("DeepFace emotion model loaded.")

    def detect(self, image):
        """Recognize the dominant emotion in the given image.

        :param image: Input image in a format accepted by DeepFace.analyze
            (e.g. a numpy BGR array or an image path).
        :return: Dominant emotion label of the first detected face, or None
            when DeepFace returns no result.
        """
        results = DeepFace.analyze(image, actions=['emotion'], enforce_detection=False)
        # Bug fix: modern deepface returns a *list* of per-face dicts, so the
        # original `analysis['dominant_emotion']` raised TypeError (list
        # indices must be integers). Handle both return shapes.
        if isinstance(results, dict):
            # Very old deepface versions returned a single dict.
            return results.get('dominant_emotion')
        if not results:
            return None
        return results[0]['dominant_emotion']

View File

@@ -52,6 +52,7 @@ class AgentSettings(BaseModel):
bdi_core_name: str = "bdi_core_agent"
bdi_belief_collector_name: str = "belief_collector_agent"
bdi_program_manager_name: str = "bdi_program_manager_agent"
visual_emotion_recognition_name: str = "visual_emotion_recognition_agent"
text_belief_extractor_name: str = "text_belief_extractor_agent"
vad_name: str = "vad_agent"
llm_name: str = "llm_agent"

866
uv.lock generated

File diff suppressed because it is too large Load Diff