feat: face recognition agent #53

Open
2584433 wants to merge 31 commits from feat/face-recognition into main
5 changed files with 923 additions and 30 deletions
Showing only changes of commit 1c88ae6078 - Show all commits

View File

@@ -7,6 +7,7 @@ requires-python = ">=3.13"
dependencies = [
    "agentspeak>=0.2.2",
    "colorlog>=6.10.1",
    "deepface>=0.0.96",
    "fastapi[all]>=0.115.6",
    "mlx-whisper>=0.4.3 ; sys_platform == 'darwin'",
    "numpy>=2.3.3",

View File

@@ -0,0 +1,50 @@
import asyncio
import zmq
import zmq.asyncio as azmq
from control_backend.agents import BaseAgent
from control_backend.agents.perception.visual_emotion_detection_agent.visual_emotion_recognizer import DeepFaceEmotionRecognizer
from control_backend.core.agent_system import InternalMessage
from control_backend.core.config import settings
# START FROM RI?
class VisualEmotionRecognitionAgent(BaseAgent):
    """Agent that subscribes to a video stream over ZeroMQ and runs visual
    emotion recognition (via :class:`DeepFaceEmotionRecognizer`) on the
    frames it receives.
    """

    def __init__(self, socket_address: str, socket_bind: bool = False, timeout_ms: int = 1000):
        """
        :param socket_address: ZeroMQ endpoint for the video input socket.
        :param socket_bind: If True, bind the SUB socket to the address;
            otherwise connect to it.
        :param timeout_ms: Receive timeout for the SUB socket, in milliseconds.
        """
        super().__init__(settings.agent_settings.visual_emotion_recognition_name)
        self.socket_address = socket_address
        self.socket_bind = socket_bind
        self.timeout_ms = timeout_ms

    async def setup(self):
        """Create the emotion recognizer and the video SUB socket, then
        register the frame-retrieval behavior."""
        self.logger.info("Setting up %s.", self.name)
        # Constructing the recognizer eagerly loads the DeepFace model
        # (its __init__ calls load_model), so the first frame is not delayed.
        self.emotion_recognizer = DeepFaceEmotionRecognizer()
        self.video_in_socket = azmq.Context.instance().socket(zmq.SUB)
        if self.socket_bind:
            self.video_in_socket.bind(self.socket_address)
        else:
            self.video_in_socket.connect(self.socket_address)
        # Empty subscription string: receive all published messages.
        self.video_in_socket.setsockopt_string(zmq.SUBSCRIBE, "")
        self.video_in_socket.setsockopt(zmq.RCVTIMEO, self.timeout_ms)
        # CONFLATE keeps only the most recent frame in the receive queue.
        # NOTE(review): CONFLATE is usually set before bind/connect to be
        # effective on the connection — confirm ordering is intentional.
        self.video_in_socket.setsockopt(zmq.CONFLATE, 1)
        self.add_behavior(self.retrieve_frame())

    async def retrieve_frame(self):
        """
        Retrieve a video frame from the input socket and run emotion
        detection on it.

        :return: None — both on timeout and (in the visible code) after a
            successful detection.
        """
        await asyncio.sleep(1)  # Yield control to the event loop
        try:
            frame = await self.video_in_socket.recv()
            # detected_emotions contains a list of dictionaries as follows:
            # NOTE(review): detected_emotions is never used in the visible
            # hunk (the diff header says +50 lines but fewer are shown) —
            # presumably a later commit publishes it; confirm.
            detected_emotions = self.emotion_recognizer.detect(frame)
        except zmq.Again:
            # RCVTIMEO expired without a frame arriving.
            self.logger.debug("No video frame received within timeout.")
            return None

View File

@@ -0,0 +1,35 @@
import abc
import logging

import numpy as np
from deepface import DeepFace
class VisualEmotionRecognizer(abc.ABC):
    """Interface for visual (image-based) emotion recognition backends.

    Concrete implementations must load their model and turn an input image
    into an emotion label.
    """

    @abc.abstractmethod
    def load_model(self):
        """Load the visual emotion recognition model into memory."""
        ...

    @abc.abstractmethod
    def detect(self, image):
        """Recognize emotion from the given image.

        :param image: The input image for emotion recognition.
        :return: Detected emotion label.
        """
        ...
class DeepFaceEmotionRecognizer(VisualEmotionRecognizer):
    """Emotion recognizer backed by the DeepFace library."""

    def __init__(self):
        # Load the model eagerly so the first real frame is not delayed.
        self.load_model()

    def load_model(self):
        """Warm up DeepFace's emotion model.

        DeepFace exposes no explicit model-loading API for analyze(); the
        first call lazily builds and caches the model, so we trigger it
        here with a blank dummy image.
        """
        logger = logging.getLogger(__name__)
        logger.info("Loading DeepFace emotion model...")
        dummy_img = np.zeros((224, 224, 3), dtype=np.uint8)
        # enforce_detection=False: the blank image contains no face; we only
        # want the model weights loaded, not a real detection.
        DeepFace.analyze(dummy_img, actions=['emotion'], enforce_detection=False)
        logger.info("DeepFace emotion model loaded.")

    def detect(self, image):
        """Recognize the dominant emotion in the given image.

        :param image: Input image in a format accepted by DeepFace.analyze
            (e.g. a numpy BGR array or an image path).
        :return: Dominant emotion label of the first detected face, or None
            when DeepFace returns no result.
        """
        results = DeepFace.analyze(image, actions=['emotion'], enforce_detection=False)
        # Bug fix: modern deepface returns a *list* of per-face dicts, so the
        # original `analysis['dominant_emotion']` raised TypeError (list
        # indices must be integers). Handle both return shapes.
        if isinstance(results, dict):
            # Very old deepface versions returned a single dict.
            return results.get('dominant_emotion')
        if not results:
            return None
        return results[0]['dominant_emotion']

View File

@@ -52,6 +52,7 @@ class AgentSettings(BaseModel):
bdi_core_name: str = "bdi_core_agent"
bdi_belief_collector_name: str = "belief_collector_agent"
bdi_program_manager_name: str = "bdi_program_manager_agent"
visual_emotion_recognition_name: str = "visual_emotion_recognition_agent"
text_belief_extractor_name: str = "text_belief_extractor_agent"
vad_name: str = "vad_agent"
llm_name: str = "llm_agent"

866
uv.lock generated

File diff suppressed because it is too large Load Diff