feat: fully implement visual emotion recognition agent in pipeline
ref: N25B-393
@@ -158,6 +158,9 @@ class BDICoreAgent(BaseAgent):
         for belief in beliefs:
+            if belief.replace:
+                self._remove_all_with_name(belief.name)
+            elif belief.remove:
                 self._remove_belief(belief.name, belief.arguments)
                 continue
             self._add_belief(belief.name, belief.arguments)
 
     def _add_belief(self, name: str, args: Iterable[str] = []):
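For context, the loop above gives a `Belief` three modes, driven by its flags: `replace` clears every belief with the same name before asserting the new one, `remove` retracts a single belief without asserting anything, and the default simply asserts. A minimal sketch of those semantics, with the `Belief` shape assumed from this diff (it is only known to be a pydantic model with `name`, `arguments`, `remove`, and `replace` fields):

```python
from pydantic import BaseModel


# Assumed stand-in for control_backend.schemas.belief_message.Belief
class Belief(BaseModel):
    name: str
    arguments: list[str] = []
    remove: bool = False   # retract this exact (name, arguments) belief, assert nothing
    replace: bool = False  # first drop every belief sharing this name, then assert

updates = [
    Belief(name="emotion", arguments=["happy"], replace=True),  # reset, then add
    Belief(name="emotion", arguments=["sad"], remove=True),     # retract only
]
```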
@@ -7,6 +7,9 @@ from zmq.asyncio import Context
 
 from control_backend.agents import BaseAgent
 from control_backend.agents.actuation.robot_gesture_agent import RobotGestureAgent
+from control_backend.agents.perception.visual_emotion_detection_agent.visual_emotion_recognition_agent import (
+    VisualEmotionRecognitionAgent,
+)
 from control_backend.core.config import settings
 
 from ..actuation.robot_speech_agent import RobotSpeechAgent
@@ -201,6 +204,13 @@ class RICommunicationAgent(BaseAgent):
             case "audio":
                 vad_agent = VADAgent(audio_in_address=addr, audio_in_bind=bind)
                 await vad_agent.start()
+            case "video":
+                visual_emotion_agent = VisualEmotionRecognitionAgent(
+                    settings.agent_settings.visual_emotion_recognition_name,
+                    socket_address=addr,
+                    bind=bind,
+                )
+                await visual_emotion_agent.start()
             case _:
                 self.logger.warning("Unhandled negotiation id: %s", id)
@@ -1,23 +1,28 @@
 import asyncio
+import json
+import time
+from collections import Counter, defaultdict
 
+import cv2
+import numpy as np
+from pydantic_core import ValidationError
 import zmq
 import zmq.asyncio as azmq
-import numpy as np
-import cv2
-from collections import defaultdict, Counter
-import time
 
 from control_backend.agents import BaseAgent
-from control_backend.agents.perception.visual_emotion_detection_agent.visual_emotion_recognizer import DeepFaceEmotionRecognizer
+from control_backend.agents.perception.visual_emotion_detection_agent.visual_emotion_recognizer import (
+    DeepFaceEmotionRecognizer,
+)
 from control_backend.core.agent_system import InternalMessage
 from control_backend.core.config import settings
 from control_backend.schemas.belief_message import Belief
 
 # START FROM RI COMMUNICATION AGENT?
 
 
 class VisualEmotionRecognitionAgent(BaseAgent):
-    def __init__(self, socket_address: str, socket_bind: bool = False, timeout_ms: int = 1000):
-        super().__init__(settings.agent_settings.visual_emotion_recognition_name)
+    def __init__(self, name: str, socket_address: str, bind: bool = False, timeout_ms: int = 1000):
+        super().__init__(name)
         self.socket_address = socket_address
-        self.socket_bind = socket_bind
+        self.socket_bind = bind
         self.timeout_ms = timeout_ms
 
     async def setup(self):
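The reworked constructor only stores connection parameters; the `setup` body itself is not part of this diff. A sketch of what those parameters imply for the input socket, assuming a zmq SUB socket with an empty topic filter (both assumptions; only the address, the bind flag, and the timeout come from the diff):

```python
import zmq
import zmq.asyncio as azmq


async def setup(self):
    # Sketch only: socket type and subscription are assumptions.
    ctx = azmq.Context.instance()
    self.video_socket = ctx.socket(zmq.SUB)
    self.video_socket.setsockopt(zmq.RCVTIMEO, self.timeout_ms)  # recv raises zmq.Again on timeout
    self.video_socket.setsockopt_string(zmq.SUBSCRIBE, "")
    if self.socket_bind:
        self.video_socket.bind(self.socket_address)
    else:
        self.video_socket.connect(self.socket_address)
```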
@@ -41,8 +46,6 @@ class VisualEmotionRecognitionAgent(BaseAgent):
     async def emotion_update_loop(self):
         """
-        Retrieve a video frame from the input socket.
 
-        :return: The received video frame, or None if timeout occurs.
         """
         window_duration = 1  # seconds
         next_window_time = time.time() + window_duration
@@ -70,7 +73,7 @@ class VisualEmotionRecognitionAgent(BaseAgent):
                 if frame_image is None:
                     # Could not decode image, skip this frame
                     continue
 
                 # Get the dominant emotion from each face
+                current_emotions = self.emotion_recognizer.sorted_dominant_emotions(frame_image)
                 # Update emotion counts for each detected face
@@ -90,7 +93,6 @@ class VisualEmotionRecognitionAgent(BaseAgent):
                     window_dominant_emotions.add(dominant_emotion)
 
                 await self.update_emotions(prev_dominant_emotions, window_dominant_emotions)
-
                 prev_dominant_emotions = window_dominant_emotions
                 face_stats.clear()
                 next_window_time = time.time() + window_duration
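Taken together, the hunks above implement a one-second tumbling window: per-face emotion counts accumulate while frames arrive, and at each window boundary a majority vote per face becomes the window's emotion set. A condensed, runnable sketch of that close-out step (the `face_stats` shape, face index mapped to a `Counter`, is an assumption):

```python
from collections import Counter, defaultdict

# Assumed shape: face index -> Counter of emotion labels seen this window
face_stats: defaultdict[int, Counter] = defaultdict(Counter)

face_stats[0].update(["happy", "happy", "neutral"])
face_stats[1].update(["sad"])

# At the window boundary: one dominant emotion per face, collected as a set
window_dominant_emotions = {
    counts.most_common(1)[0][0] for counts in face_stats.values() if counts
}
print(window_dominant_emotions)  # {'happy', 'sad'} (set order may vary)
```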
@@ -98,14 +100,40 @@ class VisualEmotionRecognitionAgent(BaseAgent):
         except zmq.Again:
             self.logger.warning("No video frame received within timeout.")
 
-    async def update_emotions(self, prev_emotions, emotions):
+    async def update_emotions(self, prev_emotions: set[str], emotions: set[str]):
         """
         Compare the emotions from the previous window with the current window
         and send belief updates to the BDI Core Agent.
         """
+        # Diff the windows: emotions that disappeared vs. newly appeared ones
+        emotions_to_remove = prev_emotions - emotions
+        new_emotions = emotions - prev_emotions
+
+        if not new_emotions and not emotions_to_remove:
+            return
+
+        emotion_beliefs = []
+
+        # Retract emotions that have disappeared
+        for emotion in emotions_to_remove:
+            self.logger.info("Emotion '%s' has disappeared.", emotion)
+            try:
+                emotion_beliefs.append(Belief(name="emotion", arguments=[emotion], remove=True))
+            except ValidationError:
+                self.logger.warning("Invalid belief for emotion removal: %s", emotion)
+
+        # Assert emotions that have newly appeared
+        for emotion in new_emotions:
+            self.logger.info("New emotion detected: '%s'", emotion)
+            try:
+                emotion_beliefs.append(Belief(name="emotion", arguments=[emotion]))
+            except ValidationError:
+                self.logger.warning("Invalid belief for new emotion: %s", emotion)
+
+        message = InternalMessage(
+            to=settings.agent_settings.bdi_core_name,
+            sender=self.name,
+            # Belief is a pydantic model, so dump to plain dicts before json.dumps
+            body=json.dumps([belief.model_dump() for belief in emotion_beliefs]),
+            thread="beliefs",
+        )
+        self.logger.debug("Sending emotion beliefs update: %s", emotion_beliefs)
+        await self.send(message)
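One note on the message body: since `InternalMessage.body` carries a JSON string and pydantic models are not directly `json.dumps`-serializable, the beliefs are dumped to plain dicts first (done inline above, assuming pydantic v2, which the `pydantic_core` import suggests). A worked example of the resulting payload, with the `Belief` shape assumed as before:

```python
import json

from pydantic import BaseModel


# Assumed stand-in for control_backend.schemas.belief_message.Belief
class Belief(BaseModel):
    name: str
    arguments: list[str] = []
    remove: bool = False

beliefs = [
    Belief(name="emotion", arguments=["happy"]),
    Belief(name="emotion", arguments=["sad"], remove=True),
]
print(json.dumps([b.model_dump() for b in beliefs]))
# [{"name": "emotion", "arguments": ["happy"], "remove": false},
#  {"name": "emotion", "arguments": ["sad"], "remove": true}]
```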
@@ -1,7 +1,8 @@
 import abc
-from deepface import DeepFace
 
 import numpy as np
+from collections import Counter
+from deepface import DeepFace
 
 
 class VisualEmotionRecognizer(abc.ABC):
     @abc.abstractmethod
@@ -42,7 +43,6 @@ class DeepFaceEmotionRecognizer(VisualEmotionRecognizer):
 
         analysis = [face for face in analysis if face['face_confidence'] >= 0.90]
 
-        # Return list of (dominant_emotion, face_confidence) tuples
         dominant_emotions = [face['dominant_emotion'] for face in analysis]
         return dominant_emotions
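For reference, the filtering above sits on top of `DeepFace.analyze`; a minimal standalone sketch of the same per-face pipeline (the 0.90 threshold and the `face_confidence`/`dominant_emotion` keys come from the diff; `enforce_detection=False` is an assumption so frames without faces do not raise):

```python
import numpy as np
from deepface import DeepFace


def dominant_emotions(frame: np.ndarray, min_confidence: float = 0.90) -> list[str]:
    # analyze returns one dict per detected face when actions=["emotion"]
    analysis = DeepFace.analyze(frame, actions=["emotion"], enforce_detection=False)
    confident = [face for face in analysis if face["face_confidence"] >= min_confidence]
    return [face["dominant_emotion"] for face in confident]
```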