Create transcriber agent #15

Merged
0950726 merged 10 commits from feat/transcription-agent into dev 2025-10-29 15:51:44 +00:00
9 changed files with 316 additions and 48 deletions
Showing only changes of commit 155c761daa - Show all commits

View File

@@ -16,6 +16,17 @@ Using UV, installing the packages and virtual environment is as simple as typing
uv sync uv sync
``` ```
## Local LLM
To run an LLM locally, download LM Studio from https://lmstudio.ai.
When installing, select developer mode, download a model (one will already be suggested), and run it (see the developer window; status: running).
Copy the URL shown at the top right and set `local_llm_url` to it followed by `v1/chat/completions`.
This appended path may differ depending on which model you choose.
Copy the model name of the loaded module and set `local_llm_model` accordingly, in settings.
## Running ## Running
To run the project (development server), execute the following command (while inside the root repository): To run the project (development server), execute the following command (while inside the root repository):

View File

@@ -1,9 +1,15 @@
import logging import logging
import agentspeak import agentspeak
from spade.behaviour import OneShotBehaviour
from spade.message import Message
from spade_bdi.bdi import BDIAgent from spade_bdi.bdi import BDIAgent
from control_backend.agents.bdi.behaviours.belief_setter import BeliefSetter from control_backend.agents.bdi.behaviours.belief_setter import BeliefSetterBehaviour
from control_backend.agents.bdi.behaviours.receive_llm_resp_behaviour import (
ReceiveLLMResponseBehaviour,
)
from control_backend.core.config import settings
class BDICoreAgent(BDIAgent): class BDICoreAgent(BDIAgent):
@@ -11,25 +17,55 @@ class BDICoreAgent(BDIAgent):
This is the Brain agent that does the belief inference with AgentSpeak. This is the Brain agent that does the belief inference with AgentSpeak.
This is a continuous process that happens automatically in the background. This is a continuous process that happens automatically in the background.
This class contains all the actions that can be called from AgentSpeak plans. This class contains all the actions that can be called from AgentSpeak plans.
It has the BeliefSetter behaviour. It has the BeliefSetter behaviour and can ask and receive requests from the LLM agent.
""" """
logger = logging.getLogger("BDI Core") logger = logging.getLogger("bdi_core_agent")
async def setup(self): async def setup(self) -> None:
belief_setter = BeliefSetter() """
self.add_behaviour(belief_setter) Initializes belief behaviors and message routing.
"""
self.logger.info("BDICoreAgent setup started")
self.add_behaviour(BeliefSetterBehaviour())
self.add_behaviour(ReceiveLLMResponseBehaviour())
await self._send_to_llm("Hi pepper, how are you?")
# This is the example message currently sent to the llm at the start of the Program
self.logger.info("BDICoreAgent setup complete")
def add_custom_actions(self, actions) -> None:
"""
Registers custom AgentSpeak actions callable from plans.
"""
def add_custom_actions(self, actions):
@actions.add(".reply", 1) @actions.add(".reply", 1)
def _reply(agent, term, intention): def _reply(agent: "BDICoreAgent", term, intention):
message = agentspeak.grounded(term.args[0], intention.scope) """
self.logger.info(f"Replying to message: {message}") Sends text to the LLM (AgentSpeak action).
reply = self._send_to_llm(message) Example: .reply("Hello LLM!")
self.logger.info(f"Received reply: {reply}") """
message_text = agentspeak.grounded(term.args[0], intention.scope)
self.logger.info("Reply action sending: %s", message_text)
self._send_to_llm(message_text)
yield yield
def _send_to_llm(self, message) -> str: async def _send_to_llm(self, text: str):
"""TODO: implement""" """
return f"This is a reply to {message}" Sends a text query to the LLM Agent asynchronously.
"""
class SendBehaviour(OneShotBehaviour):
async def run(self) -> None:
msg = Message(
to= settings.agent_settings.llm_agent_name + '@' + settings.agent_settings.host,
body= text
)
await self.send(msg)
self.agent.logger.debug("Message sent to LLM: %s", text)
self.add_behaviour(SendBehaviour())

View File

@@ -1,19 +1,17 @@
import asyncio
import json import json
import logging import logging
from spade.agent import Message from spade.agent import Message
from spade.behaviour import CyclicBehaviour from spade.behaviour import CyclicBehaviour
from spade_bdi.bdi import BDIAgent from spade_bdi.bdi import BDIAgent, BeliefNotInitiated
from control_backend.core.config import settings from control_backend.core.config import settings
class BeliefSetter(CyclicBehaviour): class BeliefSetterBehaviour(CyclicBehaviour):
""" """
This is the behaviour that the BDI agent runs. This behaviour waits for incoming This is the behaviour that the BDI agent runs. This behaviour waits for incoming
message and processes it based on sender. Currently, it only waits for messages message and processes it based on sender.
containing beliefs from BeliefCollector and adds these to its KB.
""" """
agent: BDIAgent agent: BDIAgent
@@ -24,7 +22,7 @@ class BeliefSetter(CyclicBehaviour):
if msg: if msg:
self.logger.info(f"Received message {msg.body}") self.logger.info(f"Received message {msg.body}")
self._process_message(msg) self._process_message(msg)
await asyncio.sleep(1)
def _process_message(self, message: Message): def _process_message(self, message: Message):
sender = message.sender.node # removes host from jid and converts to str sender = message.sender.node # removes host from jid and converts to str
@@ -35,6 +33,7 @@ class BeliefSetter(CyclicBehaviour):
self.logger.debug("Processing message from belief collector.") self.logger.debug("Processing message from belief collector.")
self._process_belief_message(message) self._process_belief_message(message)
case _: case _:
self.logger.debug("Not the belief agent, discarding message")
pass pass
def _process_belief_message(self, message: Message): def _process_belief_message(self, message: Message):
@@ -44,19 +43,28 @@ class BeliefSetter(CyclicBehaviour):
match message.thread: match message.thread:
case "beliefs": case "beliefs":
try: try:
beliefs: dict[str, list[list[str]]] = json.loads(message.body) beliefs: dict[str, list[str]] = json.loads(message.body)
self._set_beliefs(beliefs) self._set_beliefs(beliefs)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
self.logger.error("Could not decode beliefs into JSON format: %s", e) self.logger.error("Could not decode beliefs into JSON format: %s", e)
case _: case _:
pass pass
def _set_beliefs(self, beliefs: dict[str, list[list[str]]]): def _set_beliefs(self, beliefs: dict[str, list[str]]):
"""Remove previous values for beliefs and update them with the provided values."""
if self.agent.bdi is None: if self.agent.bdi is None:
self.logger.warning("Cannot set beliefs, since agent's BDI is not yet initialized.") self.logger.warning("Cannot set beliefs, since agent's BDI is not yet initialized.")
return return
for belief, arguments_list in beliefs.items(): # Set new beliefs (outdated beliefs are automatically removed)
for arguments in arguments_list: for belief, arguments in beliefs.items():
self.agent.bdi.set_belief(belief, *arguments) self.agent.bdi.set_belief(belief, *arguments)
self.logger.info("Set belief %s with arguments %s", belief, arguments)
# Special case: if there's a new user message, flag that we haven't responded yet
if belief == "user_said":
try:
self.agent.bdi.remove_belief("responded")
except BeliefNotInitiated:
pass
self.logger.info("Set belief %s with arguments %s", belief, arguments)

View File

@@ -0,0 +1,26 @@
import logging
from spade.behaviour import CyclicBehaviour
from control_backend.core.config import settings
class ReceiveLLMResponseBehaviour(CyclicBehaviour):
"""
Adds behavior to receive responses from the LLM Agent.
"""
logger = logging.getLogger("BDI/LLM Reciever")
async def run(self):
msg = await self.receive(timeout=2)
if not msg:
return
sender = msg.sender.node
match sender:
case settings.agent_settings.llm_agent_name:
content = msg.body
self.logger.info("Received LLM response: %s", content)
#Here the BDI can pass the message back as a response
case _:
self.logger.debug("Not from the llm, discarding message")
pass

View File

@@ -0,0 +1,127 @@
"""
LLM Agent module for routing text queries from the BDI Core Agent to a local LLM
service and returning its responses back to the BDI Core Agent.
"""
import logging
from typing import Any
import httpx
from spade.agent import Agent
from spade.behaviour import CyclicBehaviour
from spade.message import Message
from control_backend.agents.llm.llm_instructions import LLMInstructions
from control_backend.core.config import settings
class LLMAgent(Agent):
"""
Agent responsible for processing user text input and querying a locally
hosted LLM for text generation. Receives messages from the BDI Core Agent
and responds with processed LLM output.
"""
logger = logging.getLogger("llm_agent")
class ReceiveMessageBehaviour(CyclicBehaviour):
"""
Cyclic behaviour to continuously listen for incoming messages from
the BDI Core Agent and handle them.
"""
async def run(self):
"""
Receives SPADE messages and processes only those originating from the
configured BDI agent.
"""
msg = await self.receive(timeout=1)
if not msg:
return
sender = msg.sender.node
self.agent.logger.info(
"Received message: %s from %s",
msg.body,
sender,
)
if sender == settings.agent_settings.bdi_core_agent_name:
self.agent.logger.debug("Processing message from BDI Core Agent")
await self._process_bdi_message(msg)
else:
self.agent.logger.debug("Message ignored (not from BDI Core Agent)")
async def _process_bdi_message(self, message: Message):
"""
Forwards user text to the LLM and replies with the generated text.
"""
user_text = message.body
llm_response = await self._query_llm(user_text)
await self._reply(llm_response)
async def _reply(self, msg: str):
"""
Sends a response message back to the BDI Core Agent.
"""
reply = Message(
to=settings.agent_settings.bdi_core_agent_name + '@' + settings.agent_settings.host,
body=msg
)
await self.send(reply)
self.agent.logger.info("Reply sent to BDI Core Agent")
async def _query_llm(self, prompt: str) -> str:
"""
Sends a chat completion request to the local LLM service.
:param prompt: Input text prompt to pass to the LLM.
:return: LLM-generated content or fallback message.
"""
async with httpx.AsyncClient(timeout=120.0) as client:
# Example dynamic content for future (optional)
instructions = LLMInstructions()
developer_instruction = instructions.build_developer_instruction()
response = await client.post(
settings.llm_settings.local_llm_url,
headers={"Content-Type": "application/json"},
json={
"model": settings.llm_settings.local_llm_model,
"messages": [
{
"role": "developer",
"content": developer_instruction
},
{
"role": "user",
"content": prompt
}
],
"temperature": 0.3
},
)
try:
response.raise_for_status()
data: dict[str, Any] = response.json()
return data.get("choices", [{}])[0].get(
"message", {}
).get("content", "No response")
except httpx.HTTPError as err:
self.agent.logger.error("HTTP error: %s", err)
return "LLM service unavailable."
except Exception as err:
self.agent.logger.error("Unexpected error: %s", err)
return "Error processing the request."
async def setup(self):
"""
Sets up the SPADE behaviour to filter and process messages from the
BDI Core Agent.
"""
self.logger.info("LLMAgent setup complete")
behaviour = self.ReceiveMessageBehaviour()
self.add_behaviour(behaviour)

View File

@@ -0,0 +1,44 @@
class LLMInstructions:
"""
Defines structured instructions that are sent along with each request
to the LLM to guide its behavior (norms, goals, etc.).
"""
@staticmethod
def default_norms() -> str:
return """
Be friendly and respectful.
Make the conversation feel natural and engaging.
""".strip()
@staticmethod
def default_goals() -> str:
return """
Try to learn the user's name during conversation.
""".strip()
def __init__(self, norms: str | None = None, goals: str | None = None):
self.norms = norms if norms is not None else self.default_norms()
self.goals = goals if goals is not None else self.default_goals()
def build_developer_instruction(self) -> str:
"""
Builds a multi-line formatted instruction string for the LLM.
Includes only non-empty structured fields.
"""
sections = [
"You are a Pepper robot engaging in natural human conversation.",
"Keep responses between 15 sentences, unless instructed otherwise.\n",
]
if self.norms:
sections.append("Norms to follow:")
sections.append(self.norms)
sections.append("")
if self.goals:
sections.append("Goals to reach:")
sections.append(self.goals)
sections.append("")
return "\n".join(sections).strip()

View File

@@ -11,12 +11,18 @@ class AgentSettings(BaseModel):
bdi_core_agent_name: str = "bdi_core" bdi_core_agent_name: str = "bdi_core"
belief_collector_agent_name: str = "belief_collector" belief_collector_agent_name: str = "belief_collector"
vad_agent_name: str = "vad_agent" vad_agent_name: str = "vad_agent"
llm_agent_name: str = "llm_agent"
test_agent_name: str = "test_agent"
transcription_agent_name: str = "transcription_agent" transcription_agent_name: str = "transcription_agent"
ri_communication_agent_name: str = "ri_communication_agent" ri_communication_agent_name: str = "ri_communication_agent"
ri_command_agent_name: str = "ri_command_agent" ri_command_agent_name: str = "ri_command_agent"
class LLMSettings(BaseModel):
local_llm_url: str = "http://145.107.82.68:1234/v1/chat/completions"
local_llm_model: str = "openai/gpt-oss-120b"
class Settings(BaseSettings): class Settings(BaseSettings):
app_title: str = "PepperPlus" app_title: str = "PepperPlus"
@@ -26,7 +32,8 @@ class Settings(BaseSettings):
agent_settings: AgentSettings = AgentSettings() agent_settings: AgentSettings = AgentSettings()
llm_settings: LLMSettings = LLMSettings()
model_config = SettingsConfigDict(env_file=".env") model_config = SettingsConfigDict(env_file=".env")
settings = Settings() settings = Settings()

View File

@@ -12,6 +12,7 @@ from fastapi.middleware.cors import CORSMiddleware
from control_backend.agents.ri_communication_agent import RICommunicationAgent from control_backend.agents.ri_communication_agent import RICommunicationAgent
from control_backend.agents.bdi.bdi_core import BDICoreAgent from control_backend.agents.bdi.bdi_core import BDICoreAgent
from control_backend.agents.vad_agent import VADAgent from control_backend.agents.vad_agent import VADAgent
from control_backend.agents.llm.llm import LLMAgent
from control_backend.api.v1.router import api_router from control_backend.api.v1.router import api_router
from control_backend.core.config import settings from control_backend.core.config import settings
from control_backend.core.zmq_context import context from control_backend.core.zmq_context import context
@@ -31,6 +32,7 @@ async def lifespan(app: FastAPI):
app.state.internal_comm_socket = internal_comm_socket app.state.internal_comm_socket = internal_comm_socket
logger.info("Internal publishing socket bound to %s", internal_comm_socket) logger.info("Internal publishing socket bound to %s", internal_comm_socket)
# Initiate agents # Initiate agents
ri_communication_agent = RICommunicationAgent( ri_communication_agent = RICommunicationAgent(
settings.agent_settings.ri_communication_agent_name + "@" + settings.agent_settings.host, settings.agent_settings.ri_communication_agent_name + "@" + settings.agent_settings.host,
@@ -39,12 +41,13 @@ async def lifespan(app: FastAPI):
bind=True, bind=True,
) )
await ri_communication_agent.start() await ri_communication_agent.start()
bdi_core = BDICoreAgent(
settings.agent_settings.bdi_core_agent_name + "@" + settings.agent_settings.host, llm_agent = LLMAgent(settings.agent_settings.llm_agent_name + '@' + settings.agent_settings.host,
settings.agent_settings.bdi_core_agent_name, settings.agent_settings.llm_agent_name)
"src/control_backend/agents/bdi/rules.asl", await llm_agent.start()
) bdi_core = BDICoreAgent(settings.agent_settings.bdi_core_agent_name + '@' + settings.agent_settings.host,
settings.agent_settings.bdi_core_agent_name, "src/control_backend/agents/bdi/rules.asl")
await bdi_core.start() await bdi_core.start()
_temp_vad_agent = VADAgent("tcp://localhost:5558", False) _temp_vad_agent = VADAgent("tcp://localhost:5558", False)

View File

@@ -4,7 +4,7 @@ from unittest.mock import AsyncMock, MagicMock, call
import pytest import pytest
from control_backend.agents.bdi.behaviours.belief_setter import BeliefSetter from control_backend.agents.bdi.behaviours.belief_setter import BeliefSetterBehaviour
# Define a constant for the collector agent name to use in tests # Define a constant for the collector agent name to use in tests
COLLECTOR_AGENT_NAME = "belief_collector" COLLECTOR_AGENT_NAME = "belief_collector"
@@ -22,16 +22,14 @@ def mock_agent(mocker):
@pytest.fixture @pytest.fixture
def belief_setter(mock_agent, mocker): def belief_setter(mock_agent, mocker):
"""Fixture to create an instance of BeliefSetter with a mocked agent.""" """Fixture to create an instance of BeliefSetterBehaviour with a mocked agent."""
# Patch the settings to use a predictable agent name # Patch the settings to use a predictable agent name
mocker.patch( mocker.patch(
"control_backend.agents.bdi.behaviours.belief_setter.settings.agent_settings.belief_collector_agent_name", "control_backend.agents.bdi.behaviours.belief_setter.settings.agent_settings.belief_collector_agent_name",
COLLECTOR_AGENT_NAME, COLLECTOR_AGENT_NAME,
) )
# Patch asyncio.sleep to prevent tests from actually waiting
mocker.patch("asyncio.sleep", return_value=None)
setter = BeliefSetter() setter = BeliefSetterBehaviour()
setter.agent = mock_agent setter.agent = mock_agent
# Mock the receive method, we will control its return value in each test # Mock the receive method, we will control its return value in each test
setter.receive = AsyncMock() setter.receive = AsyncMock()
@@ -115,7 +113,7 @@ def test_process_belief_message_valid_json(belief_setter, mocker):
Test processing a valid belief message with correct thread and JSON body. Test processing a valid belief message with correct thread and JSON body.
""" """
# Arrange # Arrange
beliefs_payload = {"is_hot": [["kitchen"]], "is_clean": [["kitchen"], ["bathroom"]]} beliefs_payload = {"is_hot": ["kitchen"], "is_clean": ["kitchen", "bathroom"]}
msg = create_mock_message( msg = create_mock_message(
sender_node=COLLECTOR_AGENT_JID, body=json.dumps(beliefs_payload), thread="beliefs" sender_node=COLLECTOR_AGENT_JID, body=json.dumps(beliefs_payload), thread="beliefs"
) )
@@ -185,8 +183,8 @@ def test_set_beliefs_success(belief_setter, mock_agent, caplog):
""" """
# Arrange # Arrange
beliefs_to_set = { beliefs_to_set = {
"is_hot": [["kitchen"], ["living_room"]], "is_hot": ["kitchen"],
"door_is": [["front_door", "closed"]], "door_opened": ["front_door", "back_door"],
} }
# Act # Act
@@ -196,17 +194,25 @@ def test_set_beliefs_success(belief_setter, mock_agent, caplog):
# Assert # Assert
expected_calls = [ expected_calls = [
call("is_hot", "kitchen"), call("is_hot", "kitchen"),
call("is_hot", "living_room"), call("door_opened", "front_door", "back_door"),
call("door_is", "front_door", "closed"),
] ]
mock_agent.bdi.set_belief.assert_has_calls(expected_calls, any_order=True) mock_agent.bdi.set_belief.assert_has_calls(expected_calls, any_order=True)
assert mock_agent.bdi.set_belief.call_count == 3 assert mock_agent.bdi.set_belief.call_count == 2
# Check logs # Check logs
assert "Set belief is_hot with arguments ['kitchen']" in caplog.text assert "Set belief is_hot with arguments ['kitchen']" in caplog.text
assert "Set belief is_hot with arguments ['living_room']" in caplog.text assert "Set belief door_opened with arguments ['front_door', 'back_door']" in caplog.text
assert "Set belief door_is with arguments ['front_door', 'closed']" in caplog.text
def test_responded_unset(belief_setter, mock_agent):
# Arrange
new_beliefs = {"user_said": ["message"]}
# Act
belief_setter._set_beliefs(new_beliefs)
# Assert
mock_agent.bdi.set_belief.assert_has_calls([call("user_said", "message")])
mock_agent.bdi.remove_belief.assert_has_calls([call("responded")])
def test_set_beliefs_bdi_not_initialized(belief_setter, mock_agent, caplog): def test_set_beliefs_bdi_not_initialized(belief_setter, mock_agent, caplog):
""" """
@@ -214,7 +220,7 @@ def test_set_beliefs_bdi_not_initialized(belief_setter, mock_agent, caplog):
""" """
# Arrange # Arrange
mock_agent.bdi = None # Simulate BDI not being ready mock_agent.bdi = None # Simulate BDI not being ready
beliefs_to_set = {"is_hot": [["kitchen"]]} beliefs_to_set = {"is_hot": ["kitchen"]}
# Act # Act
with caplog.at_level(logging.WARNING): with caplog.at_level(logging.WARNING):