feat: extract semantic beliefs from conversation

ref: N25B-380
This commit is contained in:
Twirre Meulenbelt
2025-12-23 17:09:58 +01:00
parent adbb7ffd5c
commit 33501093a1
5 changed files with 508 additions and 63 deletions

View File

@@ -1,8 +1,23 @@
import asyncio
import json import json
import httpx
from pydantic import ValidationError
from slugify import slugify
from control_backend.agents.base import BaseAgent from control_backend.agents.base import BaseAgent
from control_backend.core.agent_system import InternalMessage from control_backend.core.agent_system import InternalMessage
from control_backend.core.config import settings from control_backend.core.config import settings
from control_backend.schemas.belief_message import Belief as InternalBelief
from control_backend.schemas.belief_message import BeliefMessage
from control_backend.schemas.chat_history import ChatHistory, ChatMessage
from control_backend.schemas.program import (
Belief,
ConditionalNorm,
InferredBelief,
Program,
SemanticBelief,
)
class TextBeliefExtractorAgent(BaseAgent): class TextBeliefExtractorAgent(BaseAgent):
@@ -12,46 +27,110 @@ class TextBeliefExtractorAgent(BaseAgent):
This agent is responsible for processing raw text (e.g., from speech transcription) and This agent is responsible for processing raw text (e.g., from speech transcription) and
extracting semantic beliefs from it. extracting semantic beliefs from it.
In the current demonstration version, it performs a simple wrapping of the user's input It uses the available beliefs received from the program manager to try to extract beliefs from a
into a ``user_said`` belief. In a full implementation, this agent would likely interact user's message, sends and updated beliefs to the BDI core, and forms a ``user_said`` belief from
with an LLM or NLU engine to extract intent, entities, and other structured information. the message itself.
""" """
def __init__(self, name: str):
    """
    Create the agent with empty belief and conversation state.

    :param name: The name of this agent, forwarded to the base agent.
    """
    super().__init__(name)
    # Beliefs that may be inferred; replaced whenever a valid program is received.
    self.available_beliefs = []
    # Last inferred value per belief name, used to detect changed beliefs.
    self.beliefs = {}
    # Recent chat messages from which beliefs are inferred.
    self.conversation = ChatHistory(messages=[])
async def setup(self): async def setup(self):
""" """
Initialize the agent and its resources. Initialize the agent and its resources.
""" """
self.logger.info("Settting up %s.", self.name) self.logger.info("Setting up %s.", self.name)
# Setup LLM belief context if needed (currently demo is just passthrough)
self.beliefs = {"mood": ["X"], "car": ["Y"]}
async def handle_message(self, msg: InternalMessage): async def handle_message(self, msg: InternalMessage):
""" """
Handle incoming messages, primarily from the Transcription Agent. Handle incoming messages. Expect messages from the Transcriber agent, LLM agent, and the
Program manager agent.
:param msg: The received message containing transcribed text. :param msg: The received message.
""" """
sender = msg.sender sender = msg.sender
if sender == settings.agent_settings.transcription_name:
self.logger.debug("Received text from transcriber: %s", msg.body)
await self._process_transcription_demo(msg.body)
else:
self.logger.info("Discarding message from %s", sender)
async def _process_transcription_demo(self, txt: str): match sender:
case settings.agent_settings.transcription_name:
self.logger.debug("Received text from transcriber: %s", msg.body)
self._apply_conversation_message(ChatMessage(role="user", content=msg.body))
await self._infer_new_beliefs()
await self._user_said(msg.body)
case settings.agent_settings.llm_name:
self.logger.debug("Received text from LLM: %s", msg.body)
self._apply_conversation_message(ChatMessage(role="assistant", content=msg.body))
case settings.agent_settings.bdi_program_manager_name:
self._handle_program_manager_message(msg)
case _:
self.logger.info("Discarding message from %s", sender)
return
def _apply_conversation_message(self, message: ChatMessage):
""" """
Process the transcribed text and generate beliefs. Save the chat message to our conversation history, taking into account the conversation
length limit.
**Demo Implementation:** :param message: The chat message to add to the conversation history.
Currently, this method takes the raw text ``txt`` and wraps it into a belief structure:
``user_said("txt")``.
This belief is then sent to the :class:`BDIBeliefCollectorAgent`.
:param txt: The raw transcribed text string.
""" """
# For demo, just wrapping user text as user_said belief length_limit = settings.behaviour_settings.conversation_history_length_limit
belief = {"beliefs": {"user_said": [txt]}, "type": "belief_extraction_text"} self.conversation.messages = (self.conversation.messages + [message])[-length_limit:]
def _handle_program_manager_message(self, msg: InternalMessage):
    """
    Parse a program sent by the program manager and remember which beliefs it uses.

    Messages whose body is not a valid program are logged and otherwise ignored; the
    previously known beliefs are kept in that case.

    :param msg: The received message from the program manager.
    """
    try:
        parsed_program = Program.model_validate_json(msg.body)
    except ValidationError:
        self.logger.warning(
            "Received message from program manager but it is not a valid program."
        )
    else:
        self.logger.debug("Received a program from the program manager.")
        self.available_beliefs = self._extract_basic_beliefs_from_program(parsed_program)
# TODO Copied from an incomplete version of the program manager. Use that one instead.
@staticmethod
def _extract_basic_beliefs_from_program(program: Program) -> list[SemanticBelief]:
    """
    Collect the beliefs used as conditions anywhere in the given program.

    For every phase, the conditions of its conditional norms are visited first, followed by
    the conditions of its triggers.

    :param program: The program to collect beliefs from.
    :return: The beliefs found, in the order they were encountered.
    """
    extract = TextBeliefExtractorAgent._extract_basic_beliefs_from_belief

    found: list[SemanticBelief] = []
    for phase in program.phases:
        conditions = [
            norm.condition for norm in phase.norms if isinstance(norm, ConditionalNorm)
        ]
        conditions.extend(trigger.condition for trigger in phase.triggers)
        for condition in conditions:
            found.extend(extract(condition))
    return found
# TODO Copied from an incomplete version of the program manager. Use that one instead.
@staticmethod
def _extract_basic_beliefs_from_belief(belief: Belief) -> list[SemanticBelief]:
    """
    Flatten a (possibly nested) belief into the semantic beliefs it is built from.

    Inferred beliefs are traversed recursively, left operand before right operand. Beliefs
    that are not semantic (such as keyword beliefs) are left out: they have no
    ``description`` and therefore cannot be put into the LLM prompt or response schema.

    :param belief: The belief to flatten.
    :return: The semantic beliefs contained in ``belief``, in left-to-right order.
    """
    if isinstance(belief, InferredBelief):
        extract = TextBeliefExtractorAgent._extract_basic_beliefs_from_belief
        return extract(belief.left) + extract(belief.right)
    if isinstance(belief, SemanticBelief):
        return [belief]
    # Anything else cannot be inferred semantically; skip it instead of failing later.
    return []
async def _user_said(self, text: str):
"""
Create a belief for the user's full speech.
:param text: User's transcribed text.
"""
belief = {"beliefs": {"user_said": [text]}, "type": "belief_extraction_text"}
payload = json.dumps(belief) payload = json.dumps(belief)
belief_msg = InternalMessage( belief_msg = InternalMessage(
@@ -60,6 +139,200 @@ class TextBeliefExtractorAgent(BaseAgent):
body=payload, body=payload,
thread="beliefs", thread="beliefs",
) )
await self.send(belief_msg) await self.send(belief_msg)
self.logger.info("Sent %d beliefs to the belief collector.", len(belief["beliefs"]))
async def _infer_new_beliefs(self):
    """
    Process conversation history to extract beliefs, semantically. Any changed beliefs are sent
    to the BDI core.

    Beliefs for which the LLM gave no verdict (``None``) and beliefs whose value did not
    change since the previous inference are not sent. When nothing changed, no message is
    sent at all.
    """
    # Return instantly if there are no beliefs to infer
    if not self.available_beliefs:
        return

    candidate_beliefs = await self._infer_turn()

    new_beliefs: list[InternalBelief] = []
    for belief_key, belief_value in candidate_beliefs.items():
        if belief_value is None:
            continue
        # TODO: Do we need this check? Can we send the same beliefs multiple times?
        if belief_value == self.beliefs.get(belief_key):
            continue

        self.beliefs[belief_key] = belief_value
        new_beliefs.append(
            InternalBelief(name=belief_key, arguments=[belief_value], replace=True),
        )

    # Nothing changed this turn: do not bother the BDI core with an empty update.
    if not new_beliefs:
        return

    beliefs_message = InternalMessage(
        to=settings.agent_settings.bdi_core_name,
        sender=self.name,
        body=BeliefMessage(beliefs=new_beliefs).model_dump_json(),
        thread="beliefs",
    )
    await self.send(beliefs_message)
@staticmethod
def _split_into_chunks[T](items: list[T], n: int) -> list[list[T]]:
k, m = divmod(len(items), n)
return [items[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n)]
async def _infer_turn(self) -> dict:
    """
    Process the stored conversation history to extract semantic beliefs.

    The available beliefs are split into chunks that are queried concurrently. Chunks for
    which the LLM query failed are left out of the result.

    :return: A dict mapping belief names to a value ``True``, ``False`` or ``None``.
    """
    if not self.available_beliefs:
        return {}

    # The configured parallelism minus one is used for belief queries (presumably to keep a
    # slot free for the chat LLM -- confirm). Always use at least one chunk, otherwise a
    # setting of 1 would result in zero chunks and a division by zero while splitting.
    n_parallel = max(
        1,
        min(settings.llm_settings.n_parallel - 1, len(self.available_beliefs)),
    )
    all_beliefs = await asyncio.gather(
        *[
            self._infer_beliefs(self.conversation, beliefs)
            for beliefs in self._split_into_chunks(self.available_beliefs, n_parallel)
        ]
    )

    retval = {}
    for beliefs in all_beliefs:
        if beliefs is None:
            continue
        retval.update(beliefs)
    return retval
@staticmethod
def _create_belief_schema(belief: SemanticBelief) -> tuple[str, dict]:
# TODO: use real belief names
return belief.name or slugify(belief.description), {
"type": ["boolean", "null"],
"description": belief.description,
}
@staticmethod
def _create_beliefs_schema(beliefs: list[SemanticBelief]) -> dict:
    """
    Build the JSON schema of the object the LLM must respond with for the given beliefs.

    Every belief becomes a required property. When several beliefs share a name, the
    property is listed once (the last schema for that name wins), so that ``required`` never
    contains duplicate entries.

    :param beliefs: The beliefs that should appear in the response.
    :return: A JSON schema of type ``object`` with one required property per belief name.
    """
    properties = dict(
        TextBeliefExtractorAgent._create_belief_schema(belief) for belief in beliefs
    )
    return {
        "type": "object",
        "properties": properties,
        # Taken from the property keys, which are unique and keep first-seen order.
        "required": list(properties),
    }
@staticmethod
def _format_message(message: ChatMessage):
return f"{message.role.upper()}:\n{message.content}"
@staticmethod
def _format_conversation(conversation: ChatHistory):
    """
    Render a whole conversation, separating the rendered messages with a blank line.

    :param conversation: The conversation to render.
    :return: The rendered conversation; an empty string when there are no messages.
    """
    rendered_messages = map(TextBeliefExtractorAgent._format_message, conversation.messages)
    return "\n\n".join(rendered_messages)
@staticmethod
def _format_beliefs(beliefs: list[SemanticBelief]):
# TODO: use real belief names
return "\n".join(
[
f"- {belief.name or slugify(belief.description)}: {belief.description}"
for belief in beliefs
]
)
async def _infer_beliefs(
    self,
    conversation: ChatHistory,
    beliefs: list[SemanticBelief],
) -> dict | None:
    """
    Infer given beliefs based on the given conversation.

    :param conversation: The conversation to infer beliefs from.
    :param beliefs: The beliefs to infer.
    :return: A dict mapping each belief name to whether it holds (``True``/``False``), or to
        ``None`` when the conversation says nothing about it. Returns ``None`` instead of a
        dict when the LLM could not be queried successfully.
    """
    # Only illustrates the response shape; the real property names come from ``beliefs``.
    example = {
        "example_belief": True,
    }

    prompt = f"""{self._format_conversation(conversation)}
Given the above conversation, what beliefs can be inferred?
If there is no relevant information about a belief, give null.
In case messages conflict, prefer using the most recent messages for inference.
Choose from the following list of beliefs, formatted as (belief_name, description):
{self._format_beliefs(beliefs)}
Respond with a JSON similar to the following, but with the property names as given above:
{json.dumps(example, indent=2)}
"""

    # The schema constrains the response to exactly the belief names listed in the prompt.
    schema = self._create_beliefs_schema(beliefs)

    return await self._retry_query_llm(prompt, schema)
async def _retry_query_llm(self, prompt: str, schema: dict, tries: int = 3) -> dict | None:
"""
Query the LLM with the given prompt and schema, return an instance of a dict conforming
to this schema. Try ``tries`` times, or return None.
:param prompt: Prompt to be queried.
:param schema: Schema to be queried.
:return: An instance of a dict conforming to this schema, or None if failed.
"""
try_count = 0
while try_count < tries:
try_count += 1
try:
return await self._query_llm(prompt, schema)
except (httpx.HTTPStatusError, json.JSONDecodeError, KeyError) as e:
if try_count < tries:
continue
self.logger.exception(
"Failed to get LLM response after %d tries.",
try_count,
exc_info=e,
)
return None
@staticmethod
async def _query_llm(prompt: str, schema: dict) -> dict:
    """
    Send a single, non-streaming request to the configured LLM and parse its JSON answer.

    The response is constrained to the given JSON schema. No timeout is applied to the
    request.

    :param prompt: The prompt to be queried.
    :param schema: Schema to use during response.
    :return: A dict conforming to this schema.
    :raises httpx.HTTPStatusError: If the LLM server responded with an error.
    :raises json.JSONDecodeError: If the LLM response was not valid JSON. May happen if the
        response was cut off early due to length limitations.
    :raises KeyError: If the LLM server responded with no error, but the response was invalid.
    """
    response_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "Beliefs",
            "strict": True,
            "schema": schema,
        },
    }
    request_body = {
        "model": settings.llm_settings.local_llm_model,
        "messages": [{"role": "user", "content": prompt}],
        "response_format": response_format,
        "reasoning_effort": "low",
        "temperature": settings.llm_settings.code_temperature,
        "stream": False,
    }

    async with httpx.AsyncClient() as client:
        response = await client.post(
            settings.llm_settings.local_llm_url,
            json=request_body,
            timeout=None,
        )
        response.raise_for_status()

        completion = response.json()
        # The answer is the content of the first choice's message, itself a JSON document.
        content = completion["choices"][0]["message"]["content"]
        return json.loads(content)

View File

@@ -64,11 +64,12 @@ class LLMAgent(BaseAgent):
:param message: The parsed prompt message containing text, norms, and goals. :param message: The parsed prompt message containing text, norms, and goals.
""" """
full_message = ""
async for chunk in self._query_llm(message.text, message.norms, message.goals): async for chunk in self._query_llm(message.text, message.norms, message.goals):
await self._send_reply(chunk) await self._send_reply(chunk)
self.logger.debug( full_message += chunk
"Finished processing BDI message. Response sent in chunks to BDI core." self.logger.debug("Finished processing BDI message. Response sent in chunks to BDI core.")
) await self._send_full_reply(full_message)
async def _send_reply(self, msg: str): async def _send_reply(self, msg: str):
""" """
@@ -83,6 +84,19 @@ class LLMAgent(BaseAgent):
) )
await self.send(reply) await self.send(reply)
async def _send_full_reply(self, msg: str):
    """
    Send the complete response text, in one message, to the text belief extractor agent.

    :param msg: The text content of the message.
    """
    recipient = settings.agent_settings.text_belief_extractor_name
    await self.send(InternalMessage(to=recipient, sender=self.name, body=msg))
async def _query_llm( async def _query_llm(
self, prompt: str, norms: list[str], goals: list[str] self, prompt: str, norms: list[str], goals: list[str]
) -> AsyncGenerator[str]: ) -> AsyncGenerator[str]:
@@ -172,7 +186,7 @@ class LLMAgent(BaseAgent):
json={ json={
"model": settings.llm_settings.local_llm_model, "model": settings.llm_settings.local_llm_model,
"messages": messages, "messages": messages,
"temperature": 0.3, "temperature": settings.llm_settings.chat_temperature,
"stream": True, "stream": True,
}, },
) as response: ) as response:

View File

@@ -65,6 +65,7 @@ class BehaviourSettings(BaseModel):
:ivar transcription_words_per_minute: Estimated words per minute for transcription timing. :ivar transcription_words_per_minute: Estimated words per minute for transcription timing.
:ivar transcription_words_per_token: Estimated words per token for transcription timing. :ivar transcription_words_per_token: Estimated words per token for transcription timing.
:ivar transcription_token_buffer: Buffer for transcription tokens. :ivar transcription_token_buffer: Buffer for transcription tokens.
:ivar conversation_history_length_limit: The maximum amount of messages to extract beliefs from.
""" """
sleep_s: float = 1.0 sleep_s: float = 1.0
@@ -82,6 +83,9 @@ class BehaviourSettings(BaseModel):
transcription_words_per_token: float = 0.75 # (3 words = 4 tokens) transcription_words_per_token: float = 0.75 # (3 words = 4 tokens)
transcription_token_buffer: int = 10 transcription_token_buffer: int = 10
# Text belief extractor settings
# Annotated so that pydantic treats it as a model field; pydantic v2 rejects
# non-annotated attributes on a model at class creation.
conversation_history_length_limit: int = 10
class LLMSettings(BaseModel): class LLMSettings(BaseModel):
""" """
@@ -89,10 +93,17 @@ class LLMSettings(BaseModel):
:ivar local_llm_url: URL for the local LLM API. :ivar local_llm_url: URL for the local LLM API.
:ivar local_llm_model: Name of the local LLM model to use. :ivar local_llm_model: Name of the local LLM model to use.
:ivar chat_temperature: The temperature to use while generating chat responses.
:ivar code_temperature: The temperature to use while generating code-like responses like during
belief inference.
:ivar n_parallel: The number of parallel calls allowed to be made to the LLM.
""" """
local_llm_url: str = "http://localhost:1234/v1/chat/completions" local_llm_url: str = "http://localhost:1234/v1/chat/completions"
local_llm_model: str = "gpt-oss" local_llm_model: str = "gpt-oss"
# Annotated so that pydantic treats these as model fields; pydantic v2 rejects
# non-annotated attributes on a model at class creation.
chat_temperature: float = 1.0
code_temperature: float = 0.3
n_parallel: int = 4
class VADSettings(BaseModel): class VADSettings(BaseModel):

View File

@@ -0,0 +1,10 @@
from pydantic import BaseModel
class ChatMessage(BaseModel):
    """
    A single message of a conversation.

    :ivar role: Who wrote the message. The text belief extractor uses ``"user"`` for
        transcribed speech and ``"assistant"`` for LLM replies.
    :ivar content: The text of the message.
    """

    role: str
    content: str
class ChatHistory(BaseModel):
    """
    An ordered list of chat messages forming (part of) a conversation.

    :ivar messages: The messages of the conversation. New messages are appended at the end
        by the text belief extractor, so the list is presumably oldest-first.
    """

    messages: list[ChatMessage]

View File

@@ -1,64 +1,201 @@
from pydantic import BaseModel from enum import Enum
from typing import Literal
from pydantic import UUID4, BaseModel
class Norm(BaseModel): class ProgramElement(BaseModel):
""" """
Represents a behavioral norm. Represents a basic element of our behavior program.
:ivar name: The researcher-assigned name of the element.
:ivar id: Unique identifier. :ivar id: Unique identifier.
:ivar label: Human-readable label.
:ivar norm: The actual norm text describing the behavior.
""" """
id: str name: str
label: str id: UUID4
norm: str
class Goal(BaseModel): class LogicalOperator(Enum):
AND = "AND"
OR = "OR"
type Belief = KeywordBelief | SemanticBelief | InferredBelief
type BasicBelief = KeywordBelief | SemanticBelief
class KeywordBelief(ProgramElement):
""" """
Represents an objective to be achieved. Represents a belief that is set when the user spoken text contains a certain keyword.
:ivar id: Unique identifier. :ivar keyword: The keyword on which this belief gets set.
:ivar label: Human-readable label.
:ivar description: Detailed description of the goal.
:ivar achieved: Status flag indicating if the goal has been met.
""" """
id: str name: str = ""
label: str
description: str
achieved: bool
class TriggerKeyword(BaseModel):
id: str
keyword: str keyword: str
class KeywordTrigger(BaseModel): class SemanticBelief(ProgramElement):
id: str """
label: str Represents a belief that is set by semantic LLM validation.
type: str
keywords: list[TriggerKeyword] :ivar description: Description of how to form the belief, used by the LLM.
"""
name: str = ""
description: str
class Phase(BaseModel): class InferredBelief(ProgramElement):
"""
Represents a belief that gets formed by combining two beliefs with a logical AND or OR.
These beliefs can also be :class:`InferredBelief`, leading to arbitrarily deep nesting.
:ivar operator: The logical operator to apply.
:ivar left: The left part of the logical expression.
:ivar right: The right part of the logical expression.
"""
name: str = ""
operator: LogicalOperator
left: Belief
right: Belief
type Norm = BasicNorm | ConditionalNorm
class BasicNorm(ProgramElement):
"""
Represents a behavioral norm.
:ivar norm: The actual norm text describing the behavior.
:ivar critical: When true, this norm should absolutely not be violated (checked separately).
"""
name: str = ""
norm: str
critical: bool = False
class ConditionalNorm(BasicNorm):
"""
Represents a norm that is only active when a condition is met (i.e., a certain belief holds).
:ivar condition: When to activate this norm.
"""
condition: Belief
type PlanElement = Goal | Action
class Plan(ProgramElement):
"""
Represents a list of steps to execute. Each of these steps can be a goal (with its own plan)
or a simple action.
:ivar steps: The actions or subgoals to execute, in order.
"""
name: str = ""
steps: list[PlanElement]
class Goal(ProgramElement):
"""
Represents an objective to be achieved. To reach the goal, we should execute
the corresponding plan. If we can fail to achieve a goal after executing the plan,
for example when the achieving of the goal is dependent on the user's reply, this means
that the achieved status will be set from somewhere else in the program.
:ivar plan: The plan to execute.
:ivar can_fail: Whether we can fail to achieve the goal after executing the plan.
"""
plan: Plan
can_fail: bool = True
type Action = SpeechAction | GestureAction | LLMAction
class SpeechAction(ProgramElement):
"""
Represents the action of the robot speaking a literal text.
:ivar text: The text to speak.
"""
name: str = ""
text: str
class Gesture(BaseModel):
"""
Represents a gesture to be performed. Can be either a single gesture,
or a random gesture from a category (tag).
:ivar type: The type of the gesture, "tag" or "single".
:ivar name: The name of the single gesture or tag.
"""
type: Literal["tag", "single"]
name: str
class GestureAction(ProgramElement):
"""
Represents the action of the robot performing a gesture.
:ivar gesture: The gesture to perform.
"""
name: str = ""
gesture: Gesture
class LLMAction(ProgramElement):
"""
Represents the action of letting an LLM generate a reply based on its chat history
and an additional goal added in the prompt.
:ivar goal: The extra (temporary) goal to add to the LLM.
"""
name: str = ""
goal: str
class Trigger(ProgramElement):
"""
Represents a belief-based trigger. When a belief is set, the corresponding plan is executed.
:ivar condition: When to activate the trigger.
:ivar plan: The plan to execute.
"""
name: str = ""
condition: Belief
plan: Plan
class Phase(ProgramElement):
""" """
A distinct phase within a program, containing norms, goals, and triggers. A distinct phase within a program, containing norms, goals, and triggers.
:ivar id: Unique identifier.
:ivar label: Human-readable label.
:ivar norms: List of norms active in this phase. :ivar norms: List of norms active in this phase.
:ivar goals: List of goals to pursue in this phase. :ivar goals: List of goals to pursue in this phase.
:ivar triggers: List of triggers that define transitions out of this phase. :ivar triggers: List of triggers that define transitions out of this phase.
""" """
id: str name: str = ""
label: str
norms: list[Norm] norms: list[Norm]
goals: list[Goal] goals: list[Goal]
triggers: list[KeywordTrigger] triggers: list[Trigger]
class Program(BaseModel): class Program(BaseModel):