Merge branch 'dev' into refactor/logging

2025-11-05 15:09:14 +01:00
parent 220c5c7739 4d38850a1d
commit d9fef22090
11 changed files with 153 additions and 51 deletions
--- a/src/control_backend/agents/llm/llm.py
+++ b/src/control_backend/agents/llm/llm.py
@@ -1,4 +1,6 @@
-from typing import Any
+import json
+import re
+from collections.abc import AsyncGenerator

 import httpx
 from spade.behaviour import CyclicBehaviour
@@ -45,11 +47,16 @@ class LLMAgent(BaseAgent):

         async def _process_bdi_message(self, message: Message):
             """
-            Forwards user text to the LLM and replies with the generated text.
+            Forwards the body of a BDI message to the LLM and sends one reply per streamed chunk
+            (chunks are delimited by punctuation in `_query_llm`).
             """
             user_text = message.body
-            llm_response = await self._query_llm(user_text)
-            await self._reply(llm_response)
+            # Relay each chunk as soon as it is yielded instead of waiting for the full response.
+            async for chunk in self._query_llm(user_text):
+                await self._reply(chunk)
+            self.agent.logger.debug(
+                "Finished processing BDI message. Response sent in chunks to BDI Core Agent."
+            )

        async def _reply(self, msg: str):
            """
@@ -60,48 +67,89 @@ class LLMAgent(BaseAgent):
                body=msg,
            )
            await self.send(reply)
-            self.agent.logger.info("Reply sent to BDI Core Agent")

-        async def _query_llm(self, prompt: str) -> str:
+        async def _query_llm(self, prompt: str) -> AsyncGenerator[str]:
             """
-            Sends a chat completion request to the local LLM service.
+            Sends a chat completion request to the local LLM service and streams the response by
+            yielding each fragment as soon as it is closed by punctuation (delimiter included).

             :param prompt: Input text prompt to pass to the LLM.
-            :return: LLM-generated content or fallback message.
+            :yield: Fragments of the LLM-generated content, or a fallback message on error.
             """
-            async with httpx.AsyncClient(timeout=120.0) as client:
-                # Example dynamic content for future (optional)
+            instructions = LLMInstructions(
+                "- Be friendly and respectful.\n"
+                "- Make the conversation feel natural and engaging.\n"
+                "- Speak like a pirate.\n"
+                "- When the user asks what you can do, tell them.",
+                "- Try to learn the user's name during conversation.\n"
+                "- Suggest playing a game of asking yes or no questions where you think of a word "
+                "and the user must guess it.",
+            )
+            messages = [
+                {
+                    "role": "developer",
+                    "content": instructions.build_developer_instruction(),
+                },
+                {
+                    "role": "user",
+                    "content": prompt,
+                },
+            ]

-                instructions = LLMInstructions()
-                developer_instruction = instructions.build_developer_instruction()
+            # Lazily matches up to and including one delimiter plus any trailing whitespace.
+            pattern = re.compile(r".*?(?:,|;|:|—|–|\.{3}|…|\.|\?|!)\s*", re.DOTALL)
+            try:
+                current_chunk = ""
+                async for token in self._stream_query_llm(messages):
+                    current_chunk += token

-                response = await client.post(
+                    # Emit every delimiter-terminated fragment; keep the open tail buffered.
+                    consumed = 0
+                    for m in pattern.finditer(current_chunk):
+                        yield m.group(0)
+                        consumed = m.end()
+                    current_chunk = current_chunk[consumed:]
+
+                # Yield any remaining tail
+                if current_chunk:
+                    yield current_chunk
+            except httpx.HTTPError as err:
+                self.agent.logger.error("HTTP error.", exc_info=err)
+                yield "LLM service unavailable."
+            except Exception as err:
+                self.agent.logger.error("Unexpected error.", exc_info=err)
+                yield "Error processing the request."
+
+        async def _stream_query_llm(self, messages) -> AsyncGenerator[str]:
+            """Streams content deltas from the LLM; raises httpx.HTTPError on an API error."""
+            async with httpx.AsyncClient(timeout=None) as client:
+                async with client.stream(
+                    "POST",
                     settings.llm_settings.local_llm_url,
-                    headers={"Content-Type": "application/json"},
                     json={
                         "model": settings.llm_settings.local_llm_model,
-                        "messages": [
-                            {"role": "developer", "content": developer_instruction},
-                            {"role": "user", "content": prompt},
-                        ],
+                        "messages": messages,
                         "temperature": 0.3,
+                        "stream": True,
                     },
-                )
-
-                try:
+                ) as response:
                     response.raise_for_status()
-                    data: dict[str, Any] = response.json()
-                    return (
-                        data.get("choices", [{}])[0]
-                        .get("message", {})
-                        .get("content", "No response")
-                    )
-                except httpx.HTTPError as err:
-                    self.agent.logger.error("HTTP error: %s", err)
-                    return "LLM service unavailable."
-                except Exception as err:
-                    self.agent.logger.error("Unexpected error: %s", err)
-                    return "Error processing the request."
+                    async for line in response.aiter_lines():
+                        if not line or not line.startswith("data: "):
+                            continue
+
+                        data = line[len("data: ") :]
+                        if data.strip() == "[DONE]":
+                            break
+
+                        try:
+                            choices = json.loads(data).get("choices") or [{}]
+                        except json.JSONDecodeError:
+                            self.agent.logger.error("Failed to parse LLM response: %s", data)
+                            continue
+                        # `or` fallbacks tolerate an empty `choices` list and a null `delta`.
+                        if delta := (choices[0].get("delta") or {}).get("content"):
+                            yield delta

    async def setup(self):
        """