Merge branch 'feat/environment-variables' into 'dev'

Docs for environment variables, parameterize some constants See merge request ics/sp/2025/n25b/pepperplus-cb!38
2026-01-06 09:02:49 +00:00
parent 4c20656c75 7d798f2e77
commit 612a96940d
9 changed files with 104 additions and 41 deletions
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,20 @@
+# Example .env file. To use it, make a copy named ".env" (i.e. remove the ".example" suffix), then edit the values.
+
+# The hostname of the Robot Interface. Change if the Control Backend and Robot Interface are running on different computers.
+RI_HOST="localhost"
+
+# URL for the local LLM API. Must be an API that implements the OpenAI Chat Completions API, but most do.
+LLM_SETTINGS__LOCAL_LLM_URL="http://localhost:1234/v1/chat/completions"
+
+# Name of the local LLM model to use.
+LLM_SETTINGS__LOCAL_LLM_MODEL="gpt-oss"
+
+# Number of non-speech chunks to wait before speech is considered to have ended. A chunk is approximately 31 ms. Increasing this number allows longer pauses in speech, but also increases response time.
+BEHAVIOUR_SETTINGS__VAD_NON_SPEECH_PATIENCE_CHUNKS=3
+
+# Timeout in milliseconds for socket polling. Increase this number (for example, to 500 ms) if network latency/jitter is high, which is often the case when using Wi-Fi. A symptom of this issue is transcriptions getting cut off.
+BEHAVIOUR_SETTINGS__SOCKET_POLLER_TIMEOUT_MS=100
+
+
+
+# For an exhaustive list of options, see the control_backend.core.config module in the docs.
--- a/README.md
+++ b/README.md
@@ -27,6 +27,7 @@ This + part might differ based on what model you choose.
 copy the model name in the module loaded and replace local_llm_modelL. In settings.


+
 ## Running
 To run the project (development server), execute the following command (while inside the root repository):

@@ -34,6 +35,14 @@ To run the project (development server), execute the following command (while in
 uv run fastapi dev src/control_backend/main.py
 ```

+### Environment Variables
+
+You can use environment variables to change settings. Make a copy of the [`.env.example`](.env.example) file, name it `.env` and put it in the root directory of the repository. The comments in the file describe how to configure each setting.
+
+For an exhaustive list of environment options, see the `control_backend.core.config` module in the docs.
+
+
+
 ## Testing
 Testing happens automatically when opening a merge request to any branch. If you want to manually run the test suite, you can do so by running the following for unit tests:

--- a/src/control_backend/agents/actuation/robot_gesture_agent.py
+++ b/src/control_backend/agents/actuation/robot_gesture_agent.py
@@ -33,7 +33,7 @@ class RobotGestureAgent(BaseAgent):
    def __init__(
        self,
        name: str,
-        address=settings.zmq_settings.ri_command_address,
+        address: str,
        bind=False,
        gesture_data=None,
        single_gesture_data=None,
--- a/src/control_backend/agents/communication/ri_communication_agent.py
+++ b/src/control_backend/agents/communication/ri_communication_agent.py
@@ -38,7 +38,7 @@ class RICommunicationAgent(BaseAgent):
    def __init__(
        self,
        name: str,
-        address=settings.zmq_settings.ri_command_address,
+        address=settings.zmq_settings.ri_communication_address,
        bind=False,
    ):
        super().__init__(name)
@@ -168,7 +168,7 @@ class RICommunicationAgent(BaseAgent):
            bind = port_data["bind"]

            if not bind:
-                addr = f"tcp://localhost:{port}"
+                addr = f"tcp://{settings.ri_host}:{port}"
            else:
                addr = f"tcp://*:{port}"

--- a/src/control_backend/agents/perception/vad_agent.py
+++ b/src/control_backend/agents/perception/vad_agent.py
@@ -103,12 +103,11 @@ class VADAgent(BaseAgent):

        self._connect_audio_in_socket()

-        audio_out_port = self._connect_audio_out_socket()
-        if audio_out_port is None:
+        audio_out_address = self._connect_audio_out_socket()
+        if audio_out_address is None:
            self.logger.error("Could not bind output socket, stopping.")
            await self.stop()
            return
-        audio_out_address = f"tcp://localhost:{audio_out_port}"

        # Connect to internal communication socket
        self.program_sub_socket = azmq.Context.instance().socket(zmq.SUB)
@@ -161,13 +160,14 @@ class VADAgent(BaseAgent):
            self.audio_in_socket.connect(self.audio_in_address)
        self.audio_in_poller = SocketPoller[bytes](self.audio_in_socket)

-    def _connect_audio_out_socket(self) -> int | None:
+    def _connect_audio_out_socket(self) -> str | None:
        """
-        Returns the port bound, or None if binding failed.
+        Returns the address that was bound to, or None if binding failed.
        """
        try:
            self.audio_out_socket = azmq.Context.instance().socket(zmq.PUB)
-            return self.audio_out_socket.bind_to_random_port("tcp://localhost", max_tries=100)
+            self.audio_out_socket.bind(settings.zmq_settings.vad_pub_address)
+            return settings.zmq_settings.vad_pub_address
        except zmq.ZMQBindError:
            self.logger.error("Failed to bind an audio output socket after 100 tries.")
            self.audio_out_socket = None
--- a/src/control_backend/core/config.py
+++ b/src/control_backend/core/config.py
@@ -1,3 +1,12 @@
+"""
+An exhaustive overview of configurable options. All of these can be set using environment variables
+by nesting with double underscores (__). Start from the ``Settings`` class.
+
+For example, ``settings.ri_host`` becomes ``RI_HOST``, and
+``settings.zmq_settings.ri_communication_address`` becomes
+``ZMQ_SETTINGS__RI_COMMUNICATION_ADDRESS``.
+"""
+
 from pydantic import BaseModel
 from pydantic_settings import BaseSettings, SettingsConfigDict

@@ -8,16 +17,17 @@ class ZMQSettings(BaseModel):

    :ivar internal_pub_address: Address for the internal PUB socket.
    :ivar internal_sub_address: Address for the internal SUB socket.
-    :ivar ri_command_address: Address for sending commands to the Robot Interface.
-    :ivar ri_communication_address: Address for receiving communication from the Robot Interface.
-    :ivar vad_agent_address: Address for the Voice Activity Detection (VAD) agent.
+    :ivar ri_communication_address: Address for the endpoint that the Robot Interface connects to.
+    :ivar vad_pub_address: Address that the VAD agent binds to and publishes audio segments to.
    """

+    # ATTENTION: When adding/removing settings, make sure to update the .env.example file
+
    internal_pub_address: str = "tcp://localhost:5560"
    internal_sub_address: str = "tcp://localhost:5561"
-    ri_command_address: str = "tcp://localhost:0000"
    ri_communication_address: str = "tcp://*:5555"
    internal_gesture_rep_adress: str = "tcp://localhost:7788"
+    vad_pub_address: str = "inproc://vad_stream"


 class AgentSettings(BaseModel):
@@ -36,6 +46,8 @@ class AgentSettings(BaseModel):
    :ivar robot_speech_name: Name of the Robot Speech Agent.
    """

+    # ATTENTION: When adding/removing settings, make sure to update the .env.example file
+
    # agent names
    bdi_core_name: str = "bdi_core_agent"
    bdi_belief_collector_name: str = "belief_collector_agent"
@@ -67,6 +79,8 @@ class BehaviourSettings(BaseModel):
    :ivar transcription_token_buffer: Buffer for transcription tokens.
    """

+    # ATTENTION: When adding/removing settings, make sure to update the .env.example file
+
    sleep_s: float = 1.0
    comm_setup_max_retries: int = 5
    socket_poller_timeout_ms: int = 100
@@ -91,6 +105,8 @@ class LLMSettings(BaseModel):
    :ivar local_llm_model: Name of the local LLM model to use.
    """

+    # ATTENTION: When adding/removing settings, make sure to update the .env.example file
+
    local_llm_url: str = "http://localhost:1234/v1/chat/completions"
    local_llm_model: str = "gpt-oss"

@@ -104,6 +120,8 @@ class VADSettings(BaseModel):
    :ivar sample_rate_hz: Sample rate in Hz for the VAD model.
    """

+    # ATTENTION: When adding/removing settings, make sure to update the .env.example file
+
    repo_or_dir: str = "snakers4/silero-vad"
    model_name: str = "silero_vad"
    sample_rate_hz: int = 16000
@@ -117,6 +135,8 @@ class SpeechModelSettings(BaseModel):
    :ivar openai_model_name: Model name for OpenAI-based speech recognition.
    """

+    # ATTENTION: When adding/removing settings, make sure to update the .env.example file
+
    # model identifiers for speech recognition
    mlx_model_name: str = "mlx-community/whisper-small.en-mlx"
    openai_model_name: str = "small.en"
@@ -128,6 +148,7 @@ class Settings(BaseSettings):

    :ivar app_title: Title of the application.
    :ivar ui_url: URL of the frontend UI.
+    :ivar ri_host: The hostname of the Robot Interface.
    :ivar zmq_settings: ZMQ configuration.
    :ivar agent_settings: Agent name configuration.
    :ivar behaviour_settings: Behavior configuration.
@@ -140,6 +161,8 @@ class Settings(BaseSettings):

    ui_url: str = "http://localhost:5173"

+    ri_host: str = "localhost"
+
    zmq_settings: ZMQSettings = ZMQSettings()

    agent_settings: AgentSettings = AgentSettings()
--- a/test/integration/agents/perception/vad_agent/test_vad_agent.py
+++ b/test/integration/agents/perception/vad_agent/test_vad_agent.py
@@ -91,7 +91,7 @@ def test_out_socket_creation(zmq_context):
    assert per_vad_agent.audio_out_socket is not None

    zmq_context.return_value.socket.assert_called_once_with(zmq.PUB)
-    zmq_context.return_value.socket.return_value.bind_to_random_port.assert_called_once()
+    zmq_context.return_value.socket.return_value.bind.assert_called_once_with("inproc://vad_stream")


@pytest.mark.asyncio
--- a/test/unit/agents/actuation/test_robot_gesture_agent.py
+++ b/test/unit/agents/actuation/test_robot_gesture_agent.py
@@ -73,7 +73,7 @@ async def test_setup_connect(zmq_context, mocker):
 async def test_handle_message_sends_valid_gesture_command():
    """Internal message with valid gesture tag is forwarded to robot pub socket."""
    pubsocket = AsyncMock()
-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.pubsocket = pubsocket

    payload = {
@@ -91,7 +91,7 @@ async def test_handle_message_sends_valid_gesture_command():
 async def test_handle_message_sends_non_gesture_command():
    """Internal message with non-gesture endpoint is not forwarded by this agent."""
    pubsocket = AsyncMock()
-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.pubsocket = pubsocket

    payload = {"endpoint": "some_other_endpoint", "data": "invalid_tag_not_in_list"}
@@ -107,7 +107,7 @@ async def test_handle_message_sends_non_gesture_command():
 async def test_handle_message_rejects_invalid_gesture_tag():
    """Internal message with invalid gesture tag is not forwarded."""
    pubsocket = AsyncMock()
-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.pubsocket = pubsocket

    # Use a tag that's not in gesture_data
@@ -123,7 +123,7 @@ async def test_handle_message_rejects_invalid_gesture_tag():
 async def test_handle_message_invalid_payload():
    """Invalid payload is caught and does not send."""
    pubsocket = AsyncMock()
-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.pubsocket = pubsocket

    msg = InternalMessage(to="robot", sender="tester", body=json.dumps({"bad": "data"}))
@@ -142,12 +142,12 @@ async def test_zmq_command_loop_valid_gesture_payload():
    async def recv_once():
        # stop after first iteration
        agent._running = False
-        return (b"command", json.dumps(command).encode("utf-8"))
+        return b"command", json.dumps(command).encode("utf-8")

    fake_socket.recv_multipart = recv_once
    fake_socket.send_json = AsyncMock()

-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.subsocket = fake_socket
    agent.pubsocket = fake_socket
    agent._running = True
@@ -165,12 +165,12 @@ async def test_zmq_command_loop_valid_non_gesture_payload():

    async def recv_once():
        agent._running = False
-        return (b"command", json.dumps(command).encode("utf-8"))
+        return b"command", json.dumps(command).encode("utf-8")

    fake_socket.recv_multipart = recv_once
    fake_socket.send_json = AsyncMock()

-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.subsocket = fake_socket
    agent.pubsocket = fake_socket
    agent._running = True
@@ -188,12 +188,12 @@ async def test_zmq_command_loop_invalid_gesture_tag():

    async def recv_once():
        agent._running = False
-        return (b"command", json.dumps(command).encode("utf-8"))
+        return b"command", json.dumps(command).encode("utf-8")

    fake_socket.recv_multipart = recv_once
    fake_socket.send_json = AsyncMock()

-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.subsocket = fake_socket
    agent.pubsocket = fake_socket
    agent._running = True
@@ -210,12 +210,12 @@ async def test_zmq_command_loop_invalid_json():

    async def recv_once():
        agent._running = False
-        return (b"command", b"{not_json}")
+        return b"command", b"{not_json}"

    fake_socket.recv_multipart = recv_once
    fake_socket.send_json = AsyncMock()

-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.subsocket = fake_socket
    agent.pubsocket = fake_socket
    agent._running = True
@@ -232,12 +232,12 @@ async def test_zmq_command_loop_ignores_send_gestures_topic():

    async def recv_once():
        agent._running = False
-        return (b"send_gestures", b"{}")
+        return b"send_gestures", b"{}"

    fake_socket.recv_multipart = recv_once
    fake_socket.send_json = AsyncMock()

-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.subsocket = fake_socket
    agent.pubsocket = fake_socket
    agent._running = True
@@ -259,7 +259,9 @@ async def test_fetch_gestures_loop_without_amount():
    fake_repsocket.recv = recv_once
    fake_repsocket.send = AsyncMock()

-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no", "wave", "point"])
+    agent = RobotGestureAgent(
+        "robot_gesture", gesture_data=["hello", "yes", "no", "wave", "point"], address=""
+    )
    agent.repsocket = fake_repsocket
    agent._running = True

@@ -287,7 +289,9 @@ async def test_fetch_gestures_loop_with_amount():
    fake_repsocket.recv = recv_once
    fake_repsocket.send = AsyncMock()

-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no", "wave", "point"])
+    agent = RobotGestureAgent(
+        "robot_gesture", gesture_data=["hello", "yes", "no", "wave", "point"], address=""
+    )
    agent.repsocket = fake_repsocket
    agent._running = True

@@ -315,7 +319,7 @@ async def test_fetch_gestures_loop_with_integer_request():
    fake_repsocket.recv = recv_once
    fake_repsocket.send = AsyncMock()

-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.repsocket = fake_repsocket
    agent._running = True

@@ -340,7 +344,7 @@ async def test_fetch_gestures_loop_with_invalid_json():
    fake_repsocket.recv = recv_once
    fake_repsocket.send = AsyncMock()

-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.repsocket = fake_repsocket
    agent._running = True

@@ -365,7 +369,7 @@ async def test_fetch_gestures_loop_with_non_integer_json():
    fake_repsocket.recv = recv_once
    fake_repsocket.send = AsyncMock()

-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.repsocket = fake_repsocket
    agent._running = True

@@ -381,7 +385,7 @@ async def test_fetch_gestures_loop_with_non_integer_json():
 def test_gesture_data_attribute():
    """Test that gesture_data returns the expected list."""
    gesture_data = ["hello", "yes", "no", "wave"]
-    agent = RobotGestureAgent("robot_gesture", gesture_data=gesture_data)
+    agent = RobotGestureAgent("robot_gesture", gesture_data=gesture_data, address="")

    assert agent.gesture_data == gesture_data
    assert isinstance(agent.gesture_data, list)
@@ -398,7 +402,7 @@ async def test_stop_closes_sockets():
    pubsocket = MagicMock()
    subsocket = MagicMock()
    repsocket = MagicMock()
-    agent = RobotGestureAgent("robot_gesture")
+    agent = RobotGestureAgent("robot_gesture", address="")
    agent.pubsocket = pubsocket
    agent.subsocket = subsocket
    agent.repsocket = repsocket
@@ -415,7 +419,7 @@ async def test_stop_closes_sockets():
 async def test_initialization_with_custom_gesture_data():
    """Agent can be initialized with custom gesture data."""
    custom_gestures = ["custom1", "custom2", "custom3"]
-    agent = RobotGestureAgent("robot_gesture", gesture_data=custom_gestures)
+    agent = RobotGestureAgent("robot_gesture", gesture_data=custom_gestures, address="")

    assert agent.gesture_data == custom_gestures

@@ -432,7 +436,7 @@ async def test_fetch_gestures_loop_handles_exception():
    fake_repsocket.recv = recv_once
    fake_repsocket.send = AsyncMock()

-    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"])
+    agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="")
    agent.repsocket = fake_repsocket
    agent.logger = MagicMock()
    agent._running = True
--- a/test/unit/agents/perception/vad_agent/test_vad_streaming.py
+++ b/test/unit/agents/perception/vad_agent/test_vad_streaming.py
@@ -7,6 +7,15 @@ import zmq
 from control_backend.agents.perception.vad_agent import VADAgent


+# We don't want to use real ZMQ in unit tests, for example because it can give errors when sockets
+# aren't closed properly.
+@pytest.fixture(autouse=True)
+def mock_zmq():
+    with patch("zmq.asyncio.Context") as mock:
+        mock.instance.return_value = MagicMock()
+        yield mock
+
+
@pytest.fixture
 def audio_out_socket():
    return AsyncMock()
@@ -140,12 +149,10 @@ async def test_vad_model_load_failure_stops_agent(vad_agent):
        # Patch stop to an AsyncMock so we can check it was awaited
        vad_agent.stop = AsyncMock()

-        result = await vad_agent.setup()
+        await vad_agent.setup()

        # Assert stop was called
        vad_agent.stop.assert_awaited_once()
-        # Assert setup returned None
-        assert result is None


@pytest.mark.asyncio
@@ -155,7 +162,7 @@ async def test_audio_out_bind_failure_sets_none_and_logs(vad_agent, caplog):
    audio_out_socket is set to None, None is returned, and an error is logged.
    """
    mock_socket = MagicMock()
-    mock_socket.bind_to_random_port.side_effect = zmq.ZMQBindError()
+    mock_socket.bind.side_effect = zmq.ZMQBindError()
    with patch("control_backend.agents.perception.vad_agent.azmq.Context.instance") as mock_ctx:
        mock_ctx.return_value.socket.return_value = mock_socket