diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d498054 --- /dev/null +++ b/.env.example @@ -0,0 +1,20 @@ +# Example .env file. To use, make a copy, call it ".env" (i.e. remove the ".example" suffix), then edit the values. + +# The hostname of the Robot Interface. Change if the Control Backend and Robot Interface are running on different computers. +RI_HOST="localhost" + +# URL for the local LLM API. The API must implement the OpenAI Chat Completions API (most local LLM servers do). +LLM_SETTINGS__LOCAL_LLM_URL="http://localhost:1234/v1/chat/completions" + +# Name of the local LLM model to use. +LLM_SETTINGS__LOCAL_LLM_MODEL="gpt-oss" + +# Number of non-speech chunks to wait before speech is considered to have ended. A chunk is approximately 31 ms. Increasing this number allows longer pauses in speech, but also increases response time. +BEHAVIOUR_SETTINGS__VAD_NON_SPEECH_PATIENCE_CHUNKS=3 + +# Timeout in milliseconds for socket polling. Increase this number (for example to 500 ms) if network latency/jitter is high, which is often the case when using Wi-Fi. A symptom of this issue is transcriptions getting cut off. +BEHAVIOUR_SETTINGS__SOCKET_POLLER_TIMEOUT_MS=100 + + + +# For an exhaustive list of options, see the control_backend.core.config module in the docs. diff --git a/README.md b/README.md index 1527215..03dac9a 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ This + part might differ based on what model you choose. copy the model name in the module loaded and replace local_llm_modelL. In settings. + ## Running To run the project (development server), execute the following command (while inside the root repository): @@ -34,6 +35,14 @@ To run the project (development server), execute the following command (while in uv run fastapi dev src/control_backend/main.py ``` +### Environment Variables + +You can use environment variables to change settings. Make a copy of the [`.env.example`](.env.example) file, name it `.env` and put it in the root directory. 
The file itself describes how to do the configuration. + +For an exhaustive list of environment options, see the `control_backend.core.config` module in the docs. + + + ## Testing Testing happens automatically when opening a merge request to any branch. If you want to manually run the test suite, you can do so by running the following for unit tests: diff --git a/src/control_backend/agents/actuation/robot_gesture_agent.py b/src/control_backend/agents/actuation/robot_gesture_agent.py index 4f5dd79..3b264d2 100644 --- a/src/control_backend/agents/actuation/robot_gesture_agent.py +++ b/src/control_backend/agents/actuation/robot_gesture_agent.py @@ -33,7 +33,7 @@ class RobotGestureAgent(BaseAgent): def __init__( self, name: str, - address=settings.zmq_settings.ri_command_address, + address: str, bind=False, gesture_data=None, single_gesture_data=None, diff --git a/src/control_backend/agents/communication/ri_communication_agent.py b/src/control_backend/agents/communication/ri_communication_agent.py index 34e5b25..5c6ca77 100644 --- a/src/control_backend/agents/communication/ri_communication_agent.py +++ b/src/control_backend/agents/communication/ri_communication_agent.py @@ -38,7 +38,7 @@ class RICommunicationAgent(BaseAgent): def __init__( self, name: str, - address=settings.zmq_settings.ri_command_address, + address=settings.zmq_settings.ri_communication_address, bind=False, ): super().__init__(name) @@ -168,7 +168,7 @@ class RICommunicationAgent(BaseAgent): bind = port_data["bind"] if not bind: - addr = f"tcp://localhost:{port}" + addr = f"tcp://{settings.ri_host}:{port}" else: addr = f"tcp://*:{port}" diff --git a/src/control_backend/agents/perception/vad_agent.py b/src/control_backend/agents/perception/vad_agent.py index 8ccff0a..70fa9e1 100644 --- a/src/control_backend/agents/perception/vad_agent.py +++ b/src/control_backend/agents/perception/vad_agent.py @@ -103,12 +103,11 @@ class VADAgent(BaseAgent): self._connect_audio_in_socket() - audio_out_port = 
self._connect_audio_out_socket() - if audio_out_port is None: + audio_out_address = self._connect_audio_out_socket() + if audio_out_address is None: self.logger.error("Could not bind output socket, stopping.") await self.stop() return - audio_out_address = f"tcp://localhost:{audio_out_port}" # Connect to internal communication socket self.program_sub_socket = azmq.Context.instance().socket(zmq.SUB) @@ -161,13 +160,14 @@ class VADAgent(BaseAgent): self.audio_in_socket.connect(self.audio_in_address) self.audio_in_poller = SocketPoller[bytes](self.audio_in_socket) - def _connect_audio_out_socket(self) -> int | None: + def _connect_audio_out_socket(self) -> str | None: """ - Returns the port bound, or None if binding failed. + Returns the address that was bound to, or None if binding failed. """ try: self.audio_out_socket = azmq.Context.instance().socket(zmq.PUB) - return self.audio_out_socket.bind_to_random_port("tcp://localhost", max_tries=100) + self.audio_out_socket.bind(settings.zmq_settings.vad_pub_address) + return settings.zmq_settings.vad_pub_address except zmq.ZMQBindError: self.logger.error("Failed to bind an audio output socket after 100 tries.") self.audio_out_socket = None diff --git a/src/control_backend/core/config.py b/src/control_backend/core/config.py index 927985b..c4a4db7 100644 --- a/src/control_backend/core/config.py +++ b/src/control_backend/core/config.py @@ -1,3 +1,12 @@ +""" +An exhaustive overview of configurable options. All of these can be set using environment variables +by nesting with double underscores (__). Start from the ``Settings`` class. + +For example, ``settings.ri_host`` becomes ``RI_HOST``, and +``settings.zmq_settings.ri_communication_address`` becomes +``ZMQ_SETTINGS__RI_COMMUNICATION_ADDRESS``. +""" + from pydantic import BaseModel from pydantic_settings import BaseSettings, SettingsConfigDict @@ -8,16 +17,17 @@ class ZMQSettings(BaseModel): :ivar internal_pub_address: Address for the internal PUB socket. 
:ivar internal_sub_address: Address for the internal SUB socket. - :ivar ri_command_address: Address for sending commands to the Robot Interface. - :ivar ri_communication_address: Address for receiving communication from the Robot Interface. - :ivar vad_agent_address: Address for the Voice Activity Detection (VAD) agent. + :ivar ri_communication_address: Address for the endpoint that the Robot Interface connects to. + :ivar vad_pub_address: Address that the VAD agent binds to and publishes audio segments to. """ + # ATTENTION: When adding/removing settings, make sure to update the .env.example file + internal_pub_address: str = "tcp://localhost:5560" internal_sub_address: str = "tcp://localhost:5561" - ri_command_address: str = "tcp://localhost:0000" ri_communication_address: str = "tcp://*:5555" internal_gesture_rep_adress: str = "tcp://localhost:7788" + vad_pub_address: str = "inproc://vad_stream" class AgentSettings(BaseModel): @@ -36,6 +46,8 @@ class AgentSettings(BaseModel): :ivar robot_speech_name: Name of the Robot Speech Agent. """ + # ATTENTION: When adding/removing settings, make sure to update the .env.example file + # agent names bdi_core_name: str = "bdi_core_agent" bdi_belief_collector_name: str = "belief_collector_agent" @@ -67,6 +79,8 @@ class BehaviourSettings(BaseModel): :ivar transcription_token_buffer: Buffer for transcription tokens. """ + # ATTENTION: When adding/removing settings, make sure to update the .env.example file + sleep_s: float = 1.0 comm_setup_max_retries: int = 5 socket_poller_timeout_ms: int = 100 @@ -91,6 +105,8 @@ class LLMSettings(BaseModel): :ivar local_llm_model: Name of the local LLM model to use. """ + # ATTENTION: When adding/removing settings, make sure to update the .env.example file + local_llm_url: str = "http://localhost:1234/v1/chat/completions" local_llm_model: str = "gpt-oss" @@ -104,6 +120,8 @@ class VADSettings(BaseModel): :ivar sample_rate_hz: Sample rate in Hz for the VAD model. 
""" + # ATTENTION: When adding/removing settings, make sure to update the .env.example file + repo_or_dir: str = "snakers4/silero-vad" model_name: str = "silero_vad" sample_rate_hz: int = 16000 @@ -117,6 +135,8 @@ class SpeechModelSettings(BaseModel): :ivar openai_model_name: Model name for OpenAI-based speech recognition. """ + # ATTENTION: When adding/removing settings, make sure to update the .env.example file + # model identifiers for speech recognition mlx_model_name: str = "mlx-community/whisper-small.en-mlx" openai_model_name: str = "small.en" @@ -128,6 +148,7 @@ class Settings(BaseSettings): :ivar app_title: Title of the application. :ivar ui_url: URL of the frontend UI. + :ivar ri_host: The hostname of the Robot Interface. :ivar zmq_settings: ZMQ configuration. :ivar agent_settings: Agent name configuration. :ivar behaviour_settings: Behavior configuration. @@ -140,6 +161,8 @@ class Settings(BaseSettings): ui_url: str = "http://localhost:5173" + ri_host: str = "localhost" + zmq_settings: ZMQSettings = ZMQSettings() agent_settings: AgentSettings = AgentSettings() diff --git a/test/integration/agents/perception/vad_agent/test_vad_agent.py b/test/integration/agents/perception/vad_agent/test_vad_agent.py index f5f2615..668d1ce 100644 --- a/test/integration/agents/perception/vad_agent/test_vad_agent.py +++ b/test/integration/agents/perception/vad_agent/test_vad_agent.py @@ -91,7 +91,7 @@ def test_out_socket_creation(zmq_context): assert per_vad_agent.audio_out_socket is not None zmq_context.return_value.socket.assert_called_once_with(zmq.PUB) - zmq_context.return_value.socket.return_value.bind_to_random_port.assert_called_once() + zmq_context.return_value.socket.return_value.bind.assert_called_once_with("inproc://vad_stream") @pytest.mark.asyncio diff --git a/test/unit/agents/actuation/test_robot_gesture_agent.py b/test/unit/agents/actuation/test_robot_gesture_agent.py index c68f052..fe051a6 100644 --- a/test/unit/agents/actuation/test_robot_gesture_agent.py 
+++ b/test/unit/agents/actuation/test_robot_gesture_agent.py @@ -73,7 +73,7 @@ async def test_setup_connect(zmq_context, mocker): async def test_handle_message_sends_valid_gesture_command(): """Internal message with valid gesture tag is forwarded to robot pub socket.""" pubsocket = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.pubsocket = pubsocket payload = { @@ -91,7 +91,7 @@ async def test_handle_message_sends_valid_gesture_command(): async def test_handle_message_sends_non_gesture_command(): """Internal message with non-gesture endpoint is not forwarded by this agent.""" pubsocket = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.pubsocket = pubsocket payload = {"endpoint": "some_other_endpoint", "data": "invalid_tag_not_in_list"} @@ -107,7 +107,7 @@ async def test_handle_message_sends_non_gesture_command(): async def test_handle_message_rejects_invalid_gesture_tag(): """Internal message with invalid gesture tag is not forwarded.""" pubsocket = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.pubsocket = pubsocket # Use a tag that's not in gesture_data @@ -123,7 +123,7 @@ async def test_handle_message_rejects_invalid_gesture_tag(): async def test_handle_message_invalid_payload(): """Invalid payload is caught and does not send.""" pubsocket = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.pubsocket = pubsocket msg = InternalMessage(to="robot", sender="tester", body=json.dumps({"bad": 
"data"})) @@ -142,12 +142,12 @@ async def test_zmq_command_loop_valid_gesture_payload(): async def recv_once(): # stop after first iteration agent._running = False - return (b"command", json.dumps(command).encode("utf-8")) + return b"command", json.dumps(command).encode("utf-8") fake_socket.recv_multipart = recv_once fake_socket.send_json = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.subsocket = fake_socket agent.pubsocket = fake_socket agent._running = True @@ -165,12 +165,12 @@ async def test_zmq_command_loop_valid_non_gesture_payload(): async def recv_once(): agent._running = False - return (b"command", json.dumps(command).encode("utf-8")) + return b"command", json.dumps(command).encode("utf-8") fake_socket.recv_multipart = recv_once fake_socket.send_json = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.subsocket = fake_socket agent.pubsocket = fake_socket agent._running = True @@ -188,12 +188,12 @@ async def test_zmq_command_loop_invalid_gesture_tag(): async def recv_once(): agent._running = False - return (b"command", json.dumps(command).encode("utf-8")) + return b"command", json.dumps(command).encode("utf-8") fake_socket.recv_multipart = recv_once fake_socket.send_json = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.subsocket = fake_socket agent.pubsocket = fake_socket agent._running = True @@ -210,12 +210,12 @@ async def test_zmq_command_loop_invalid_json(): async def recv_once(): agent._running = False - return (b"command", b"{not_json}") + return b"command", b"{not_json}" fake_socket.recv_multipart = recv_once 
fake_socket.send_json = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.subsocket = fake_socket agent.pubsocket = fake_socket agent._running = True @@ -232,12 +232,12 @@ async def test_zmq_command_loop_ignores_send_gestures_topic(): async def recv_once(): agent._running = False - return (b"send_gestures", b"{}") + return b"send_gestures", b"{}" fake_socket.recv_multipart = recv_once fake_socket.send_json = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.subsocket = fake_socket agent.pubsocket = fake_socket agent._running = True @@ -259,7 +259,9 @@ async def test_fetch_gestures_loop_without_amount(): fake_repsocket.recv = recv_once fake_repsocket.send = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no", "wave", "point"]) + agent = RobotGestureAgent( + "robot_gesture", gesture_data=["hello", "yes", "no", "wave", "point"], address="" + ) agent.repsocket = fake_repsocket agent._running = True @@ -287,7 +289,9 @@ async def test_fetch_gestures_loop_with_amount(): fake_repsocket.recv = recv_once fake_repsocket.send = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no", "wave", "point"]) + agent = RobotGestureAgent( + "robot_gesture", gesture_data=["hello", "yes", "no", "wave", "point"], address="" + ) agent.repsocket = fake_repsocket agent._running = True @@ -315,7 +319,7 @@ async def test_fetch_gestures_loop_with_integer_request(): fake_repsocket.recv = recv_once fake_repsocket.send = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.repsocket = fake_repsocket 
agent._running = True @@ -340,7 +344,7 @@ async def test_fetch_gestures_loop_with_invalid_json(): fake_repsocket.recv = recv_once fake_repsocket.send = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.repsocket = fake_repsocket agent._running = True @@ -365,7 +369,7 @@ async def test_fetch_gestures_loop_with_non_integer_json(): fake_repsocket.recv = recv_once fake_repsocket.send = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.repsocket = fake_repsocket agent._running = True @@ -381,7 +385,7 @@ async def test_fetch_gestures_loop_with_non_integer_json(): def test_gesture_data_attribute(): """Test that gesture_data returns the expected list.""" gesture_data = ["hello", "yes", "no", "wave"] - agent = RobotGestureAgent("robot_gesture", gesture_data=gesture_data) + agent = RobotGestureAgent("robot_gesture", gesture_data=gesture_data, address="") assert agent.gesture_data == gesture_data assert isinstance(agent.gesture_data, list) @@ -398,7 +402,7 @@ async def test_stop_closes_sockets(): pubsocket = MagicMock() subsocket = MagicMock() repsocket = MagicMock() - agent = RobotGestureAgent("robot_gesture") + agent = RobotGestureAgent("robot_gesture", address="") agent.pubsocket = pubsocket agent.subsocket = subsocket agent.repsocket = repsocket @@ -415,7 +419,7 @@ async def test_stop_closes_sockets(): async def test_initialization_with_custom_gesture_data(): """Agent can be initialized with custom gesture data.""" custom_gestures = ["custom1", "custom2", "custom3"] - agent = RobotGestureAgent("robot_gesture", gesture_data=custom_gestures) + agent = RobotGestureAgent("robot_gesture", gesture_data=custom_gestures, address="") assert agent.gesture_data == custom_gestures @@ -432,7 +436,7 @@ 
async def test_fetch_gestures_loop_handles_exception(): fake_repsocket.recv = recv_once fake_repsocket.send = AsyncMock() - agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"]) + agent = RobotGestureAgent("robot_gesture", gesture_data=["hello", "yes", "no"], address="") agent.repsocket = fake_repsocket agent.logger = MagicMock() agent._running = True diff --git a/test/unit/agents/perception/vad_agent/test_vad_streaming.py b/test/unit/agents/perception/vad_agent/test_vad_streaming.py index 4440cae..166919f 100644 --- a/test/unit/agents/perception/vad_agent/test_vad_streaming.py +++ b/test/unit/agents/perception/vad_agent/test_vad_streaming.py @@ -7,6 +7,15 @@ import zmq from control_backend.agents.perception.vad_agent import VADAgent +# We don't want to use real ZMQ in unit tests, for example because it can give errors when sockets +# aren't closed properly. +@pytest.fixture(autouse=True) +def mock_zmq(): + with patch("zmq.asyncio.Context") as mock: + mock.instance.return_value = MagicMock() + yield mock + + @pytest.fixture def audio_out_socket(): return AsyncMock() @@ -140,12 +149,10 @@ async def test_vad_model_load_failure_stops_agent(vad_agent): # Patch stop to an AsyncMock so we can check it was awaited vad_agent.stop = AsyncMock() - result = await vad_agent.setup() + await vad_agent.setup() # Assert stop was called vad_agent.stop.assert_awaited_once() - # Assert setup returned None - assert result is None @pytest.mark.asyncio @@ -155,7 +162,7 @@ async def test_audio_out_bind_failure_sets_none_and_logs(vad_agent, caplog): audio_out_socket is set to None, None is returned, and an error is logged. """ mock_socket = MagicMock() - mock_socket.bind_to_random_port.side_effect = zmq.ZMQBindError() + mock_socket.bind.side_effect = zmq.ZMQBindError() with patch("control_backend.agents.perception.vad_agent.azmq.Context.instance") as mock_ctx: mock_ctx.return_value.socket.return_value = mock_socket