diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d498054 --- /dev/null +++ b/.env.example @@ -0,0 +1,20 @@ +# Example .env file. To use, make a copy, call it ".env" (i.e. removing the ".example" suffix), then edit the values. + +# The hostname of the Robot Interface. Change if the Control Backend and Robot Interface are running on different computers. +RI_HOST="localhost" + +# URL for the local LLM API. Must be an API that implements the OpenAI Chat Completions API, but most do. +LLM_SETTINGS__LOCAL_LLM_URL="http://localhost:1234/v1/chat/completions" + +# Name of the local LLM model to use. +LLM_SETTINGS__LOCAL_LLM_MODEL="gpt-oss" + +# Number of consecutive non-speech chunks to wait before speech is considered to have ended. A chunk is approximately 31 ms. Increasing this number allows longer pauses in speech, but also increases response time. +BEHAVIOUR_SETTINGS__VAD_NON_SPEECH_PATIENCE_CHUNKS=3 + +# Timeout in milliseconds for socket polling. Increase this number (e.g. to 500) if network latency/jitter is high, which is often the case when using Wi-Fi. A symptom of this issue is transcriptions getting cut off. +BEHAVIOUR_SETTINGS__SOCKET_POLLER_TIMEOUT_MS=100 + + + +# For an exhaustive list of options, see the control_backend.core.config module in the docs. diff --git a/README.md b/README.md index 1527215..03dac9a 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ This + part might differ based on what model you choose. copy the model name in the module loaded and replace local_llm_modelL. In settings. + ## Running To run the project (development server), execute the following command (while inside the root repository): @@ -34,6 +35,14 @@ To run the project (development server), execute the following command (while in uv run fastapi dev src/control_backend/main.py ``` +### Environment Variables + +You can use environment variables to change settings. Make a copy of the [`.env.example`](.env.example) file, name it `.env` and put it in the root directory. 
The file itself describes how to do the configuration. + +For an exhaustive list of environment options, see the `control_backend.core.config` module in the docs. + + + ## Testing Testing happens automatically when opening a merge request to any branch. If you want to manually run the test suite, you can do so by running the following for unit tests: diff --git a/src/control_backend/agents/actuation/robot_speech_agent.py b/src/control_backend/agents/actuation/robot_speech_agent.py index 674b270..f8e3d4c 100644 --- a/src/control_backend/agents/actuation/robot_speech_agent.py +++ b/src/control_backend/agents/actuation/robot_speech_agent.py @@ -29,7 +29,7 @@ class RobotSpeechAgent(BaseAgent): def __init__( self, name: str, - address=settings.zmq_settings.ri_command_address, + address: str, bind=False, ): super().__init__(name) diff --git a/src/control_backend/agents/communication/ri_communication_agent.py b/src/control_backend/agents/communication/ri_communication_agent.py index 3bca6e4..4a043e9 100644 --- a/src/control_backend/agents/communication/ri_communication_agent.py +++ b/src/control_backend/agents/communication/ri_communication_agent.py @@ -37,7 +37,7 @@ class RICommunicationAgent(BaseAgent): def __init__( self, name: str, - address=settings.zmq_settings.ri_command_address, + address=settings.zmq_settings.ri_communication_address, bind=False, ): super().__init__(name) diff --git a/src/control_backend/agents/perception/vad_agent.py b/src/control_backend/agents/perception/vad_agent.py index 5d0c497..70fa9e1 100644 --- a/src/control_backend/agents/perception/vad_agent.py +++ b/src/control_backend/agents/perception/vad_agent.py @@ -166,9 +166,8 @@ class VADAgent(BaseAgent): """ try: self.audio_out_socket = azmq.Context.instance().socket(zmq.PUB) - address = "inproc://vad_stream" - self.audio_out_socket.bind(address) - return address + self.audio_out_socket.bind(settings.zmq_settings.vad_pub_address) + return settings.zmq_settings.vad_pub_address except 
zmq.ZMQBindError: self.logger.error("Failed to bind an audio output socket after 100 tries.") self.audio_out_socket = None diff --git a/src/control_backend/core/config.py b/src/control_backend/core/config.py index 0154c28..35acf96 100644 --- a/src/control_backend/core/config.py +++ b/src/control_backend/core/config.py @@ -1,3 +1,12 @@ +""" +An exhaustive overview of configurable options. All of these can be set using environment variables +by nesting with double underscores (__). Start from the ``Settings`` class. + +For example, ``settings.ri_host`` becomes ``RI_HOST``, and +``settings.zmq_settings.ri_communication_address`` becomes +``ZMQ_SETTINGS__RI_COMMUNICATION_ADDRESS``. +""" + from pydantic import BaseModel from pydantic_settings import BaseSettings, SettingsConfigDict @@ -8,15 +17,16 @@ class ZMQSettings(BaseModel): :ivar internal_pub_address: Address for the internal PUB socket. :ivar internal_sub_address: Address for the internal SUB socket. - :ivar ri_command_address: Address for sending commands to the Robot Interface. - :ivar ri_communication_address: Address for receiving communication from the Robot Interface. - :ivar vad_agent_address: Address for the Voice Activity Detection (VAD) agent. + :ivar ri_communication_address: Address for the endpoint that the Robot Interface connects to. + :ivar vad_pub_address: Address that the VAD agent binds to and publishes audio segments to. """ + # ATTENTION: When adding/removing settings, make sure to update the .env.example file + internal_pub_address: str = "tcp://localhost:5560" internal_sub_address: str = "tcp://localhost:5561" - ri_command_address: str = "tcp://localhost:0000" ri_communication_address: str = "tcp://*:5555" + vad_pub_address: str = "inproc://vad_stream" class AgentSettings(BaseModel): @@ -35,6 +45,8 @@ class AgentSettings(BaseModel): :ivar robot_speech_name: Name of the Robot Speech Agent. 
""" + # ATTENTION: When adding/removing settings, make sure to update the .env.example file + # agent names bdi_core_name: str = "bdi_core_agent" bdi_belief_collector_name: str = "belief_collector_agent" @@ -64,6 +76,8 @@ class BehaviourSettings(BaseModel): :ivar transcription_token_buffer: Buffer for transcription tokens. """ + # ATTENTION: When adding/removing settings, make sure to update the .env.example file + sleep_s: float = 1.0 comm_setup_max_retries: int = 5 socket_poller_timeout_ms: int = 100 @@ -88,6 +102,8 @@ class LLMSettings(BaseModel): :ivar local_llm_model: Name of the local LLM model to use. """ + # ATTENTION: When adding/removing settings, make sure to update the .env.example file + local_llm_url: str = "http://localhost:1234/v1/chat/completions" local_llm_model: str = "gpt-oss" @@ -101,6 +117,8 @@ class VADSettings(BaseModel): :ivar sample_rate_hz: Sample rate in Hz for the VAD model. """ + # ATTENTION: When adding/removing settings, make sure to update the .env.example file + repo_or_dir: str = "snakers4/silero-vad" model_name: str = "silero_vad" sample_rate_hz: int = 16000 @@ -114,6 +132,8 @@ class SpeechModelSettings(BaseModel): :ivar openai_model_name: Model name for OpenAI-based speech recognition. """ + # ATTENTION: When adding/removing settings, make sure to update the .env.example file + # model identifiers for speech recognition mlx_model_name: str = "mlx-community/whisper-small.en-mlx" openai_model_name: str = "small.en" @@ -125,7 +145,7 @@ class Settings(BaseSettings): :ivar app_title: Title of the application. :ivar ui_url: URL of the frontend UI. - :ivar ui_url: The hostname of the Robot Interface. + :ivar ri_host: The hostname of the Robot Interface. :ivar zmq_settings: ZMQ configuration. :ivar agent_settings: Agent name configuration. :ivar behaviour_settings: Behavior configuration. 
diff --git a/test/unit/agents/actuation/test_robot_speech_agent.py b/test/unit/agents/actuation/test_robot_speech_agent.py index 15324f6..567a5a1 100644 --- a/test/unit/agents/actuation/test_robot_speech_agent.py +++ b/test/unit/agents/actuation/test_robot_speech_agent.py @@ -56,7 +56,7 @@ async def test_setup_connect(zmq_context, mocker): async def test_handle_message_sends_command(): """Internal message is forwarded to robot pub socket as JSON.""" pubsocket = AsyncMock() - agent = RobotSpeechAgent("robot_speech") + agent = RobotSpeechAgent("robot_speech", "tcp://localhost:3498") agent.pubsocket = pubsocket payload = {"endpoint": "actuate/speech", "data": "hello"} @@ -80,7 +80,7 @@ async def test_zmq_command_loop_valid_payload(zmq_context): fake_socket.recv_multipart = recv_once fake_socket.send_json = AsyncMock() - agent = RobotSpeechAgent("robot_speech") + agent = RobotSpeechAgent("robot_speech", "tcp://localhost:3498") agent.subsocket = fake_socket agent.pubsocket = fake_socket agent._running = True @@ -101,7 +101,7 @@ async def test_zmq_command_loop_invalid_json(): fake_socket.recv_multipart = recv_once fake_socket.send_json = AsyncMock() - agent = RobotSpeechAgent("robot_speech") + agent = RobotSpeechAgent("robot_speech", "tcp://localhost:3498") agent.subsocket = fake_socket agent.pubsocket = fake_socket agent._running = True @@ -115,7 +115,7 @@ async def test_zmq_command_loop_invalid_json(): async def test_handle_message_invalid_payload(): """Invalid payload is caught and does not send.""" pubsocket = AsyncMock() - agent = RobotSpeechAgent("robot_speech") + agent = RobotSpeechAgent("robot_speech", "tcp://localhost:3498") agent.pubsocket = pubsocket msg = InternalMessage(to="robot", sender="tester", body=json.dumps({"bad": "data"})) @@ -129,7 +129,7 @@ async def test_handle_message_invalid_payload(): async def test_stop_closes_sockets(): pubsocket = MagicMock() subsocket = MagicMock() - agent = RobotSpeechAgent("robot_speech") + agent = 
RobotSpeechAgent("robot_speech", "tcp://localhost:3498") agent.pubsocket = pubsocket agent.subsocket = subsocket