# pepperplus-cb/src/control_backend/core/config.py

"""
This program has been developed by students from the bachelor Computer Science at Utrecht
University within the Software Project course.
© Copyright Utrecht University (Department of Information and Computing Sciences)
--------------------------------------------------------------------------------
An exhaustive overview of configurable options. All of these can be set using environment variables;
options inside a nested settings group are addressed by joining the names with double underscores
(``__``). Start from the ``Settings`` class.

For example, ``settings.ri_host`` becomes ``RI_HOST``, and
``settings.zmq_settings.ri_communication_address`` becomes
``ZMQ_SETTINGS__RI_COMMUNICATION_ADDRESS``.
"""

from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict


class ZMQSettings(BaseModel):
    """
    Configuration for ZeroMQ (ZMQ) addresses used for inter-process communication.

    :ivar internal_pub_address: Address for the internal PUB socket.
    :ivar internal_sub_address: Address for the internal SUB socket.
    :ivar ri_communication_address: Address for the endpoint that the Robot Interface connects to.
    :ivar internal_gesture_rep_adress: Address for the internal gesture socket (presumably a ZMQ REP
        socket, judging by the name; confirm against the gesture agent). Note the spelling
        ``adress``: it is part of the attribute name and therefore of the environment variable name.
    :ivar vad_pub_address: Address that the VAD agent binds to and publishes audio segments to.
    """

    # ATTENTION: When adding/removing settings, make sure to update the .env.example file

    internal_pub_address: str = "tcp://localhost:5560"
    internal_sub_address: str = "tcp://localhost:5561"
    # Wildcard host: the default listens on every interface on port 5555.
    ri_communication_address: str = "tcp://*:5555"
    # NOTE(review): "adress" is misspelled, but renaming it would change the attribute and the
    # matching environment variable, so it is kept as-is here.
    internal_gesture_rep_adress: str = "tcp://localhost:7788"
    # In-process transport by default, unlike the TCP addresses above.
    vad_pub_address: str = "inproc://vad_stream"


class AgentSettings(BaseModel):
    """
    Names of the various agents in the system. These names are used for routing messages.

    :ivar bdi_core_name: Name of the BDI Core Agent.
    :ivar bdi_program_manager_name: Name of the BDI Program Manager Agent.
    :ivar visual_emotion_recognition_name: Name of the Visual Emotion Recognition Agent.
    :ivar text_belief_extractor_name: Name of the Text Belief Extractor Agent.
    :ivar vad_name: Name of the Voice Activity Detection (VAD) Agent.
    :ivar llm_name: Name of the Large Language Model (LLM) Agent.
    :ivar test_name: Name of the Test Agent.
    :ivar transcription_name: Name of the Transcription Agent.
    :ivar ri_communication_name: Name of the RI Communication Agent.
    :ivar robot_speech_name: Name of the Robot Speech Agent.
    :ivar robot_gesture_name: Name of the Robot Gesture Agent.
    :ivar user_interrupt_name: Name of the User Interrupt Agent.
    """

    # ATTENTION: When adding/removing settings, make sure to update the .env.example file

    # agent names
    bdi_core_name: str = "bdi_core_agent"
    bdi_program_manager_name: str = "bdi_program_manager_agent"
    visual_emotion_recognition_name: str = "visual_emotion_recognition_agent"
    text_belief_extractor_name: str = "text_belief_extractor_agent"
    vad_name: str = "vad_agent"
    llm_name: str = "llm_agent"
    test_name: str = "test_agent"
    transcription_name: str = "transcription_agent"
    ri_communication_name: str = "ri_communication_agent"
    robot_speech_name: str = "robot_speech_agent"
    robot_gesture_name: str = "robot_gesture_agent"
    user_interrupt_name: str = "user_interrupt_agent"


class BehaviourSettings(BaseModel):
    """
    Configuration for agent behaviors and parameters.

    :ivar sleep_s: Default sleep time in seconds for loops.
    :ivar comm_setup_max_retries: Maximum number of retries for setting up communication.
    :ivar socket_poller_timeout_ms: Timeout in milliseconds for socket polling.
    :ivar vad_prob_threshold: Probability threshold for Voice Activity Detection.
    :ivar vad_initial_since_speech: Initial value for 'since speech' counter in VAD.
    :ivar vad_non_speech_patience_chunks: Number of non-speech chunks to wait before speech ended.
    :ivar vad_begin_silence_chunks: The number of chunks of silence to prepend to speech chunks.
    :ivar transcription_max_concurrent_tasks: Maximum number of concurrent transcription tasks.
    :ivar transcription_words_per_minute: Estimated words per minute for transcription timing.
    :ivar transcription_words_per_token: Estimated words per token for transcription timing.
    :ivar transcription_token_buffer: Buffer for transcription tokens.
    :ivar conversation_history_length_limit: The maximum number of messages to extract beliefs from.
    :ivar visual_emotion_recognition_window_duration_s: Duration in seconds over which to aggregate
        emotions and update emotion beliefs.
    :ivar visual_emotion_recognition_min_frames_per_face: Minimum number of frames per face required
        to consider a face valid.
    :ivar trigger_time_to_wait: Number of milliseconds to wait before informing the UI about trigger
        completion.
    :ivar agentspeak_file: Path to the AgentSpeak (``.asl``) file. The default is a relative path
        (presumably resolved against the working directory; confirm against the BDI agent).
    """

    # ATTENTION: When adding/removing settings, make sure to update the .env.example file

    sleep_s: float = 1.0
    comm_setup_max_retries: int = 5
    socket_poller_timeout_ms: int = 100

    # VAD settings
    vad_prob_threshold: float = 0.5
    vad_initial_since_speech: int = 100
    vad_non_speech_patience_chunks: int = 15
    vad_begin_silence_chunks: int = 6

    # transcription behaviour
    transcription_max_concurrent_tasks: int = 3
    transcription_words_per_minute: int = 300
    transcription_words_per_token: float = 0.75  # (3 words = 4 tokens)
    transcription_token_buffer: int = 10

    # Text belief extractor settings
    conversation_history_length_limit: int = 10

    # Visual Emotion Recognition settings
    visual_emotion_recognition_window_duration_s: int = 3
    visual_emotion_recognition_min_frames_per_face: int = 3

    # AgentSpeak related settings
    trigger_time_to_wait: int = 2000  # milliseconds, unlike the *_s settings above
    agentspeak_file: str = "src/control_backend/agents/bdi/agentspeak.asl"


class LLMSettings(BaseModel):
    """
    Configuration for the Large Language Model (LLM).

    :ivar local_llm_url: URL for the local LLM API.
    :ivar local_llm_model: Name of the local LLM model to use.
    :ivar api_key: API key for the LLM API. Empty by default (presumably meaning that no key is
        sent; confirm against the LLM agent).
    :ivar chat_temperature: The temperature to use while generating chat responses.
    :ivar code_temperature: The temperature to use while generating code-like responses like during
        belief inference.
    :ivar n_parallel: The number of parallel calls allowed to be made to the LLM.
    """

    # ATTENTION: When adding/removing settings, make sure to update the .env.example file

    local_llm_url: str = "http://localhost:1234/v1/chat/completions"
    local_llm_model: str = "gpt-oss"
    # NOTE(review): stored as a plain ``str``, so the value is not masked when the settings object
    # is printed or dumped.
    api_key: str = ""
    chat_temperature: float = 1.0
    code_temperature: float = 0.3
    n_parallel: int = 4


class VADSettings(BaseModel):
    """
    Configuration for Voice Activity Detection (VAD) model.

    When nested under ``Settings.vad_settings``, these options are set through environment variables
    prefixed with ``VAD_SETTINGS__`` (for example ``VAD_SETTINGS__SAMPLE_RATE_HZ``).

    :ivar repo_or_dir: Repository or directory for the VAD model.
    :ivar model_name: Name of the VAD model.
    :ivar sample_rate_hz: Sample rate in Hz for the VAD model.
    """

    # ATTENTION: When adding/removing settings, make sure to update the .env.example file

    # NOTE(review): repo_or_dir and model_name look like hub-style model identifiers; how they are
    # resolved is decided by the VAD agent, not by this module. Confirm there.
    repo_or_dir: str = "snakers4/silero-vad"
    model_name: str = "silero_vad"
    sample_rate_hz: int = 16000


class SpeechModelSettings(BaseModel):
    """
    Configuration for speech recognition models.

    When nested under ``Settings.speech_model_settings``, these options are set through environment
    variables prefixed with ``SPEECH_MODEL_SETTINGS__``.

    :ivar mlx_model_name: Model name for MLX-based speech recognition.
    :ivar openai_model_name: Model name for OpenAI-based speech recognition.
    """

    # ATTENTION: When adding/removing settings, make sure to update the .env.example file

    # model identifiers for speech recognition
    # NOTE(review): presumably only one of the two is used, depending on which speech recognition
    # backend is active; confirm against the transcription agent.
    mlx_model_name: str = "mlx-community/whisper-small.en-mlx"
    openai_model_name: str = "small.en"


class LoggingSettings(BaseModel):
    """
    Configuration for logging.

    When nested under ``Settings.logging_settings``, these options are set through environment
    variables prefixed with ``LOGGING_SETTINGS__``.

    :ivar logging_config_file: Path to the logging configuration file.
    :ivar experiment_log_directory: Location of the experiment logs. Must match the logging config.
    :ivar experiment_logger_name: Name of the experiment logger. Must match the logging config.
    """

    # NOTE(review): the other settings groups in this module carry a reminder to keep
    # .env.example in sync; it is absent here. Confirm whether these options belong there too.

    # Relative paths by default.
    logging_config_file: str = ".logging_config.yaml"
    experiment_log_directory: str = "experiment_logs"
    experiment_logger_name: str = "experiment"


class Settings(BaseSettings):
    """
    Root of the application configuration.

    Values come from the defaults declared below and can be overridden through environment
    variables or a ``.env`` file. Options of a nested group are addressed by joining the group name
    and the option name with a double underscore (``__``).

    :ivar app_title: Title of the application.
    :ivar ui_url: URL of the frontend UI.
    :ivar ri_host: The hostname of the Robot Interface.
    :ivar logging_settings: Logging configuration.
    :ivar zmq_settings: ZMQ configuration.
    :ivar agent_settings: Agent name configuration.
    :ivar behaviour_settings: Behavior configuration.
    :ivar vad_settings: VAD model configuration.
    :ivar speech_model_settings: Speech model configuration.
    :ivar llm_settings: LLM configuration.
    """

    # How pydantic-settings locates values: nested names use "__", and ".env" is read as well.
    model_config = SettingsConfigDict(env_nested_delimiter="__", env_file=".env")

    # Top-level scalar options
    app_title: str = "PepperPlus"
    ui_url: str = "http://localhost:5173"
    ri_host: str = "localhost"

    # Nested option groups, each defaulting to that group's own defaults
    logging_settings: LoggingSettings = LoggingSettings()
    zmq_settings: ZMQSettings = ZMQSettings()
    agent_settings: AgentSettings = AgentSettings()
    behaviour_settings: BehaviourSettings = BehaviourSettings()
    vad_settings: VADSettings = VADSettings()
    speech_model_settings: SpeechModelSettings = SpeechModelSettings()
    llm_settings: LLMSettings = LLMSettings()


# Module-level settings instance, created once when this module is first imported.
settings = Settings()