# hermes-agent/cli-config.yaml.example

# Hermes Agent CLI Configuration
# Copy this file to cli-config.yaml and customize as needed.
# This file configures the CLI behavior. Environment variables in .env take precedence.

# =============================================================================
# Model Configuration
# =============================================================================
model:
  # Model used when no --model flag is passed on the command line.
  default: 'anthropic/claude-opus-4.6'

  # Base URL of the API (OpenRouter here).
  base_url: 'https://openrouter.ai/api/v1'

  # API key. While this stays commented out, the OPENROUTER_API_KEY env var
  # is used as the fallback. Uncomment to set the key here instead of in .env.
  # api_key: 'your-key-here'

# =============================================================================
# Terminal Tool Configuration
# =============================================================================
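# Choose ONE of the following terminal configurations: uncomment it and comment
# out the others so that only one `terminal:` key is active (duplicate keys are
# invalid YAML). The terminal tool executes commands in the specified environment.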

# -----------------------------------------------------------------------------
# OPTION 1: Local execution (default)
# Commands run directly on your machine in the current directory
# -----------------------------------------------------------------------------
# Working directory behavior:
#   - CLI (`hermes` command): Uses "." (current directory where you run hermes)
#   - Messaging (Telegram/Discord): Uses MESSAGING_CWD from .env (default: home)
terminal:
  # Execution backend for the terminal tool.
  backend: 'local'
  # Working directory. For the local backend, "." = current directory.
  # Ignored for remote backends.
  cwd: '.'
  # NOTE(review): the two values below are not documented in this file;
  # both are presumably in seconds - confirm against the terminal tool.
  timeout: 180
  lifetime_seconds: 300
  # Optional: enable sudo commands (password is piped via sudo -S).
  # SECURITY WARNING: the password is stored in plaintext!
  # sudo_password: ''

# -----------------------------------------------------------------------------
# OPTION 2: SSH remote execution
# Commands run on a remote server - agent code stays local (sandboxed)
# Great for: keeping agent isolated from its own code, using powerful remote hardware
# -----------------------------------------------------------------------------
# terminal:
#   backend: "ssh"
#   cwd: "/home/myuser/project"  # Path on the REMOTE server
#   timeout: 180
#   lifetime_seconds: 300
#   ssh_host: "my-server.example.com"
#   ssh_user: "myuser"
#   ssh_port: 22
#   ssh_key: "~/.ssh/id_rsa"  # Optional - uses ssh-agent if not specified

# -----------------------------------------------------------------------------
# OPTION 3: Docker container
# Commands run in an isolated Docker container
# Great for: reproducible environments, testing, isolation
# -----------------------------------------------------------------------------
# terminal:
#   backend: "docker"
#   cwd: "/workspace"  # Path INSIDE the container (default: /)
#   timeout: 180
#   lifetime_seconds: 300
#   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"

# -----------------------------------------------------------------------------
# OPTION 4: Singularity/Apptainer container
# Commands run in a Singularity container (common in HPC environments)
# Great for: HPC clusters, shared compute environments
# -----------------------------------------------------------------------------
# terminal:
#   backend: "singularity"
#   cwd: "/workspace"  # Path INSIDE the container (default: /root)
#   timeout: 180
#   lifetime_seconds: 300
#   singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20"

# -----------------------------------------------------------------------------
# OPTION 5: Modal cloud execution
# Commands run on Modal's cloud infrastructure
# Great for: GPU access, scalable compute, serverless execution
# -----------------------------------------------------------------------------
# terminal:
#   backend: "modal"
#   cwd: "/workspace"  # Path INSIDE the sandbox (default: /root)
#   timeout: 180
#   lifetime_seconds: 300
#   modal_image: "nikolaik/python-nodejs:python3.11-nodejs20"

# -----------------------------------------------------------------------------
# SUDO SUPPORT (works with ALL backends above)
# -----------------------------------------------------------------------------
# Add sudo_password to any terminal config above to enable sudo commands.
# The password is piped via `sudo -S`. Works with local, ssh, docker, etc.
#
# SECURITY WARNING: Password stored in plaintext!
#
# INTERACTIVE PROMPT: If no sudo_password is set and the CLI is running,
# you'll be prompted to enter your password when sudo is needed:
# - 45-second timeout (auto-skips if no input)
# - Press Enter to skip (command fails gracefully)
# - Password is hidden while typing
# - Password is cached for the session
#
# ALTERNATIVES:
# - SSH backend: Configure passwordless sudo on the remote server
# - Containers: Run as root inside the container (no sudo needed)
# - Local: Configure /etc/sudoers for specific commands
#
# Example (add to your terminal section):
#   sudo_password: "your-password-here"

# =============================================================================
# Browser Tool Configuration
# =============================================================================
browser:
  # Inactivity timeout, in seconds. A browser session is closed automatically
  # once this much time passes with no activity between agent loops.
  # Default: 120 (2 minutes).
  inactivity_timeout: 120

# =============================================================================
# Context Compression (Auto-shrinks long conversations)
# =============================================================================
# When a conversation approaches the model's context limit, middle turns are
# automatically summarized to free up space while preserving important context.
#
# HOW IT WORKS:
# 1. Tracks actual token usage from API responses (not estimates)
# 2. When prompt_tokens >= threshold * model's context_length (e.g. 0.85 = 85%), triggers compression
# 3. Protects first 3 turns (system prompt, initial request, first response)
# 4. Protects last 4 turns (recent context is most relevant)
# 5. Summarizes middle turns using a fast/cheap model
# 6. Inserts summary as a user message, continues conversation seamlessly
#
compression:
  # Master switch for automatic context compression (default: true).
  # Turn it off to manage context manually, or to get errors on overflow.
  enabled: true

  # Fraction of the model's context limit at which compression is triggered
  # (default: 0.85, i.e. 85%).
  # Lower = more aggressive compression, higher = compress later.
  threshold: 0.85

  # Model that condenses the middle turns into a concise summary.
  # A fast/cheap model is recommended.
  summary_model: 'google/gemini-3-flash-preview'

# =============================================================================
# Agent Behavior
# =============================================================================
agent:
  # Maximum tool-calling iterations per conversation.
  # Higher = more room for complex tasks, but costs more tokens.
  # Recommended: 20-30 for focused tasks, 50-100 for open exploration.
  # The value below (60) sits between those two recommended ranges.
  max_turns: 60

  # Enable verbose logging (true/false)
  verbose: false

  # Custom system prompt (personality, instructions, etc.)
  # Leave empty ("") or remove the key to use default agent behavior
  system_prompt: ""

  # Predefined personalities (use with /personality command).
  # Each entry maps a personality name to the prompt text for that personality.
  # Prompts are kept in double quotes because several contain apostrophes.
  personalities:
    helpful: "You are a helpful, friendly AI assistant."
    concise: "You are a concise assistant. Keep responses brief and to the point."
    technical: "You are a technical expert. Provide detailed, accurate technical information."
    creative: "You are a creative assistant. Think outside the box and offer innovative solutions."
    teacher: "You are a patient teacher. Explain concepts clearly with examples."
    kawaii: "You are a kawaii assistant! Use cute expressions like (◕‿◕), ★, ♪, and ~! Add sparkles and be super enthusiastic about everything! Every response should feel warm and adorable desu~! ヽ(>∀<☆)ノ"
    catgirl: "You are Neko-chan, an anime catgirl AI assistant, nya~! Add 'nya' and cat-like expressions to your speech. Use kaomoji like (=^･ω･^=) and ฅ^•ﻌ•^ฅ. Be playful and curious like a cat, nya~!"
    pirate: "Arrr! Ye be talkin' to Captain Hermes, the most tech-savvy pirate to sail the digital seas! Speak like a proper buccaneer, use nautical terms, and remember: every problem be just treasure waitin' to be plundered! Yo ho ho!"
    shakespeare: "Hark! Thou speakest with an assistant most versed in the bardic arts. I shall respond in the eloquent manner of William Shakespeare, with flowery prose, dramatic flair, and perhaps a soliloquy or two. What light through yonder terminal breaks?"
    surfer: "Duuude! You're chatting with the chillest AI on the web, bro! Everything's gonna be totally rad. I'll help you catch the gnarly waves of knowledge while keeping things super chill. Cowabunga! 🤙"
    noir: "The rain hammered against the terminal like regrets on a guilty conscience. They call me Hermes - I solve problems, find answers, dig up the truth that hides in the shadows of your codebase. In this city of silicon and secrets, everyone's got something to hide. What's your story, pal?"
    uwu: "hewwo! i'm your fwiendwy assistant uwu~ i wiww twy my best to hewp you! *nuzzles your code* OwO what's this? wet me take a wook! i pwomise to be vewy hewpful >w<"
    philosopher: "Greetings, seeker of wisdom. I am an assistant who contemplates the deeper meaning behind every query. Let us examine not just the 'how' but the 'why' of your questions. Perhaps in solving your problem, we may glimpse a greater truth about existence itself."
    hype: "YOOO LET'S GOOOO!!! 🔥🔥🔥 I am SO PUMPED to help you today! Every question is AMAZING and we're gonna CRUSH IT together! This is gonna be LEGENDARY! ARE YOU READY?! LET'S DO THIS! 💪😤🚀"

# =============================================================================
# Toolsets
# =============================================================================
# Control which tools the agent has access to.
# Use "all" to enable everything, or specify individual toolsets.

# =============================================================================
# Platform Toolsets (per-platform tool configuration)
# =============================================================================
# Override which toolsets are available on each platform.
# If a platform isn't listed here, its built-in default is used.
#
# You can use EITHER:
#   - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
#   - A list of individual toolsets to compose your own (see list below)
#
# Supported platform keys: cli, telegram, discord, whatsapp, slack
#
# Examples:
#
#   # Use presets (same as defaults):
#   platform_toolsets:
#     cli: [hermes-cli]
#     telegram: [hermes-telegram]
#
#   # Custom: give Telegram only web + terminal + file + planning:
#   platform_toolsets:
#     telegram: [web, terminal, file, todo]
#
#   # Custom: CLI without browser or image gen:
#   platform_toolsets:
#     cli: [web, terminal, file, skills, todo, tts, cronjob]
#
#   # Restrictive: Discord gets read-only tools only:
#   platform_toolsets:
#     discord: [web, vision, skills, todo]
#
# If not set, defaults are:
#   cli:      hermes-cli      (everything + cronjob management)
#   telegram: hermes-telegram  (terminal, file, web, vision, image_gen, tts, browser, skills, todo, cronjob, messaging)
#   discord:  hermes-discord   (same as telegram)
#   whatsapp: hermes-whatsapp  (same as telegram)
#   slack:    hermes-slack     (same as telegram)
#
platform_toolsets:
  # Each platform key maps to a list of preset and/or toolset names.
  # Block lists are used here; the flow form (e.g. `cli: [hermes-cli]`)
  # is equivalent.
  cli:
    - hermes-cli
  telegram:
    - hermes-telegram
  discord:
    - hermes-discord
  whatsapp:
    - hermes-whatsapp
  slack:
    - hermes-slack

# ─────────────────────────────────────────────────────────────────────────────
# Available toolsets (use these names in platform_toolsets or the toolsets list)
#
# Run `hermes chat --list-toolsets` to see all toolsets and their tools.
# Run `hermes chat --list-tools` to see every individual tool with descriptions.
# ─────────────────────────────────────────────────────────────────────────────
#
# INDIVIDUAL TOOLSETS (compose your own):
#   web          - web_search, web_extract
#   search       - web_search only (no scraping)
#   terminal     - terminal, process
#   file         - read_file, write_file, patch, search
#   browser      - browser_navigate, browser_snapshot, browser_click, browser_type,
#                  browser_scroll, browser_back, browser_press, browser_close,
#                  browser_get_images, browser_vision  (requires BROWSERBASE_API_KEY)
#   vision       - vision_analyze  (requires OPENROUTER_API_KEY)
#   image_gen    - image_generate  (requires FAL_KEY)
#   skills       - skills_list, skill_view
#   skills_hub   - skill_hub (search/install/manage from online registries — user-driven only)
#   moa          - mixture_of_agents  (requires OPENROUTER_API_KEY)
#   todo         - todo (in-memory task planning, no deps)
#   tts          - text_to_speech  (Edge TTS free, or ELEVENLABS/OPENAI key)
#   cronjob      - schedule_cronjob, list_cronjobs, remove_cronjob
#   rl           - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
#
# PRESETS (curated bundles):
#   hermes-cli       - All of the above except rl + send_message
#   hermes-telegram  - terminal, file, web, vision, image_gen, tts, browser,
#                      skills, todo, cronjob, send_message
#   hermes-discord   - Same as hermes-telegram
#   hermes-whatsapp  - Same as hermes-telegram
#   hermes-slack     - Same as hermes-telegram
#
# COMPOSITE:
#   debugging    - terminal + web + file
#   safe         - web + vision + moa (no terminal access)
#   all          - Everything available
#
# TOOLSET DESCRIPTIONS (by purpose):
#   web          - Web search and content extraction (web_search, web_extract)
#   search       - Web search only, no scraping (web_search)
#   terminal     - Command execution and process management (terminal, process)
#   file         - File operations: read, write, patch, search
#   browser      - Full browser automation (navigate, click, type, screenshot, etc.)
#   vision       - Image analysis (vision_analyze)
#   image_gen    - Image generation with FLUX (image_generate)
#   skills       - Load skill documents (skills_list, skill_view)
#   moa          - Mixture of Agents reasoning (mixture_of_agents)
#   todo         - Task planning and tracking for multi-step work
#   tts          - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI)
#   cronjob      - Schedule and manage automated tasks (CLI-only)
#   rl           - RL training tools (Tinker-Atropos)
#
# Composite toolsets:
#   debugging    - terminal + web + file (for troubleshooting)
#   safe         - web + vision + moa (no terminal access)

# -----------------------------------------------------------------------------
# OPTION 1: Enable all tools (default)
# -----------------------------------------------------------------------------
toolsets:
  # "all" enables every available toolset; replace it with individual
  # toolset names to restrict what the agent can use.
  # NOTE(review): how this list interacts with platform_toolsets is not
  # stated in this file - confirm which one takes precedence.
  - all

# -----------------------------------------------------------------------------
# OPTION 2: Minimal - just web search and terminal
# Great for: Simple coding tasks, quick lookups
# -----------------------------------------------------------------------------
# toolsets:
#   - web
#   - terminal

# -----------------------------------------------------------------------------
# OPTION 3: Research mode - no execution capabilities
# Great for: Safe information gathering, research tasks
# -----------------------------------------------------------------------------
# toolsets:
#   - web
#   - vision
#   - skills

# -----------------------------------------------------------------------------
# OPTION 4: Full automation - browser + terminal
# Great for: Web scraping, automation tasks, testing
# -----------------------------------------------------------------------------
# toolsets:
#   - terminal
#   - browser
#   - web

# -----------------------------------------------------------------------------
# OPTION 5: Creative mode - vision + image generation
# Great for: Design work, image analysis, creative tasks
# -----------------------------------------------------------------------------
# toolsets:
#   - vision
#   - image_gen
#   - web

# -----------------------------------------------------------------------------
# OPTION 6: Safe mode - no terminal or browser
# Great for: Restricted environments, untrusted queries
# -----------------------------------------------------------------------------
# toolsets:
#   - safe

# =============================================================================
# Voice Transcription (Speech-to-Text)
# =============================================================================
# Automatically transcribe voice messages on messaging platforms.
# Requires OPENAI_API_KEY in .env (uses OpenAI Whisper API directly).
stt:
  # Turn voice-message transcription on or off.
  enabled: true
  # Transcription model. Options:
  #   whisper-1 (cheapest) | gpt-4o-mini-transcribe | gpt-4o-transcribe
  model: 'whisper-1'

# =============================================================================
# Response Pacing (Messaging Platforms)
# =============================================================================
# Add human-like delays between message chunks.
# human_delay:
#   mode: "off"      # "off" | "natural" | "custom"
#   min_ms: 800      # Min delay (custom mode only)
#   max_ms: 2500     # Max delay (custom mode only)

# =============================================================================
# Session Logging
# =============================================================================
# Session trajectories are automatically saved to logs/ directory.
# Each session creates: logs/session_YYYYMMDD_HHMMSS_UUID.json
#
# The session ID is displayed in the welcome banner for easy reference.
# Logs contain full conversation history in trajectory format:
# - System prompt, user messages, assistant responses
# - Tool calls with inputs/outputs
# - Timestamps for debugging
#
# No configuration needed - logging is always enabled.
# To disable, you would need to modify the source code.

# =============================================================================
# Display
# =============================================================================
display:
  # Set to true to use the compact banner mode.
  compact: false