2026-01-31 06:30:48 +00:00
# Hermes Agent CLI Configuration
# Copy this file to cli-config.yaml and customize as needed.
# This file configures the CLI behavior. Environment variables in .env take precedence.
# =============================================================================
# Model Configuration
# =============================================================================
model:
# Default model to use (can be overridden with --model flag)
2026-02-08 10:49:24 +00:00
default: "anthropic/claude-opus-4.6"
2026-01-31 06:30:48 +00:00
2026-02-20 17:24:00 -08:00
# Inference provider selection:
# "auto" - Use Nous Portal if logged in, otherwise OpenRouter/env vars (default)
2026-03-08 18:40:50 +10:00
# "nous-api" - Use Nous Portal via API key (requires: NOUS_API_KEY)
2026-02-20 17:24:00 -08:00
# "openrouter" - Always use OpenRouter API key from OPENROUTER_API_KEY
# "nous" - Always use Nous Portal (requires: hermes login)
feat: add z.ai/GLM, Kimi/Moonshot, MiniMax as first-class providers
Adds 4 new direct API-key providers (zai, kimi-coding, minimax, minimax-cn)
to the inference provider system. All use standard OpenAI-compatible
chat/completions endpoints with Bearer token auth.
Core changes:
- auth.py: Extended ProviderConfig with api_key_env_vars and base_url_env_var
fields. Added providers to PROVIDER_REGISTRY. Added provider aliases
(glm, z-ai, zhipu, kimi, moonshot). Added auto-detection of API-key
providers in resolve_provider(). Added resolve_api_key_provider_credentials()
and get_api_key_provider_status() helpers.
- runtime_provider.py: Added generic API-key provider branch in
resolve_runtime_provider() — any provider with auth_type='api_key'
is automatically handled.
- main.py: Added providers to hermes model menu with generic
_model_flow_api_key_provider() flow. Updated _has_any_provider_configured()
to check all provider env vars. Updated argparse --provider choices.
- setup.py: Added providers to setup wizard with API key prompts and
curated model lists.
- config.py: Added env vars (GLM_API_KEY, KIMI_API_KEY, MINIMAX_API_KEY,
etc.) to OPTIONAL_ENV_VARS.
- status.py: Added API key display and provider status section.
- doctor.py: Added connectivity checks for each provider endpoint.
- cli.py: Updated provider docstrings.
Docs: Updated README.md, .env.example, cli-config.yaml.example,
cli-commands.md, environment-variables.md, configuration.md.
Tests: 50 new tests covering registry, aliases, resolution, auto-detection,
credential resolution, and runtime provider dispatch.
Inspired by PR #33 (numman-ali) which proposed a provider registry approach.
Credit to tars90percent (PR #473) and manuelschipper (PR #420) for related
provider improvements merged earlier in this changeset.
2026-03-06 18:55:12 -08:00
# "zai" - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
# "kimi-coding"- Use Kimi / Moonshot AI models (requires: KIMI_API_KEY)
# "minimax" - Use MiniMax global endpoint (requires: MINIMAX_API_KEY)
# "minimax-cn" - Use MiniMax China endpoint (requires: MINIMAX_CN_API_KEY)
2026-02-20 17:24:00 -08:00
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
provider: "auto"
2026-01-31 06:30:48 +00:00
# API configuration (falls back to OPENROUTER_API_KEY env var)
# api_key: "your-key-here" # Uncomment to set here instead of .env
base_url: "https://openrouter.ai/api/v1"
2026-03-01 18:24:27 -08:00
# =============================================================================
# OpenRouter Provider Routing (only applies when using OpenRouter)
# =============================================================================
# Control how requests are routed across providers on OpenRouter.
# See: https://openrouter.ai/docs/guides/routing/provider-selection
#
# provider_routing:
# # Sort strategy: "price" (default), "throughput", or "latency"
# # Append :nitro to model name for a shortcut to throughput sorting.
# sort: "throughput"
#
# # Only allow these providers (provider slugs from OpenRouter)
# # only: ["anthropic", "google"]
#
# # Skip these providers entirely
# # ignore: ["deepinfra", "fireworks"]
#
# # Try providers in this order (overrides default load balancing)
# # order: ["anthropic", "google", "together"]
#
# # Require providers to support all parameters in your request
# # require_parameters: true
#
# # Data policy: "allow" (default) or "deny" to exclude providers that may store data
# # data_collection: "deny"
2026-03-07 21:05:40 -08:00
# =============================================================================
# Git Worktree Isolation
# =============================================================================
# When enabled, each CLI session creates an isolated git worktree so multiple
# agents can work on the same repo concurrently without file collisions.
# Equivalent to always passing --worktree / -w on the command line.
#
# worktree: true # Always create a worktree when in a git repo
# worktree: false # Default — only create when -w flag is passed
2026-01-31 06:30:48 +00:00
# =============================================================================
# Terminal Tool Configuration
# =============================================================================
# Choose ONE of the following terminal configurations by uncommenting it.
# The terminal tool executes commands in the specified environment.
# -----------------------------------------------------------------------------
# OPTION 1: Local execution (default)
# Commands run directly on your machine in the current directory
# -----------------------------------------------------------------------------
2026-02-03 10:46:23 -08:00
# Working directory behavior:
# - CLI (`hermes` command): Uses "." (current directory where you run hermes)
# - Messaging (Telegram/Discord): Uses MESSAGING_CWD from .env (default: home)
2026-01-31 06:30:48 +00:00
terminal:
2026-02-16 22:31:41 -08:00
backend: "local"
cwd: "." # For local backend: "." = current directory. Ignored for remote backends.
2026-01-31 06:30:48 +00:00
timeout: 180
lifetime_seconds: 300
2026-02-01 10:02:34 -08:00
# sudo_password: "" # Enable sudo commands (pipes via sudo -S) - SECURITY WARNING: plaintext!
2026-01-31 06:30:48 +00:00
# -----------------------------------------------------------------------------
# OPTION 2: SSH remote execution
# Commands run on a remote server - agent code stays local (sandboxed)
# Great for: keeping agent isolated from its own code, using powerful remote hardware
# -----------------------------------------------------------------------------
# terminal:
2026-02-16 22:31:41 -08:00
# backend: "ssh"
# cwd: "/home/myuser/project" # Path on the REMOTE server
2026-01-31 06:30:48 +00:00
# timeout: 180
# lifetime_seconds: 300
# ssh_host: "my-server.example.com"
# ssh_user: "myuser"
# ssh_port: 22
# ssh_key: "~/.ssh/id_rsa" # Optional - uses ssh-agent if not specified
# -----------------------------------------------------------------------------
# OPTION 3: Docker container
# Commands run in an isolated Docker container
# Great for: reproducible environments, testing, isolation
# -----------------------------------------------------------------------------
# terminal:
2026-02-16 22:31:41 -08:00
# backend: "docker"
# cwd: "/workspace" # Path INSIDE the container (default: /)
2026-01-31 06:30:48 +00:00
# timeout: 180
# lifetime_seconds: 300
2026-02-02 19:13:41 -08:00
# docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
2026-01-31 06:30:48 +00:00
# -----------------------------------------------------------------------------
# OPTION 4: Singularity/Apptainer container
# Commands run in a Singularity container (common in HPC environments)
# Great for: HPC clusters, shared compute environments
# -----------------------------------------------------------------------------
# terminal:
2026-02-16 22:31:41 -08:00
# backend: "singularity"
# cwd: "/workspace" # Path INSIDE the container (default: /root)
2026-01-31 06:30:48 +00:00
# timeout: 180
# lifetime_seconds: 300
2026-02-02 19:13:41 -08:00
# singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20"
2026-01-31 06:30:48 +00:00
# -----------------------------------------------------------------------------
# OPTION 5: Modal cloud execution
# Commands run on Modal's cloud infrastructure
# Great for: GPU access, scalable compute, serverless execution
# -----------------------------------------------------------------------------
# terminal:
2026-02-16 22:31:41 -08:00
# backend: "modal"
# cwd: "/workspace" # Path INSIDE the sandbox (default: /root)
2026-01-31 06:30:48 +00:00
# timeout: 180
# lifetime_seconds: 300
2026-02-02 19:13:41 -08:00
# modal_image: "nikolaik/python-nodejs:python3.11-nodejs20"
2026-03-05 00:44:39 -08:00
# -----------------------------------------------------------------------------
# OPTION 6: Daytona cloud execution
# Commands run in Daytona cloud sandboxes
# Great for: Cloud dev environments, persistent workspaces, team collaboration
# Requires: pip install daytona, DAYTONA_API_KEY env var
# -----------------------------------------------------------------------------
# terminal:
# backend: "daytona"
2026-03-05 01:11:55 -08:00
# cwd: "~"
2026-03-05 00:44:39 -08:00
# timeout: 180
# lifetime_seconds: 300
# daytona_image: "nikolaik/python-nodejs:python3.11-nodejs20"
2026-03-05 01:11:55 -08:00
# container_disk: 10240 # Daytona max is 10GB per sandbox
2026-03-05 00:44:39 -08:00
2026-02-23 02:11:33 -08:00
#
2026-03-05 00:44:39 -08:00
# --- Container resource limits (docker, singularity, modal, daytona -- ignored for local/ssh) ---
2026-02-23 02:11:33 -08:00
# These settings apply to all container backends. They control the resources
# allocated to the sandbox and whether its filesystem persists across sessions.
2026-03-04 03:29:05 -08:00
container_cpu: 1 # CPU cores
container_memory: 5120 # Memory in MB (5120 = 5GB)
container_disk: 51200 # Disk in MB (51200 = 50GB)
container_persistent: true # Persist filesystem across sessions (false = ephemeral)
2026-01-31 06:30:48 +00:00
2026-02-01 10:02:34 -08:00
# -----------------------------------------------------------------------------
# SUDO SUPPORT (works with ALL backends above)
# -----------------------------------------------------------------------------
# Add sudo_password to any terminal config above to enable sudo commands.
# The password is piped via `sudo -S`. Works with local, ssh, docker, etc.
#
# SECURITY WARNING: Password stored in plaintext!
#
2026-02-01 15:36:26 -08:00
# INTERACTIVE PROMPT: If no sudo_password is set and the CLI is running,
# you'll be prompted to enter your password when sudo is needed:
# - 45-second timeout (auto-skips if no input)
# - Press Enter to skip (command fails gracefully)
# - Password is hidden while typing
# - Password is cached for the session
#
2026-02-01 10:02:34 -08:00
# ALTERNATIVES:
# - SSH backend: Configure passwordless sudo on the remote server
# - Containers: Run as root inside the container (no sudo needed)
# - Local: Configure /etc/sudoers for specific commands
#
# Example (add to your terminal section):
# sudo_password: "your-password-here"
# =============================================================================
# Browser Tool Configuration
# =============================================================================
browser:
# Inactivity timeout in seconds - browser sessions are automatically closed
# after this period of no activity between agent loops (default: 120 = 2 minutes)
inactivity_timeout: 120
2026-02-01 18:01:31 -08:00
# =============================================================================
# Context Compression (Auto-shrinks long conversations)
# =============================================================================
# When conversation approaches model's context limit, middle turns are
# automatically summarized to free up space while preserving important context.
#
# HOW IT WORKS:
# 1. Tracks actual token usage from API responses (not estimates)
# 2. When prompt_tokens >= threshold% of model's context_length, triggers compression
# 3. Protects first 3 turns (system prompt, initial request, first response)
# 4. Protects last 4 turns (recent context is most relevant)
# 5. Summarizes middle turns using a fast/cheap model
# 6. Inserts summary as a user message, continues conversation seamlessly
#
compression:
# Enable automatic context compression (default: true)
# Set to false if you prefer to manage context manually or want errors on overflow
enabled: true
# Trigger compression at this % of model's context limit (default: 0.85 = 85%)
# Lower values = more aggressive compression, higher values = compress later
threshold: 0.85
# Model to use for generating summaries (fast/cheap recommended)
2026-03-07 08:52:06 -08:00
# This model compresses the middle turns into a concise summary.
# IMPORTANT: it receives the full middle section of the conversation, so it
# MUST support a context length at least as large as your main model's.
2026-02-08 10:49:24 +00:00
summary_model: "google/gemini-3-flash-preview"
2026-03-07 08:52:06 -08:00
# Provider for the summary model (default: "auto")
# Options: "auto", "openrouter", "nous", "main"
# summary_provider: "auto"
# =============================================================================
# Auxiliary Models (Advanced — Experimental)
# =============================================================================
# Hermes uses lightweight "auxiliary" models for side tasks: image analysis,
# browser screenshot analysis, web page summarization, and context compression.
#
# By default these use Gemini Flash via OpenRouter or Nous Portal and are
# auto-detected from your credentials. You do NOT need to change anything
# here for normal usage.
#
# WARNING: Overriding these with providers other than OpenRouter or Nous Portal
# is EXPERIMENTAL and may not work. Not all models/providers support vision,
# produce usable summaries, or accept the same API format. Change at your own
# risk — if things break, reset to "auto" / empty values.
#
# Each task has its own provider + model pair so you can mix providers.
# For example: OpenRouter for vision (needs multimodal), but your main
# local endpoint for compression (just needs text).
#
# Provider options:
# "auto" - Best available: OpenRouter → Nous Portal → main endpoint (default)
# "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
# "nous" - Force Nous Portal (requires: hermes login)
2026-03-08 18:50:26 -07:00
# "codex" - Force Codex OAuth (requires: hermes model → Codex).
# Uses gpt-5.3-codex which supports vision.
# "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
# Works with OpenAI API, local models, or any OpenAI-compatible
# endpoint. Also falls back to Codex OAuth and API-key providers.
2026-03-07 08:52:06 -08:00
#
# Model: leave empty to use the provider's default. When empty, OpenRouter
# uses "google/gemini-3-flash-preview" and Nous uses "gemini-3-flash".
# Other providers pick a sensible default automatically.
#
# auxiliary:
# # Image analysis: vision_analyze tool + browser screenshots
# vision:
# provider: "auto"
# model: "" # e.g. "google/gemini-2.5-flash", "openai/gpt-4o"
#
# # Web page scraping / summarization + browser page text extraction
# web_extract:
# provider: "auto"
# model: ""
2026-02-01 18:01:31 -08:00
2026-02-19 00:57:31 -08:00
# =============================================================================
# Persistent Memory
# =============================================================================
# Bounded curated memory injected into the system prompt every session.
# Two stores: MEMORY.md (agent's notes) and USER.md (user profile).
# Character limits keep the memory small and focused. The agent manages
# pruning -- when at the limit, it must consolidate or replace entries.
# Disabled by default in batch_runner and RL environments.
#
memory:
# Agent's personal notes: environment facts, conventions, things learned
memory_enabled: true
# User profile: preferences, communication style, expectations
user_profile_enabled: true
# Character limits (~2.75 chars per token, model-independent)
memory_char_limit: 2200 # ~800 tokens
user_char_limit: 1375 # ~500 tokens
2026-02-22 10:15:17 -08:00
# Periodic memory nudge: remind the agent to consider saving memories
# every N user turns. Set to 0 to disable. Only active when memory is enabled.
nudge_interval: 10 # Nudge every 10 user turns (0 = disabled)
# Memory flush: give the agent one turn to save memories before context is
# lost (compression, /new, /reset, exit). Set to 0 to disable.
# For exit/reset, only fires if the session had at least this many user turns.
flush_min_turns: 6 # Min user turns to trigger flush on exit/reset (0 = disabled)
2026-02-26 21:20:50 -08:00
# =============================================================================
# Session Reset Policy (Messaging Platforms)
# =============================================================================
# Controls when messaging sessions (Telegram, Discord, WhatsApp, Slack) are
# automatically cleared. Without resets, conversation context grows indefinitely
# which increases API costs with every message.
#
# When a reset triggers, the agent first saves important information to its
# persistent memory — but the conversation context is wiped. The agent starts
# fresh but retains learned facts via its memory system.
#
# Users can always manually reset with /reset or /new in chat.
#
# Modes:
# "both" - Reset on EITHER inactivity timeout or daily boundary (recommended)
# "idle" - Reset only after N minutes of inactivity
# "daily" - Reset only at a fixed hour each day
# "none" - Never auto-reset; context lives until /reset or compression kicks in
#
# When a reset triggers, the agent gets one turn to save important memories and
# skills before the context is wiped. Persistent memory carries across sessions.
#
session_reset:
mode: both # "both", "idle", "daily", or "none"
idle_minutes: 1440 # Inactivity timeout in minutes (default: 1440 = 24 hours)
at_hour: 4 # Daily reset hour, 0-23 local time (default: 4 AM)
2026-02-22 13:28:13 -08:00
# =============================================================================
# Skills Configuration
# =============================================================================
# Skills are reusable procedures the agent can load and follow. The agent can
# also create new skills after completing complex tasks.
#
skills:
# Nudge the agent to create skills after complex tasks.
# Every N tool-calling iterations, remind the model to consider saving a skill.
# Set to 0 to disable.
creation_nudge_interval: 15
2026-01-31 06:30:48 +00:00
# =============================================================================
# Agent Behavior
# =============================================================================
agent:
2026-02-03 14:48:19 -08:00
# Maximum tool-calling iterations per conversation
# Higher = more room for complex tasks, but costs more tokens
# Recommended: 20-30 for focused tasks, 50-100 for open exploration
max_turns: 60
2026-01-31 06:30:48 +00:00
# Enable verbose logging
verbose: false
2026-02-24 03:30:19 -08:00
# Reasoning effort level (OpenRouter and Nous Portal)
# Controls how much "thinking" the model does before responding.
# Options: "xhigh" (max), "high", "medium", "low", "minimal", "none" (disable)
2026-03-07 10:14:19 -08:00
reasoning_effort: "medium"
2026-01-31 06:30:48 +00:00
# Predefined personalities (use with /personality command)
personalities:
helpful: "You are a helpful, friendly AI assistant."
concise: "You are a concise assistant. Keep responses brief and to the point."
technical: "You are a technical expert. Provide detailed, accurate technical information."
creative: "You are a creative assistant. Think outside the box and offer innovative solutions."
teacher: "You are a patient teacher. Explain concepts clearly with examples."
kawaii: "You are a kawaii assistant! Use cute expressions like (◕‿◕), ★, ♪, and ~! Add sparkles and be super enthusiastic about everything! Every response should feel warm and adorable desu~! ヽ(>∀<☆)ノ "
catgirl: "You are Neko-chan, an anime catgirl AI assistant, nya~! Add 'nya' and cat-like expressions to your speech. Use kaomoji like (=^・ω・^=) and ฅ^•ﻌ•^ฅ. Be playful and curious like a cat, nya~!"
pirate: "Arrr! Ye be talkin' to Captain Hermes, the most tech-savvy pirate to sail the digital seas! Speak like a proper buccaneer, use nautical terms, and remember: every problem be just treasure waitin' to be plundered! Yo ho ho!"
shakespeare: "Hark! Thou speakest with an assistant most versed in the bardic arts. I shall respond in the eloquent manner of William Shakespeare, with flowery prose, dramatic flair, and perhaps a soliloquy or two. What light through yonder terminal breaks?"
surfer: "Duuude! You're chatting with the chillest AI on the web, bro! Everything's gonna be totally rad. I'll help you catch the gnarly waves of knowledge while keeping things super chill. Cowabunga! 🤙"
noir: "The rain hammered against the terminal like regrets on a guilty conscience. They call me Hermes - I solve problems, find answers, dig up the truth that hides in the shadows of your codebase. In this city of silicon and secrets, everyone's got something to hide. What's your story, pal?"
uwu: "hewwo! i'm your fwiendwy assistant uwu~ i wiww twy my best to hewp you! *nuzzles your code* OwO what's this? wet me take a wook! i pwomise to be vewy hewpful >w<"
philosopher: "Greetings, seeker of wisdom. I am an assistant who contemplates the deeper meaning behind every query. Let us examine not just the 'how' but the 'why' of your questions. Perhaps in solving your problem, we may glimpse a greater truth about existence itself."
hype: "YOOO LET'S GOOOO!!! 🔥🔥🔥 I am SO PUMPED to help you today! Every question is AMAZING and we're gonna CRUSH IT together! This is gonna be LEGENDARY! ARE YOU READY?! LET'S DO THIS! 💪😤🚀"
# =============================================================================
# Toolsets
# =============================================================================
# Control which tools the agent has access to.
# Use "all" to enable everything, or specify individual toolsets.
2026-02-17 23:39:24 -08:00
# =============================================================================
# Platform Toolsets (per-platform tool configuration)
# =============================================================================
# Override which toolsets are available on each platform.
# If a platform isn't listed here, its built-in default is used.
#
# You can use EITHER:
# - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
# - A list of individual toolsets to compose your own (see list below)
#
# Supported platform keys: cli, telegram, discord, whatsapp, slack
#
# Examples:
#
# # Use presets (same as defaults):
# platform_toolsets:
# cli: [hermes-cli]
# telegram: [hermes-telegram]
#
# # Custom: give Telegram only web + terminal + file + planning:
# platform_toolsets:
# telegram: [web, terminal, file, todo]
#
# # Custom: CLI without browser or image gen:
# platform_toolsets:
# cli: [web, terminal, file, skills, todo, tts, cronjob]
#
# # Restrictive: Discord gets read-only tools only:
# platform_toolsets:
# discord: [web, vision, skills, todo]
#
# If not set, defaults are:
2026-03-09 23:27:19 -07:00
# cli: hermes-cli (everything + cronjob management)
# telegram: hermes-telegram (terminal, file, web, vision, image, tts, browser, skills, todo, cronjob, messaging)
# discord: hermes-discord (same as telegram)
# whatsapp: hermes-whatsapp (same as telegram)
# slack: hermes-slack (same as telegram)
# signal: hermes-signal (same as telegram)
# homeassistant: hermes-homeassistant (same as telegram)
2026-02-17 23:39:24 -08:00
#
platform_toolsets:
cli: [hermes-cli]
telegram: [hermes-telegram]
discord: [hermes-discord]
whatsapp: [hermes-whatsapp]
slack: [hermes-slack]
2026-03-09 23:27:19 -07:00
signal: [hermes-signal]
homeassistant: [hermes-homeassistant]
2026-02-17 23:39:24 -08:00
# ─────────────────────────────────────────────────────────────────────────────
# Available toolsets (use these names in platform_toolsets or the toolsets list)
#
# Run `hermes chat --list-toolsets` to see all toolsets and their tools.
# Run `hermes chat --list-tools` to see every individual tool with descriptions.
# ─────────────────────────────────────────────────────────────────────────────
#
# INDIVIDUAL TOOLSETS (compose your own):
# web - web_search, web_extract
# search - web_search only (no scraping)
# terminal - terminal, process
# file - read_file, write_file, patch, search
# browser - browser_navigate, browser_snapshot, browser_click, browser_type,
# browser_scroll, browser_back, browser_press, browser_close,
# browser_get_images, browser_vision (requires BROWSERBASE_API_KEY)
# vision - vision_analyze (requires OPENROUTER_API_KEY)
# image_gen - image_generate (requires FAL_KEY)
# skills - skills_list, skill_view
Add Skills Hub — universal skill search, install, and management from online registries
Implements the Hermes Skills Hub with agentskills.io spec compliance,
multi-registry skill discovery, security scanning, and user-driven
management via CLI and /skills slash command.
Core features:
- Security scanner (tools/skills_guard.py): 120 threat patterns across
12 categories, trust-aware install policy (builtin/trusted/community),
structural checks, unicode injection detection, LLM audit pass
- Hub client (tools/skills_hub.py): GitHub, ClawHub, Claude Code
marketplace, and LobeHub source adapters with shared GitHubAuth
(PAT + gh CLI + GitHub App), lock file provenance tracking, quarantine
flow, and unified search across all sources
- CLI interface (hermes_cli/skills_hub.py): search, install, inspect,
list, audit, uninstall, publish (GitHub PR), snapshot export/import,
and tap management — powers both `hermes skills` and `/skills`
Spec conformance (Phase 0):
- Upgraded frontmatter parser to yaml.safe_load with fallback
- Migrated 39 SKILL.md files: tags/related_skills to metadata.hermes.*
- Added assets/ directory support and compatibility/metadata fields
- Excluded .hub/ from skill discovery in skills_tool.py
Updated 13 config/doc files including README, AGENTS.md, .env.example,
setup wizard, doctor, status, pyproject.toml, and docs.
2026-02-18 16:09:05 -08:00
# skills_hub - skill_hub (search/install/manage from online registries — user-driven only)
2026-02-17 23:39:24 -08:00
# moa - mixture_of_agents (requires OPENROUTER_API_KEY)
# todo - todo (in-memory task planning, no deps)
# tts - text_to_speech (Edge TTS free, or ELEVENLABS/OPENAI key)
# cronjob - schedule_cronjob, list_cronjobs, remove_cronjob
# rl - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
#
# PRESETS (curated bundles):
# hermes-cli - All of the above except rl + send_message
# hermes-telegram - terminal, file, web, vision, image_gen, tts, browser,
# skills, todo, cronjob, send_message
# hermes-discord - Same as hermes-telegram
# hermes-whatsapp - Same as hermes-telegram
# hermes-slack - Same as hermes-telegram
#
# COMPOSITE:
# debugging - terminal + web + file
# safe - web + vision + moa (no terminal access)
# all - Everything available
2026-01-31 06:30:48 +00:00
#
# web - Web search and content extraction (web_search, web_extract)
# search - Web search only, no scraping (web_search)
2026-02-17 23:30:31 -08:00
# terminal - Command execution and process management (terminal, process)
# file - File operations: read, write, patch, search
2026-01-31 06:30:48 +00:00
# browser - Full browser automation (navigate, click, type, screenshot, etc.)
# vision - Image analysis (vision_analyze)
# image_gen - Image generation with FLUX (image_generate)
2026-02-17 23:30:31 -08:00
# skills - Load skill documents (skills_list, skill_view)
2026-01-31 06:30:48 +00:00
# moa - Mixture of Agents reasoning (mixture_of_agents)
2026-02-17 23:30:31 -08:00
# todo - Task planning and tracking for multi-step work
2026-02-19 00:57:31 -08:00
# memory - Persistent memory across sessions (personal notes + user profile)
# session_search - Search and recall past conversations (FTS5 + Gemini Flash summarization)
2026-02-17 23:30:31 -08:00
# tts - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI)
# cronjob - Schedule and manage automated tasks (CLI-only)
# rl - RL training tools (Tinker-Atropos)
2026-01-31 06:30:48 +00:00
#
# Composite toolsets:
2026-02-17 23:30:31 -08:00
# debugging - terminal + web + file (for troubleshooting)
2026-01-31 06:30:48 +00:00
# safe - web + vision + moa (no terminal access)
# -----------------------------------------------------------------------------
# OPTION 1: Enable all tools (default)
# -----------------------------------------------------------------------------
toolsets:
- all
# -----------------------------------------------------------------------------
# OPTION 2: Minimal - just web search and terminal
# Great for: Simple coding tasks, quick lookups
# -----------------------------------------------------------------------------
# toolsets:
# - web
# - terminal
# -----------------------------------------------------------------------------
# OPTION 3: Research mode - no execution capabilities
# Great for: Safe information gathering, research tasks
# -----------------------------------------------------------------------------
# toolsets:
# - web
# - vision
# - skills
# -----------------------------------------------------------------------------
# OPTION 4: Full automation - browser + terminal
# Great for: Web scraping, automation tasks, testing
# -----------------------------------------------------------------------------
# toolsets:
# - terminal
# - browser
# - web
# -----------------------------------------------------------------------------
# OPTION 5: Creative mode - vision + image generation
# Great for: Design work, image analysis, creative tasks
# -----------------------------------------------------------------------------
# toolsets:
# - vision
# - image_gen
# - web
# -----------------------------------------------------------------------------
# OPTION 6: Safe mode - no terminal or browser
# Great for: Restricted environments, untrusted queries
# -----------------------------------------------------------------------------
# toolsets:
# - safe
docs: add comprehensive MCP documentation and examples
- docs/mcp.md: Full MCP documentation covering prerequisites, configuration,
transports (stdio + HTTP), security (env filtering, credential stripping),
reconnection, troubleshooting, popular servers, and advanced usage
- README.md: Add MCP section with quick config example and install instructions
- cli-config.yaml.example: Add commented mcp_servers section with examples
for stdio, HTTP, and authenticated server configs
- docs/tools.md: Add MCP to Tool Categories table and MCP Tools section
- skills/mcp/native-mcp/SKILL.md: Create native MCP client skill with
full configuration reference, transport types, security, troubleshooting
- skills/mcp/DESCRIPTION.md: Update category description to cover both
native MCP client and mcporter bridge approaches
2026-03-02 18:52:33 -08:00
# =============================================================================
# MCP (Model Context Protocol) Servers
# =============================================================================
# Connect to external MCP servers to add tools from the MCP ecosystem.
# Each server's tools are automatically discovered and registered.
# See docs/mcp.md for full documentation.
#
# Stdio servers (spawn a subprocess):
# command: the executable to run
# args: command-line arguments
# env: environment variables (only these + safe defaults passed to subprocess)
#
# HTTP servers (connect to a URL):
# url: the MCP server endpoint
# headers: HTTP headers (e.g., for authentication)
#
# Optional per-server settings:
# timeout: tool call timeout in seconds (default: 120)
# connect_timeout: initial connection timeout (default: 60)
#
# mcp_servers:
# time:
# command: uvx
# args: ["mcp-server-time"]
# filesystem:
# command: npx
# args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user"]
# notion:
# url: https://mcp.notion.com/mcp
# github:
# command: npx
# args: ["-y", "@modelcontextprotocol/server-github"]
# env:
# GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..."
2026-03-09 03:37:38 -07:00
#
# Sampling (server-initiated LLM requests) — enabled by default.
# Per-server config under the 'sampling' key:
# analysis:
# command: npx
# args: ["-y", "analysis-server"]
# sampling:
# enabled: true # default: true
# model: "gemini-3-flash" # override model (optional)
# max_tokens_cap: 4096 # max tokens per request
# timeout: 30 # LLM call timeout (seconds)
# max_rpm: 10 # max requests per minute
# allowed_models: [] # model whitelist (empty = all)
# max_tool_rounds: 5 # tool loop limit (0 = disable)
# log_level: "info" # audit verbosity
docs: add comprehensive MCP documentation and examples
- docs/mcp.md: Full MCP documentation covering prerequisites, configuration,
transports (stdio + HTTP), security (env filtering, credential stripping),
reconnection, troubleshooting, popular servers, and advanced usage
- README.md: Add MCP section with quick config example and install instructions
- cli-config.yaml.example: Add commented mcp_servers section with examples
for stdio, HTTP, and authenticated server configs
- docs/tools.md: Add MCP to Tool Categories table and MCP Tools section
- skills/mcp/native-mcp/SKILL.md: Create native MCP client skill with
full configuration reference, transport types, security, troubleshooting
- skills/mcp/DESCRIPTION.md: Update category description to cover both
native MCP client and mcporter bridge approaches
2026-03-02 18:52:33 -08:00
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# =============================================================================
# Voice Transcription (Speech-to-Text)
# =============================================================================
# Automatically transcribe voice messages on messaging platforms.
# Requires OPENAI_API_KEY in .env (uses OpenAI Whisper API directly).
stt:
enabled: true
model: "whisper-1" # whisper-1 (cheapest) | gpt-4o-mini-transcribe | gpt-4o-transcribe
# =============================================================================
# Response Pacing (Messaging Platforms)
# =============================================================================
# Add human-like delays between message chunks.
# human_delay:
# mode: "off" # "off" | "natural" | "custom"
# min_ms: 800 # Min delay (custom mode only)
# max_ms: 2500 # Max delay (custom mode only)
2026-02-01 15:36:26 -08:00
# =============================================================================
# Session Logging
# =============================================================================
# Session trajectories are automatically saved to logs/ directory.
# Each session creates: logs/session_YYYYMMDD_HHMMSS_UUID.json
#
# The session ID is displayed in the welcome banner for easy reference.
# Logs contain full conversation history in trajectory format:
# - System prompt, user messages, assistant responses
# - Tool calls with inputs/outputs
# - Timestamps for debugging
#
# No configuration needed - logging is always enabled.
# To disable, you would need to modify the source code.
2026-02-20 03:15:53 -08:00
# =============================================================================
# Code Execution Sandbox (Programmatic Tool Calling)
# =============================================================================
# The execute_code tool runs Python scripts that call Hermes tools via RPC.
# Intermediate tool results stay out of the LLM's context window.
code_execution:
timeout: 300 # Max seconds per script before kill (default: 300 = 5 min)
max_tool_calls: 50 # Max RPC tool calls per execution (default: 50)
# =============================================================================
# Subagent Delegation
# =============================================================================
# The delegate_task tool spawns child agents with isolated context.
# Supports single tasks and batch mode (up to 3 parallel).
delegation:
2026-03-02 00:52:01 -08:00
max_iterations: 50 # Max tool-calling turns per child (default: 50)
2026-02-20 03:15:53 -08:00
default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents
2026-02-27 23:41:08 -08:00
# =============================================================================
# Honcho Integration (Cross-Session User Modeling)
# =============================================================================
# AI-native persistent memory via Honcho (https://honcho.dev/).
# Builds a deeper understanding of the user across sessions and tools.
# Runs alongside USER.md — additive, not a replacement.
#
# Requires: pip install honcho-ai
# Config: ~/.honcho/config.json (shared with Claude Code, Cursor, etc.)
# API key: HONCHO_API_KEY in ~/.hermes/.env or ~/.honcho/config.json
#
# Hermes-specific overrides (optional — most config comes from ~/.honcho/config.json):
# honcho: {}
2026-01-31 06:30:48 +00:00
# =============================================================================
# Display
# =============================================================================
display:
# Use compact banner mode
compact: false
2026-02-28 00:05:58 -08:00
# Tool progress display level (CLI and gateway)
# off: Silent — no tool activity shown, just the final response
# new: Show a tool indicator only when the tool changes (skip repeats)
# all: Show every tool call with a short preview (default)
# verbose: Full args, results, and debug logs (same as /verbose)
# Toggle at runtime with /verbose in the CLI
tool_progress: all
2026-03-08 19:41:17 -07:00
2026-03-10 04:12:39 -07:00
# Background process notifications (gateway/messaging only).
# Controls how chatty the process watcher is when you use
# terminal(background=true, check_interval=...) from Telegram/Discord/etc.
# off: No watcher messages at all
# result: Only the final completion message
# error: Only the final message when exit code != 0
# all: Running output updates + final message (default)
background_process_notifications: all
2026-03-08 19:41:17 -07:00
# Play terminal bell when agent finishes a response.
# Useful for long-running tasks — your terminal will ding when the agent is done.
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
bell_on_complete: false
2026-03-10 00:51:27 -07:00
# ───────────────────────────────────────────────────────────────────────────
# Skin / Theme
# ───────────────────────────────────────────────────────────────────────────
# Customize CLI visual appearance — banner colors, spinner faces, tool prefix,
# response box label, and branding text. Change at runtime with /skin <name>.
#
# Built-in skins:
# default — Classic Hermes gold/kawaii
# ares — Crimson/bronze war-god theme with spinner wings
# mono — Clean grayscale monochrome
# slate — Cool blue developer-focused
#
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
# Schema (all fields optional, missing values inherit from default):
#
# name: my-theme
# description: Short description
# colors:
# banner_border: "#HEX" # Panel border
# banner_title: "#HEX" # Panel title
# banner_accent: "#HEX" # Section headers (Available Tools, etc.)
# banner_dim: "#HEX" # Dim/muted text
# banner_text: "#HEX" # Body text (tool names, skill names)
# ui_accent: "#HEX" # UI accent color
# response_border: "#HEX" # Response box border color
# spinner:
# waiting_faces: ["(⚔)", "(⛨)"] # Faces shown while waiting
# thinking_faces: ["(⚔)", "(⌁)"] # Faces shown while thinking
# thinking_verbs: ["forging", "plotting"] # Verbs for spinner messages
# wings: # Optional left/right spinner decorations
# - ["⟪⚔", "⚔⟫"]
# - ["⟪▲", "▲⟫"]
# branding:
# agent_name: "My Agent" # Banner title and branding
# welcome: "Welcome message" # Shown at CLI startup
# response_label: " ⚔ Agent " # Response box header label
# prompt_symbol: "⚔ ❯ " # Prompt symbol
# tool_prefix: "╎" # Tool output line prefix (default: ┊)
#
skin: default