2026-02-02 19:01:51 -08:00
"""
Gateway runner - entry point for messaging platform integrations .
This module provides :
- start_gateway ( ) : Start all configured platform adapters
- GatewayRunner : Main class managing the gateway lifecycle
Usage :
# Start the gateway
python - m gateway . run
# Or from CLI
python cli . py - - gateway
"""
import asyncio
2026-03-10 22:55:36 +03:00
import json
2026-02-21 03:11:11 -08:00
import logging
2026-02-02 19:01:51 -08:00
import os
2026-02-14 16:08:14 -08:00
import re
2026-03-11 20:43:31 +01:00
import shlex
2026-02-02 19:01:51 -08:00
import sys
import signal
2026-03-10 22:55:36 +03:00
import tempfile
2026-02-21 16:21:19 -08:00
import threading
2026-03-10 22:55:36 +03:00
import time
2026-02-22 02:16:11 -08:00
from logging . handlers import RotatingFileHandler
2026-02-02 19:01:51 -08:00
from pathlib import Path
from datetime import datetime
from typing import Dict , Optional , Any , List
2026-03-15 23:04:34 -07:00
# ---------------------------------------------------------------------------
# SSL certificate auto-detection for NixOS and other non-standard systems.
# Must run BEFORE any HTTP library (discord, aiohttp, etc.) is imported.
# ---------------------------------------------------------------------------
def _ensure_ssl_certs ( ) - > None :
""" Set SSL_CERT_FILE if the system doesn ' t expose CA certs to Python. """
if " SSL_CERT_FILE " in os . environ :
return # user already configured it
import ssl
# 1. Python's compiled-in defaults
paths = ssl . get_default_verify_paths ( )
for candidate in ( paths . cafile , paths . openssl_cafile ) :
if candidate and os . path . exists ( candidate ) :
os . environ [ " SSL_CERT_FILE " ] = candidate
return
# 2. certifi (ships its own Mozilla bundle)
try :
import certifi
os . environ [ " SSL_CERT_FILE " ] = certifi . where ( )
return
except ImportError :
pass
# 3. Common distro / macOS locations
for candidate in (
" /etc/ssl/certs/ca-certificates.crt " , # Debian/Ubuntu/Gentoo
" /etc/pki/tls/certs/ca-bundle.crt " , # RHEL/CentOS 7
" /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem " , # RHEL/CentOS 8+
" /etc/ssl/ca-bundle.pem " , # SUSE/OpenSUSE
" /etc/ssl/cert.pem " , # Alpine / macOS
" /etc/pki/tls/cert.pem " , # Fedora
" /usr/local/etc/openssl@1.1/cert.pem " , # macOS Homebrew Intel
" /opt/homebrew/etc/openssl@1.1/cert.pem " , # macOS Homebrew ARM
) :
if os . path . exists ( candidate ) :
os . environ [ " SSL_CERT_FILE " ] = candidate
return
_ensure_ssl_certs ( )
2026-02-02 19:01:51 -08:00
# Add parent directory to path
sys . path . insert ( 0 , str ( Path ( __file__ ) . parent . parent ) )
2026-02-26 18:51:46 +11:00
# Resolve Hermes home directory (respects HERMES_HOME override)
refactor: consolidate get_hermes_home() and parse_reasoning_effort() (#3062)
Centralizes two widely-duplicated patterns into hermes_constants.py:
1. get_hermes_home() — Path resolution for ~/.hermes (HERMES_HOME env var)
- Was copy-pasted inline across 30+ files as:
Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
- Now defined once in hermes_constants.py (zero-dependency module)
- hermes_cli/config.py re-exports it for backward compatibility
- Removed local wrapper functions in honcho_integration/client.py,
tools/website_policy.py, tools/tirith_security.py, hermes_cli/uninstall.py
2. parse_reasoning_effort() — Reasoning effort string validation
- Was copy-pasted in cli.py, gateway/run.py, cron/scheduler.py
- Same validation logic: check against (xhigh, high, medium, low, minimal, none)
- Now defined once in hermes_constants.py, called from all 3 locations
- Warning log for unknown values kept at call sites (context-specific)
31 files changed, net +31 lines (125 insertions, 94 deletions)
Full test suite: 6179 passed, 0 failed
2026-03-25 15:54:28 -07:00
from hermes_constants import get_hermes_home
_hermes_home = get_hermes_home ( )
2026-02-26 18:51:46 +11:00
2026-03-15 06:46:28 -07:00
# Load environment variables from ~/.hermes/.env first.
# User-managed env files should override stale shell exports on restart.
from dotenv import load_dotenv # backward-compat for tests that monkeypatch this symbol
from hermes_cli . env_loader import load_hermes_dotenv
2026-02-26 18:51:46 +11:00
_env_path = _hermes_home / ' .env '
2026-03-15 06:46:28 -07:00
load_hermes_dotenv ( hermes_home = _hermes_home , project_env = Path ( __file__ ) . resolve ( ) . parents [ 1 ] / ' .env ' )
2026-02-03 07:02:59 -08:00
2026-02-22 17:35:45 -08:00
# Bridge config.yaml values into the environment so os.getenv() picks them up.
2026-02-26 20:05:35 -08:00
# config.yaml is authoritative for terminal settings — overrides .env.
2026-02-26 18:51:46 +11:00
_config_path = _hermes_home / ' config.yaml '
2026-02-22 17:35:45 -08:00
if _config_path . exists ( ) :
try :
import yaml as _yaml
2026-03-05 17:04:33 -05:00
with open ( _config_path , encoding = " utf-8 " ) as _f :
2026-02-22 17:35:45 -08:00
_cfg = _yaml . safe_load ( _f ) or { }
2026-03-23 16:02:06 -07:00
# Expand ${ENV_VAR} references before bridging to env vars.
from hermes_cli . config import _expand_env_vars
_cfg = _expand_env_vars ( _cfg )
2026-02-26 20:05:35 -08:00
# Top-level simple values (fallback only — don't override .env)
2026-02-22 17:35:45 -08:00
for _key , _val in _cfg . items ( ) :
if isinstance ( _val , ( str , int , float , bool ) ) and _key not in os . environ :
os . environ [ _key ] = str ( _val )
2026-02-26 20:05:35 -08:00
# Terminal config is nested — bridge to TERMINAL_* env vars.
# config.yaml overrides .env for these since it's the documented config path.
_terminal_cfg = _cfg . get ( " terminal " , { } )
if _terminal_cfg and isinstance ( _terminal_cfg , dict ) :
_terminal_env_map = {
" backend " : " TERMINAL_ENV " ,
" cwd " : " TERMINAL_CWD " ,
" timeout " : " TERMINAL_TIMEOUT " ,
" lifetime_seconds " : " TERMINAL_LIFETIME_SECONDS " ,
" docker_image " : " TERMINAL_DOCKER_IMAGE " ,
2026-03-17 02:34:25 -07:00
" docker_forward_env " : " TERMINAL_DOCKER_FORWARD_ENV " ,
2026-02-26 20:05:35 -08:00
" singularity_image " : " TERMINAL_SINGULARITY_IMAGE " ,
" modal_image " : " TERMINAL_MODAL_IMAGE " ,
2026-03-05 11:12:50 -08:00
" daytona_image " : " TERMINAL_DAYTONA_IMAGE " ,
2026-02-26 20:05:35 -08:00
" ssh_host " : " TERMINAL_SSH_HOST " ,
" ssh_user " : " TERMINAL_SSH_USER " ,
" ssh_port " : " TERMINAL_SSH_PORT " ,
" ssh_key " : " TERMINAL_SSH_KEY " ,
" container_cpu " : " TERMINAL_CONTAINER_CPU " ,
" container_memory " : " TERMINAL_CONTAINER_MEMORY " ,
" container_disk " : " TERMINAL_CONTAINER_DISK " ,
" container_persistent " : " TERMINAL_CONTAINER_PERSISTENT " ,
2026-03-09 17:24:00 -07:00
" docker_volumes " : " TERMINAL_DOCKER_VOLUMES " ,
2026-03-08 01:33:46 -08:00
" sandbox_dir " : " TERMINAL_SANDBOX_DIR " ,
2026-03-15 20:17:13 -07:00
" persistent_shell " : " TERMINAL_PERSISTENT_SHELL " ,
2026-02-26 20:05:35 -08:00
}
for _cfg_key , _env_var in _terminal_env_map . items ( ) :
if _cfg_key in _terminal_cfg :
2026-03-09 17:24:00 -07:00
_val = _terminal_cfg [ _cfg_key ]
if isinstance ( _val , list ) :
os . environ [ _env_var ] = json . dumps ( _val )
else :
os . environ [ _env_var ] = str ( _val )
2026-03-17 04:46:15 -07:00
# Compression config is read directly from config.yaml by run_agent.py
# and auxiliary_client.py — no env var bridging needed.
2026-03-14 20:48:29 -07:00
# Auxiliary model/direct-endpoint overrides (vision, web_extract).
# Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
2026-03-08 18:06:40 -07:00
_auxiliary_cfg = _cfg . get ( " auxiliary " , { } )
if _auxiliary_cfg and isinstance ( _auxiliary_cfg , dict ) :
_aux_task_env = {
2026-03-14 20:48:29 -07:00
" vision " : {
" provider " : " AUXILIARY_VISION_PROVIDER " ,
" model " : " AUXILIARY_VISION_MODEL " ,
" base_url " : " AUXILIARY_VISION_BASE_URL " ,
" api_key " : " AUXILIARY_VISION_API_KEY " ,
} ,
" web_extract " : {
" provider " : " AUXILIARY_WEB_EXTRACT_PROVIDER " ,
" model " : " AUXILIARY_WEB_EXTRACT_MODEL " ,
" base_url " : " AUXILIARY_WEB_EXTRACT_BASE_URL " ,
" api_key " : " AUXILIARY_WEB_EXTRACT_API_KEY " ,
} ,
feat: smart approvals + /stop command (inspired by OpenAI Codex)
* feat: smart approvals — LLM-based risk assessment for dangerous commands
Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.
Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).
Config (config.yaml):
approvals:
mode: manual # manual (default), smart, off
Modes:
- manual — current behavior, always prompt the user
- smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
or ESCALATE (fall through to manual prompt)
- off — skip all approval prompts (equivalent to --yolo)
When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.
The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.
* feat: make smart approval model configurable via config.yaml
Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).
Config:
auxiliary:
approval:
provider: auto
model: '' # fast/cheap model recommended
base_url: ''
api_key: ''
Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.
* feat: add /stop command to kill all background processes
Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.
Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.
Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.
2026-03-16 06:20:11 -07:00
" approval " : {
" provider " : " AUXILIARY_APPROVAL_PROVIDER " ,
" model " : " AUXILIARY_APPROVAL_MODEL " ,
" base_url " : " AUXILIARY_APPROVAL_BASE_URL " ,
" api_key " : " AUXILIARY_APPROVAL_API_KEY " ,
} ,
2026-03-08 18:06:40 -07:00
}
2026-03-14 20:48:29 -07:00
for _task_key , _env_map in _aux_task_env . items ( ) :
2026-03-08 18:06:40 -07:00
_task_cfg = _auxiliary_cfg . get ( _task_key , { } )
if not isinstance ( _task_cfg , dict ) :
continue
_prov = str ( _task_cfg . get ( " provider " , " " ) ) . strip ( )
_model = str ( _task_cfg . get ( " model " , " " ) ) . strip ( )
2026-03-14 20:48:29 -07:00
_base_url = str ( _task_cfg . get ( " base_url " , " " ) ) . strip ( )
_api_key = str ( _task_cfg . get ( " api_key " , " " ) ) . strip ( )
2026-03-08 18:06:40 -07:00
if _prov and _prov != " auto " :
2026-03-14 20:48:29 -07:00
os . environ [ _env_map [ " provider " ] ] = _prov
2026-03-08 18:06:40 -07:00
if _model :
2026-03-14 20:48:29 -07:00
os . environ [ _env_map [ " model " ] ] = _model
if _base_url :
os . environ [ _env_map [ " base_url " ] ] = _base_url
if _api_key :
os . environ [ _env_map [ " api_key " ] ] = _api_key
2026-02-28 10:35:49 -08:00
_agent_cfg = _cfg . get ( " agent " , { } )
if _agent_cfg and isinstance ( _agent_cfg , dict ) :
if " max_turns " in _agent_cfg :
os . environ [ " HERMES_MAX_ITERATIONS " ] = str ( _agent_cfg [ " max_turns " ] )
2026-03-03 11:57:18 +05:30
# Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
# HERMES_TIMEZONE from .env takes precedence (already in os.environ).
_tz_cfg = _cfg . get ( " timezone " , " " )
if _tz_cfg and isinstance ( _tz_cfg , str ) and " HERMES_TIMEZONE " not in os . environ :
os . environ [ " HERMES_TIMEZONE " ] = _tz_cfg . strip ( )
2026-03-09 01:04:33 -07:00
# Security settings
_security_cfg = _cfg . get ( " security " , { } )
if isinstance ( _security_cfg , dict ) :
_redact = _security_cfg . get ( " redact_secrets " )
if _redact is not None :
os . environ [ " HERMES_REDACT_SECRETS " ] = str ( _redact ) . lower ( )
2026-02-22 17:35:45 -08:00
except Exception :
pass # Non-fatal; gateway can still run with .env values
2026-02-03 10:46:23 -08:00
# Gateway runs in quiet mode - suppress debug output and use cwd directly (no temp dirs)
os . environ [ " HERMES_QUIET " ] = " 1 "
2026-02-12 10:05:08 -08:00
# Enable interactive exec approval for dangerous commands on messaging platforms
os . environ [ " HERMES_EXEC_ASK " ] = " 1 "
2026-03-08 01:33:46 -08:00
# Set terminal working directory for messaging platforms.
# If the user set an explicit path in config.yaml (not "." or "auto"),
# respect it. Otherwise use MESSAGING_CWD or default to home directory.
_configured_cwd = os . environ . get ( " TERMINAL_CWD " , " " )
if not _configured_cwd or _configured_cwd in ( " . " , " auto " , " cwd " ) :
messaging_cwd = os . getenv ( " MESSAGING_CWD " ) or str ( Path . home ( ) )
os . environ [ " TERMINAL_CWD " ] = messaging_cwd
2026-02-03 10:46:23 -08:00
2026-02-02 19:01:51 -08:00
from gateway . config import (
Platform ,
GatewayConfig ,
load_gateway_config ,
)
from gateway . session import (
SessionStore ,
SessionSource ,
SessionContext ,
build_session_context ,
build_session_context_prompt ,
2026-03-04 03:34:45 -08:00
build_session_key ,
2026-02-02 19:01:51 -08:00
)
chore: remove ~100 unused imports across 55 files (#3016)
Automated cleanup via pyflakes + autoflake with manual review.
Changes:
- Removed unused stdlib imports (os, sys, json, pathlib.Path, etc.)
- Removed unused typing imports (List, Dict, Any, Optional, Tuple, Set, etc.)
- Removed unused internal imports (hermes_cli.auth, hermes_cli.config, etc.)
- Fixed cli.py: removed 8 shadowed banner imports (imported from hermes_cli.banner
then immediately redefined locally — only build_welcome_banner is actually used)
- Added noqa comments to imports that appear unused but serve a purpose:
- Re-exports (gateway/session.py SessionResetPolicy, tools/terminal_tool.py
is_interrupted/_interrupt_event)
- SDK presence checks in try/except (daytona, fal_client, discord)
- Test mock targets (auxiliary_client.py Path, mcp_config.py get_hermes_home)
Zero behavioral changes. Full test suite passes (6162/6162, 2 pre-existing
streaming test failures unrelated to this change).
2026-03-25 15:02:03 -07:00
from gateway . delivery import DeliveryRouter
2026-02-15 16:10:50 -08:00
from gateway . platforms . base import BasePlatformAdapter , MessageEvent , MessageType
2026-02-02 19:01:51 -08:00
2026-02-21 03:11:11 -08:00
logger = logging . getLogger ( __name__ )
2026-03-19 22:32:37 +03:00
# Sentinel placed into _running_agents immediately when a session starts
# processing, *before* any await. Prevents a second message for the same
# session from bypassing the "already running" guard during the async gap
# between the guard check and actual agent creation.
_AGENT_PENDING_SENTINEL = object ( )
2026-02-02 19:01:51 -08:00
2026-02-25 18:20:38 -08:00
def _resolve_runtime_agent_kwargs ( ) - > dict :
""" Resolve provider credentials for gateway-created AIAgent instances. """
from hermes_cli . runtime_provider import (
resolve_runtime_provider ,
format_runtime_provider_error ,
)
try :
runtime = resolve_runtime_provider (
requested = os . getenv ( " HERMES_INFERENCE_PROVIDER " ) ,
)
except Exception as exc :
raise RuntimeError ( format_runtime_provider_error ( exc ) ) from exc
return {
" api_key " : runtime . get ( " api_key " ) ,
" base_url " : runtime . get ( " base_url " ) ,
" provider " : runtime . get ( " provider " ) ,
" api_mode " : runtime . get ( " api_mode " ) ,
2026-03-17 23:40:22 -07:00
" command " : runtime . get ( " command " ) ,
" args " : list ( runtime . get ( " args " ) or [ ] ) ,
2026-02-25 18:20:38 -08:00
}
2026-03-26 13:39:41 -07:00
def _platform_config_key ( platform : " Platform " ) - > str :
""" Map a Platform enum to its config.yaml key (LOCAL→ " cli " , rest→enum value). """
return " cli " if platform == Platform . LOCAL else platform . value
def _load_gateway_config ( ) - > dict :
""" Load and parse ~/.hermes/config.yaml, returning {} on any error. """
try :
config_path = _hermes_home / ' config.yaml '
if config_path . exists ( ) :
import yaml
with open ( config_path , ' r ' , encoding = ' utf-8 ' ) as f :
return yaml . safe_load ( f ) or { }
except Exception :
logger . debug ( " Could not load gateway config from %s " , _hermes_home / ' config.yaml ' )
return { }
def _resolve_gateway_model ( config : dict | None = None ) - > str :
2026-03-11 00:09:37 +01:00
""" Read model from env/config — mirrors the resolution in _run_agent_sync.
Without this , temporary AIAgent instances ( memory flush , / compress ) fall
back to the hardcoded default ( " anthropic/claude-opus-4.6 " ) which fails
when the active provider is openai - codex .
"""
model = os . getenv ( " HERMES_MODEL " ) or os . getenv ( " LLM_MODEL " ) or " anthropic/claude-opus-4.6 "
2026-03-26 13:39:41 -07:00
cfg = config if config is not None else _load_gateway_config ( )
model_cfg = cfg . get ( " model " , { } )
if isinstance ( model_cfg , str ) :
model = model_cfg
elif isinstance ( model_cfg , dict ) :
model = model_cfg . get ( " default " , model )
2026-03-11 00:09:37 +01:00
return model
2026-03-12 18:02:21 +03:00
def _resolve_hermes_bin ( ) - > Optional [ list [ str ] ] :
""" Resolve the Hermes update command as argv parts.
Tries in order :
1. ` ` shutil . which ( " hermes " ) ` ` — standard PATH lookup
2. ` ` sys . executable - m hermes_cli . main ` ` — fallback when Hermes is running
from a venv / module invocation and the ` ` hermes ` ` shim is not on PATH
Returns argv parts ready for quoting / joining , or ` ` None ` ` if neither works .
"""
import shutil
hermes_bin = shutil . which ( " hermes " )
if hermes_bin :
return [ hermes_bin ]
try :
import importlib . util
if importlib . util . find_spec ( " hermes_cli " ) is not None :
return [ sys . executable , " -m " , " hermes_cli.main " ]
except Exception :
pass
return None
2026-02-02 19:01:51 -08:00
class GatewayRunner :
"""
Main gateway controller .
Manages the lifecycle of all platform adapters and routes
messages to / from the agent .
"""
def __init__ ( self , config : Optional [ GatewayConfig ] = None ) :
self . config = config or load_gateway_config ( )
self . adapters : Dict [ Platform , BasePlatformAdapter ] = { }
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
2026-02-23 23:55:42 -08:00
# Load ephemeral config from config.yaml / env vars.
# Both are injected at API-call time only and never persisted.
self . _prefill_messages = self . _load_prefill_messages ( )
self . _ephemeral_system_prompt = self . _load_ephemeral_system_prompt ( )
2026-02-24 03:30:19 -08:00
self . _reasoning_config = self . _load_reasoning_config ( )
fix: /reasoning command — add gateway support, fix display, persist settings (#1031)
* fix: /reasoning command output ordering, display, and inline think extraction
Three issues with the /reasoning command:
1. Output interleaving: The command echo used print() while feedback
used _cprint(), causing them to render out-of-order under
prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
all output renders through the same path in correct order.
2. Reasoning display not working: /reasoning show toggled a flag
but reasoning never appeared for models that embed thinking in
inline <think> blocks rather than structured API fields. Added
fallback extraction in _build_assistant_message to capture
<think> block content as reasoning when no structured reasoning
fields (reasoning, reasoning_content, reasoning_details) are
present. This feeds into both the reasoning callback (during
tool loops) and the post-response reasoning box display.
3. Feedback clarity: Added checkmarks to confirm actions, persisted
show/hide to config (was session-only before), and aligned the
status display for readability.
Tests: 7 new tests for inline think block extraction (41 total).
* feat: add /reasoning command to gateway (Telegram/Discord/etc)
The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:
1. /reasoning command handler in the gateway:
- No args: shows current effort level and display state
- /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
- /reasoning show|hide: toggles reasoning display in responses
- All changes saved to config.yaml immediately
2. Reasoning display in gateway responses:
- When show_reasoning is enabled, prepends a 'Reasoning' block
with the model's last_reasoning content before the response
- Collapses long reasoning (>15 lines) to keep messages readable
- Uses last_reasoning from run_conversation result dict
3. Plumbing:
- Added _show_reasoning attribute loaded from config at startup
- Propagated last_reasoning through _run_agent return dict
- Added /reasoning to help text and known_commands set
- Uses getattr for _show_reasoning to handle test stubs
2026-03-12 05:38:19 -07:00
self . _show_reasoning = self . _load_show_reasoning ( )
2026-03-01 18:24:27 -08:00
self . _provider_routing = self . _load_provider_routing ( )
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
self . _fallback_model = self . _load_fallback_model ( )
fix: hermes update causes dual gateways on macOS (launchd) (#1567)
* feat: add optional smart model routing
Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.
* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s
* fix(gateway): avoid recursive ExecStop in user systemd unit
* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit
The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.
---------
Co-authored-by: Ninja <ninja@local>
* feat(skills): add blender-mcp optional skill for 3D modeling
Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.
Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.
* feat(acp): support slash commands in ACP adapter (#1532)
Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.
Unrecognized /commands fall through to the LLM as normal messages.
/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.
Fixes #1402
* fix(logging): improve error logging in session search tool (#1533)
* fix(gateway): restart on retryable startup failures (#1517)
* feat(email): add skip_attachments option via config.yaml
* feat(email): add skip_attachments option via config.yaml
Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.
Configure in config.yaml:
platforms:
email:
skip_attachments: true
Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.
* docs: document skip_attachments option for email adapter
* fix(telegram): retry on transient TLS failures during connect and send
Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.
Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.
Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.
Based on PR #1527 by cmd8. Closes #1526.
* feat: permissive block_anchor thresholds and unicode normalization (#1539)
Salvaged from PR #1528 by an420eth. Closes #517.
Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
non-breaking spaces → ASCII) so LLM-produced unicode artifacts
don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
multiple candidates — if first/last lines match exactly, the
block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
preserve correct character positions
Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
* feat(cli): add file path autocomplete in the input prompt (#1545)
When typing a path-like token (./ ../ ~/ / or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.
Triggered by tokens like:
edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ...
check ~/doc → shows ~/docs/, ~/documents/, ...
read /etc/hos → shows /etc/hosts, /etc/hostname, ...
open tools/reg → shows tools/registry.py
Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.
Inspired by OpenCode PR #145 (file path completion menu).
Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
tokens, _path_completions() yields filesystem Completions with
size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
path extraction, prefix filtering, directory markers, home
expansion, case-insensitivity, integration with slash commands
* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled
Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:
- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)
Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.
Inspired by OpenClaw PR #47959.
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)
Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.
Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.
* feat: smart approvals + /stop command (inspired by OpenAI Codex)
* feat: smart approvals — LLM-based risk assessment for dangerous commands
Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.
Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).
Config (config.yaml):
approvals:
mode: manual # manual (default), smart, off
Modes:
- manual — current behavior, always prompt the user
- smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
or ESCALATE (fall through to manual prompt)
- off — skip all approval prompts (equivalent to --yolo)
When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.
The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.
* feat: make smart approval model configurable via config.yaml
Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).
Config:
auxiliary:
approval:
provider: auto
model: '' # fast/cheap model recommended
base_url: ''
api_key: ''
Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.
* feat: add /stop command to kill all background processes
Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.
Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.
Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.
* feat: first-class plugin architecture + hide status bar cost by default (#1544)
The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:
display:
show_cost: true
in config.yaml, or: hermes config set display.show_cost true
The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.
Status bar without cost:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m
Status bar with show_cost: true:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m
* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)
* feat: improve memory prioritization — user preferences over procedural knowledge
Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.
Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'
Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
preferences and corrections over task-specific details
* feat: more aggressive skill creation and update prompting
Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.
Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers
* feat: first-class plugin architecture (#1555)
Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.
Core system (hermes_cli/plugins.py):
- Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
pip entry_points (hermes_agent.plugins group)
- PluginContext with register_tool() and register_hook()
- 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
on_session_start/end
- Namespace package handling for relative imports in plugins
- Graceful error isolation — broken plugins never crash the agent
Integration (model_tools.py):
- Plugin discovery runs after built-in + MCP tools
- Plugin tools bypass toolset filter via get_plugin_tool_names()
- Pre/post tool call hooks fire in handle_function_call()
CLI:
- /plugins command shows loaded plugins, tool counts, status
- Added to COMMANDS dict for autocomplete
Docs:
- Getting started guide (build-a-hermes-plugin.md) — full tutorial
building a calculator plugin step by step
- Reference page (features/plugins.md) — quick overview + tables
- Covers: file structure, schemas, handlers, hooks, data files,
bundled skills, env var gating, pip distribution, common mistakes
Tests: 16 tests covering discovery, loading, hooks, tool visibility.
* fix: hermes update causes dual gateways on macOS (launchd)
Three bugs worked together to create the dual-gateway problem:
1. cmd_update only checked systemd for gateway restart, completely
ignoring launchd on macOS. After killing the PID it would print
'Restart it with: hermes gateway run' even when launchd was about
to auto-respawn the process.
2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
after SIGTERM (non-zero exit), so the user's manual restart
created a second instance.
3. The launchd plist lacked --replace (systemd had it), so the
respawned gateway didn't kill stale instances on startup.
Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint
* fix: add launchd plist auto-refresh + explicit restart in cmd_update
Two integration issues with the initial fix:
1. Existing macOS users with old plist (no --replace) would never
get the fix until manual uninstall/reinstall. Added
refresh_launchd_plist_if_needed() — mirrors the existing
refresh_systemd_unit_if_needed(). Called from launchd_start(),
launchd_restart(), and cmd_update.
2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
explicit launchctl stop/start. This caused races: launchd would
respawn the old process before the PID file was cleaned up.
Now does explicit stop+start (matching how systemd gets an
explicit systemctl restart), with plist refresh first so the
new --replace flag is picked up.
---------
Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
self . _smart_model_routing = self . _load_smart_model_routing ( )
2026-02-23 23:55:42 -08:00
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
# Wire process registry into session store for reset protection
from tools . process_registry import process_registry
self . session_store = SessionStore (
self . config . sessions_dir , self . config ,
has_active_processes_fn = lambda key : process_registry . has_active_for_session ( key ) ,
)
2026-02-02 19:01:51 -08:00
self . delivery_router = DeliveryRouter ( self . config )
self . _running = False
self . _shutdown_event = asyncio . Event ( )
2026-03-14 18:03:50 -07:00
self . _exit_cleanly = False
2026-03-21 09:38:52 -07:00
self . _exit_with_failure = False
2026-03-14 18:03:50 -07:00
self . _exit_reason : Optional [ str ] = None
2026-02-03 16:15:49 -08:00
# Track running agents per session for interrupt support
# Key: session_key, Value: AIAgent instance
self . _running_agents : Dict [ str , Any ] = { }
self . _pending_messages : Dict [ str , str ] = { } # Queued messages during interrupt
fix(gateway): /model shows active fallback model instead of config default (#1615)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.
- Apply truncate_message() chunking in _send_to_platform() before
dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement
Cherry-picked from PR #1557 by llbn.
* fix(approval): show full command in dangerous command approval (#1553)
Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:
- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests
Cherry-picked from PR #1566 by crazywriter1.
* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)
The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.
Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)
When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.
Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.
Cherry-picked from PR #1593 by ygd58.
* fix(security): block sandbox backend creds from subprocess env (#1264)
Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.
Cherry-picked from PR #1571 by ygd58.
* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.
Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.
The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).
* fix(gateway): /model shows active fallback model instead of config default (#1615)
When the agent falls back to a different model (e.g. due to rate
limiting), /model still showed the config default. Now tracks the
effective model/provider after each agent run and displays it.
Cleared when the primary model succeeds again or the user explicitly
switches via /model.
Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for
test compatibility.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
2026-03-17 02:26:51 -07:00
feat(gateway): cache AIAgent per session for prompt caching
The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.
Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents
Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.
This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.
2026-03-21 13:07:08 -07:00
# Cache AIAgent instances per session to preserve prompt caching.
# Without this, a new AIAgent is created per message, rebuilding the
# system prompt (including memory) every turn — breaking prefix cache
# and costing ~10x more on providers with prompt caching (Anthropic).
# Key: session_key, Value: (AIAgent, config_signature_str)
import threading as _threading
self . _agent_cache : Dict [ str , tuple ] = { }
self . _agent_cache_lock = _threading . Lock ( )
fix(gateway): /model shows active fallback model instead of config default (#1615)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.
- Apply truncate_message() chunking in _send_to_platform() before
dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement
Cherry-picked from PR #1557 by llbn.
* fix(approval): show full command in dangerous command approval (#1553)
Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:
- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests
Cherry-picked from PR #1566 by crazywriter1.
* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)
The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.
Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)
When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.
Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.
Cherry-picked from PR #1593 by ygd58.
* fix(security): block sandbox backend creds from subprocess env (#1264)
Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.
Cherry-picked from PR #1571 by ygd58.
* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.
Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.
The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).
* fix(gateway): /model shows active fallback model instead of config default (#1615)
When the agent falls back to a different model (e.g. due to rate
limiting), /model still showed the config default. Now tracks the
effective model/provider after each agent run and displays it.
Cleared when the primary model succeeds again or the user explicitly
switches via /model.
Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for
test compatibility.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
2026-03-17 02:26:51 -07:00
# Track active fallback model/provider when primary is rate-limited.
# Set after an agent run where fallback was activated; cleared when
# the primary model succeeds again or the user switches via /model.
self . _effective_model : Optional [ str ] = None
self . _effective_provider : Optional [ str ] = None
2026-02-12 10:05:08 -08:00
# Track pending exec approvals per session
2026-03-14 00:17:04 -07:00
# Key: session_key, Value: {"command": str, "pattern_key": str, ...}
self . _pending_approvals : Dict [ str , Dict [ str , Any ] ] = { }
2026-03-10 02:06:17 -07:00
2026-03-22 23:48:24 -07:00
# Track platforms that failed to connect for background reconnection.
# Key: Platform enum, Value: {"config": platform_config, "attempts": int, "next_retry": float}
self . _failed_platforms : Dict [ Platform , Dict [ str , Any ] ] = { }
2026-03-10 02:06:17 -07:00
# Persistent Honcho managers keyed by gateway session key.
# This preserves write_frequency="session" semantics across short-lived
# per-message AIAgent instances.
self . _honcho_managers : Dict [ str , Any ] = { }
self . _honcho_configs : Dict [ str , Any ] = { }
feat(security): add tirith pre-exec command scanning
Integrate tirith as a pre-execution security scanner that detects
homograph URLs, pipe-to-interpreter patterns, terminal injection,
zero-width Unicode, and environment variable manipulation — threats
the existing 50-pattern dangerous command detector doesn't cover.
Architecture: gather-then-decide — both tirith and the dangerous
command detector run before any approval prompt, preventing gateway
force=True replay from bypassing one check when only the other was
shown to the user.
New files:
- tools/tirith_security.py: subprocess wrapper with auto-installer,
mandatory cosign provenance verification, non-blocking background
download, disk-persistent failure markers with retryable-cause
tracking (cosign_missing auto-clears when cosign appears on PATH)
- tests/tools/test_tirith_security.py: 62 tests covering exit code
mapping, fail_open, cosign verification, background install,
HERMES_HOME isolation, and failure recovery
- tests/tools/test_command_guards.py: 21 integration tests for the
combined guard orchestration
Modified files:
- tools/approval.py: add check_all_command_guards() orchestrator,
add allow_permanent parameter to prompt_dangerous_approval()
- tools/terminal_tool.py: replace _check_dangerous_command with
consolidated check_all_command_guards
- cli.py: update _approval_callback for allow_permanent kwarg,
call ensure_installed() at startup
- gateway/run.py: iterate pattern_keys list on replay approval,
call ensure_installed() at startup
- hermes_cli/config.py: add security config defaults, split
commented sections for independent fallback
- cli-config.yaml.example: document tirith security config
2026-03-11 14:20:32 +05:30
# Ensure tirith security scanner is available (downloads if needed)
try :
from tools . tirith_security import ensure_installed
2026-03-15 18:07:03 -07:00
ensure_installed ( log_failures = False )
feat(security): add tirith pre-exec command scanning
Integrate tirith as a pre-execution security scanner that detects
homograph URLs, pipe-to-interpreter patterns, terminal injection,
zero-width Unicode, and environment variable manipulation — threats
the existing 50-pattern dangerous command detector doesn't cover.
Architecture: gather-then-decide — both tirith and the dangerous
command detector run before any approval prompt, preventing gateway
force=True replay from bypassing one check when only the other was
shown to the user.
New files:
- tools/tirith_security.py: subprocess wrapper with auto-installer,
mandatory cosign provenance verification, non-blocking background
download, disk-persistent failure markers with retryable-cause
tracking (cosign_missing auto-clears when cosign appears on PATH)
- tests/tools/test_tirith_security.py: 62 tests covering exit code
mapping, fail_open, cosign verification, background install,
HERMES_HOME isolation, and failure recovery
- tests/tools/test_command_guards.py: 21 integration tests for the
combined guard orchestration
Modified files:
- tools/approval.py: add check_all_command_guards() orchestrator,
add allow_permanent parameter to prompt_dangerous_approval()
- tools/terminal_tool.py: replace _check_dangerous_command with
consolidated check_all_command_guards
- cli.py: update _approval_callback for allow_permanent kwarg,
call ensure_installed() at startup
- gateway/run.py: iterate pattern_keys list on replay approval,
call ensure_installed() at startup
- hermes_cli/config.py: add security config defaults, split
commented sections for independent fallback
- cli-config.yaml.example: document tirith security config
2026-03-11 14:20:32 +05:30
except Exception :
pass # Non-fatal — fail-open at scan time if unavailable
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
2026-02-27 00:32:17 -05:00
# Initialize session database for session_search tool support
self . _session_db = None
try :
from hermes_state import SessionDB
self . _session_db = SessionDB ( )
except Exception as e :
logger . debug ( " SQLite session store not available: %s " , e )
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# DM pairing store for code-based user authorization
from gateway . pairing import PairingStore
self . pairing_store = PairingStore ( )
# Event hook system
from gateway . hooks import HookRegistry
self . hooks = HookRegistry ( )
2026-03-10 02:06:17 -07:00
2026-03-10 22:55:36 +03:00
# Per-chat voice reply mode: "off" | "voice_only" | "all"
self . _voice_mode : Dict [ str , str ] = self . _load_voice_modes ( )
2026-03-26 14:33:48 -07:00
# Track background tasks to prevent garbage collection mid-execution
self . _background_tasks : set = set ( )
2026-03-10 02:06:17 -07:00
def _get_or_create_gateway_honcho ( self , session_key : str ) :
""" Return a persistent Honcho manager/config pair for this gateway session. """
if not hasattr ( self , " _honcho_managers " ) :
self . _honcho_managers = { }
if not hasattr ( self , " _honcho_configs " ) :
self . _honcho_configs = { }
if session_key in self . _honcho_managers :
return self . _honcho_managers [ session_key ] , self . _honcho_configs . get ( session_key )
try :
from honcho_integration . client import HonchoClientConfig , get_honcho_client
from honcho_integration . session import HonchoSessionManager
hcfg = HonchoClientConfig . from_global_config ( )
2026-03-12 16:23:34 -04:00
if not hcfg . enabled or not hcfg . api_key :
2026-03-10 02:06:17 -07:00
return None , hcfg
client = get_honcho_client ( hcfg )
manager = HonchoSessionManager (
honcho = client ,
config = hcfg ,
context_tokens = hcfg . context_tokens ,
)
self . _honcho_managers [ session_key ] = manager
self . _honcho_configs [ session_key ] = hcfg
return manager , hcfg
except Exception as e :
logger . debug ( " Gateway Honcho init failed for %s : %s " , session_key , e )
return None , None
def _shutdown_gateway_honcho ( self , session_key : str ) - > None :
""" Flush and close the persistent Honcho manager for a gateway session. """
managers = getattr ( self , " _honcho_managers " , None )
configs = getattr ( self , " _honcho_configs " , None )
if managers is None or configs is None :
return
manager = managers . pop ( session_key , None )
configs . pop ( session_key , None )
if not manager :
return
try :
manager . shutdown ( )
except Exception as e :
logger . debug ( " Gateway Honcho shutdown failed for %s : %s " , session_key , e )
def _shutdown_all_gateway_honcho ( self ) - > None :
""" Flush and close all persistent Honcho managers. """
managers = getattr ( self , " _honcho_managers " , None )
if not managers :
return
for session_key in list ( managers . keys ( ) ) :
self . _shutdown_gateway_honcho ( session_key )
2026-02-02 19:01:51 -08:00
2026-03-18 03:17:23 -07:00
# -- Setup skill availability ----------------------------------------
def _has_setup_skill ( self ) - > bool :
""" Check if the hermes-agent-setup skill is installed. """
try :
from tools . skill_manager_tool import _find_skill
return _find_skill ( " hermes-agent-setup " ) is not None
except Exception :
return False
2026-03-10 22:55:36 +03:00
# -- Voice mode persistence ------------------------------------------
_VOICE_MODE_PATH = _hermes_home / " gateway_voice_mode.json "
def _load_voice_modes ( self ) - > Dict [ str , str ] :
try :
2026-03-14 06:14:22 -07:00
data = json . loads ( self . _VOICE_MODE_PATH . read_text ( ) )
2026-03-10 22:55:36 +03:00
except ( FileNotFoundError , json . JSONDecodeError , OSError ) :
return { }
2026-03-14 06:14:22 -07:00
if not isinstance ( data , dict ) :
return { }
valid_modes = { " off " , " voice_only " , " all " }
return {
str ( chat_id ) : mode
for chat_id , mode in data . items ( )
if mode in valid_modes
}
2026-03-10 22:55:36 +03:00
def _save_voice_modes ( self ) - > None :
try :
self . _VOICE_MODE_PATH . parent . mkdir ( parents = True , exist_ok = True )
self . _VOICE_MODE_PATH . write_text (
json . dumps ( self . _voice_mode , indent = 2 )
)
except OSError as e :
logger . warning ( " Failed to save voice modes: %s " , e )
2026-03-14 06:14:22 -07:00
def _set_adapter_auto_tts_disabled ( self , adapter , chat_id : str , disabled : bool ) - > None :
""" Update an adapter ' s in-memory auto-TTS suppression set if present. """
disabled_chats = getattr ( adapter , " _auto_tts_disabled_chats " , None )
if not isinstance ( disabled_chats , set ) :
return
if disabled :
disabled_chats . add ( chat_id )
else :
disabled_chats . discard ( chat_id )
def _sync_voice_mode_state_to_adapter ( self , adapter ) - > None :
""" Restore persisted /voice off state into a live platform adapter. """
disabled_chats = getattr ( adapter , " _auto_tts_disabled_chats " , None )
if not isinstance ( disabled_chats , set ) :
return
disabled_chats . clear ( )
disabled_chats . update (
chat_id for chat_id , mode in self . _voice_mode . items ( ) if mode == " off "
)
2026-03-10 22:55:36 +03:00
# -----------------------------------------------------------------
2026-03-16 00:23:47 -07:00
def _flush_memories_for_session (
self ,
old_session_id : str ,
honcho_session_key : Optional [ str ] = None ,
) :
2026-03-07 11:27:50 -08:00
""" Prompt the agent to save memories/skills before context is lost.
Synchronous worker — meant to be called via run_in_executor from
an async context so it doesn ' t block the event loop.
2026-02-26 21:20:50 -08:00
"""
2026-03-23 16:08:38 -07:00
# Skip cron sessions — they run headless with no meaningful user
# conversation to extract memories from.
if old_session_id and old_session_id . startswith ( " cron_ " ) :
logger . debug ( " Skipping memory flush for cron session: %s " , old_session_id )
return
2026-02-26 21:20:50 -08:00
try :
2026-03-07 11:27:50 -08:00
history = self . session_store . load_transcript ( old_session_id )
2026-02-26 21:20:50 -08:00
if not history or len ( history ) < 4 :
return
from run_agent import AIAgent
2026-02-28 21:47:51 -08:00
runtime_kwargs = _resolve_runtime_agent_kwargs ( )
if not runtime_kwargs . get ( " api_key " ) :
2026-02-26 21:20:50 -08:00
return
2026-03-11 00:09:37 +01:00
# Resolve model from config — AIAgent's default is OpenRouter-
# formatted ("anthropic/claude-opus-4.6") which fails when the
# active provider is openai-codex.
model = _resolve_gateway_model ( )
2026-02-26 21:20:50 -08:00
tmp_agent = AIAgent (
2026-02-28 21:47:51 -08:00
* * runtime_kwargs ,
2026-03-11 00:09:37 +01:00
model = model ,
2026-02-26 21:20:50 -08:00
max_iterations = 8 ,
quiet_mode = True ,
enabled_toolsets = [ " memory " , " skills " ] ,
2026-03-07 11:27:50 -08:00
session_id = old_session_id ,
2026-03-16 00:23:47 -07:00
honcho_session_key = honcho_session_key ,
2026-02-26 21:20:50 -08:00
)
2026-03-26 17:40:31 -07:00
# Fully silence the flush agent — quiet_mode only suppresses init
# messages; tool call output still leaks to the terminal through
# _safe_print → _print_fn. Set a no-op to prevent that.
tmp_agent . _print_fn = lambda * a , * * kw : None
2026-02-26 21:20:50 -08:00
# Build conversation history from transcript
msgs = [
{ " role " : m . get ( " role " ) , " content " : m . get ( " content " ) }
for m in history
if m . get ( " role " ) in ( " user " , " assistant " ) and m . get ( " content " )
]
2026-03-23 16:08:38 -07:00
# Read live memory state from disk so the flush agent can see
# what's already saved and avoid overwriting newer entries.
_current_memory = " "
try :
from tools . memory_tool import MEMORY_DIR
for fname , label in [
( " MEMORY.md " , " MEMORY (your personal notes) " ) ,
( " USER.md " , " USER PROFILE (who the user is) " ) ,
] :
fpath = MEMORY_DIR / fname
if fpath . exists ( ) :
content = fpath . read_text ( encoding = " utf-8 " ) . strip ( )
if content :
_current_memory + = f " \n \n ## Current { label } : \n { content } "
except Exception :
pass # Non-fatal — flush still works, just without the guard
2026-02-26 21:20:50 -08:00
# Give the agent a real turn to think about what to save
flush_prompt = (
" [System: This session is about to be automatically reset due to "
" inactivity or a scheduled daily reset. The conversation context "
" will be cleared after this turn. \n \n "
" Review the conversation above and: \n "
" 1. Save any important facts, preferences, or decisions to memory "
" (user profile or your notes) that would be useful in future sessions. \n "
" 2. If you discovered a reusable workflow or solved a non-trivial "
" problem, consider saving it as a skill. \n "
" 3. If nothing is worth saving, that ' s fine — just skip. \n \n "
2026-03-23 16:08:38 -07:00
)
if _current_memory :
flush_prompt + = (
" IMPORTANT — here is the current live state of memory. Other "
" sessions, cron jobs, or the user may have updated it since this "
" conversation ended. Do NOT overwrite or remove entries unless "
" the conversation above reveals something that genuinely "
" supersedes them. Only add new information that is not already "
" captured below. "
f " { _current_memory } \n \n "
)
flush_prompt + = (
2026-02-26 21:20:50 -08:00
" Do NOT respond to the user. Just use the memory and skill_manage "
" tools if needed, then stop.] "
)
tmp_agent . run_conversation (
user_message = flush_prompt ,
conversation_history = msgs ,
2026-03-16 00:23:47 -07:00
sync_honcho = False ,
2026-02-26 21:20:50 -08:00
)
2026-03-07 11:27:50 -08:00
logger . info ( " Pre-reset memory flush completed for session %s " , old_session_id )
feat(honcho): async memory integration with prefetch pipeline and recallMode
Adds full Honcho memory integration to Hermes:
- Session manager with async background writes, memory modes (honcho/hybrid/local),
and dialectic prefetch for first-turn context warming
- Agent integration: prefetch pipeline, tool surface gated by recallMode,
system prompt context injection, SIGTERM/SIGINT flush handlers
- CLI commands: setup, status, mode, tokens, peer, identity, migrate
- recallMode setting (auto | context | tools) for A/B testing retrieval strategies
- Session strategies: per-session, per-repo (git tree root), per-directory, global
- Polymorphic memoryMode config: string shorthand or per-peer object overrides
- 97 tests covering async writes, client config, session resolution, and memory modes
2026-03-09 15:58:22 -04:00
# Flush any queued Honcho writes before the session is dropped
if getattr ( tmp_agent , ' _honcho ' , None ) :
try :
tmp_agent . _honcho . shutdown ( )
except Exception :
pass
2026-02-26 21:20:50 -08:00
except Exception as e :
2026-03-07 11:27:50 -08:00
logger . debug ( " Pre-reset memory flush failed for session %s : %s " , old_session_id , e )
2026-03-16 00:23:47 -07:00
async def _async_flush_memories (
self ,
old_session_id : str ,
honcho_session_key : Optional [ str ] = None ,
) :
2026-03-07 11:27:50 -08:00
""" Run the sync memory flush in a thread pool so it won ' t block the event loop. """
loop = asyncio . get_event_loop ( )
2026-03-16 00:23:47 -07:00
await loop . run_in_executor (
None ,
self . _flush_memories_for_session ,
old_session_id ,
honcho_session_key ,
)
2026-03-14 18:03:50 -07:00
@property
def should_exit_cleanly ( self ) - > bool :
return self . _exit_cleanly
2026-03-21 09:38:52 -07:00
@property
def should_exit_with_failure ( self ) - > bool :
return self . _exit_with_failure
2026-03-14 18:03:50 -07:00
@property
def exit_reason ( self ) - > Optional [ str ] :
return self . _exit_reason
2026-03-16 00:22:23 -07:00
def _session_key_for_source ( self , source : SessionSource ) - > str :
""" Resolve the current session key for a source, honoring gateway config when available. """
if hasattr ( self , " session_store " ) and self . session_store is not None :
try :
session_key = self . session_store . _generate_session_key ( source )
if isinstance ( session_key , str ) and session_key :
return session_key
except Exception :
pass
config = getattr ( self , " config " , None )
return build_session_key (
source ,
group_sessions_per_user = getattr ( config , " group_sessions_per_user " , True ) ,
)
fix: hermes update causes dual gateways on macOS (launchd) (#1567)
* feat: add optional smart model routing
Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.
* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s
* fix(gateway): avoid recursive ExecStop in user systemd unit
* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit
The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.
---------
Co-authored-by: Ninja <ninja@local>
* feat(skills): add blender-mcp optional skill for 3D modeling
Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.
Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.
* feat(acp): support slash commands in ACP adapter (#1532)
Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.
Unrecognized /commands fall through to the LLM as normal messages.
/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.
Fixes #1402
* fix(logging): improve error logging in session search tool (#1533)
* fix(gateway): restart on retryable startup failures (#1517)
* feat(email): add skip_attachments option via config.yaml
* feat(email): add skip_attachments option via config.yaml
Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.
Configure in config.yaml:
platforms:
email:
skip_attachments: true
Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.
* docs: document skip_attachments option for email adapter
* fix(telegram): retry on transient TLS failures during connect and send
Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.
Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.
Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.
Based on PR #1527 by cmd8. Closes #1526.
* feat: permissive block_anchor thresholds and unicode normalization (#1539)
Salvaged from PR #1528 by an420eth. Closes #517.
Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
non-breaking spaces → ASCII) so LLM-produced unicode artifacts
don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
multiple candidates — if first/last lines match exactly, the
block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
preserve correct character positions
Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
* feat(cli): add file path autocomplete in the input prompt (#1545)
When typing a path-like token (./ ../ ~/ / or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.
Triggered by tokens like:
edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ...
check ~/doc → shows ~/docs/, ~/documents/, ...
read /etc/hos → shows /etc/hosts, /etc/hostname, ...
open tools/reg → shows tools/registry.py
Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.
Inspired by OpenCode PR #145 (file path completion menu).
Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
tokens, _path_completions() yields filesystem Completions with
size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
path extraction, prefix filtering, directory markers, home
expansion, case-insensitivity, integration with slash commands
* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled
Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:
- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)
Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.
Inspired by OpenClaw PR #47959.
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)
Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.
Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.
* feat: smart approvals + /stop command (inspired by OpenAI Codex)
* feat: smart approvals — LLM-based risk assessment for dangerous commands
Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.
Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).
Config (config.yaml):
approvals:
mode: manual # manual (default), smart, off
Modes:
- manual — current behavior, always prompt the user
- smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
or ESCALATE (fall through to manual prompt)
- off — skip all approval prompts (equivalent to --yolo)
When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.
The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.
* feat: make smart approval model configurable via config.yaml
Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).
Config:
auxiliary:
approval:
provider: auto
model: '' # fast/cheap model recommended
base_url: ''
api_key: ''
Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.
* feat: add /stop command to kill all background processes
Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.
Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.
Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.
* feat: first-class plugin architecture + hide status bar cost by default (#1544)
The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:
display:
show_cost: true
in config.yaml, or: hermes config set display.show_cost true
The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.
Status bar without cost:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m
Status bar with show_cost: true:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m
* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)
* feat: improve memory prioritization — user preferences over procedural knowledge
Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.
Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'
Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
preferences and corrections over task-specific details
* feat: more aggressive skill creation and update prompting
Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.
Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers
* feat: first-class plugin architecture (#1555)
Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.
Core system (hermes_cli/plugins.py):
- Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
pip entry_points (hermes_agent.plugins group)
- PluginContext with register_tool() and register_hook()
- 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
on_session_start/end
- Namespace package handling for relative imports in plugins
- Graceful error isolation — broken plugins never crash the agent
Integration (model_tools.py):
- Plugin discovery runs after built-in + MCP tools
- Plugin tools bypass toolset filter via get_plugin_tool_names()
- Pre/post tool call hooks fire in handle_function_call()
CLI:
- /plugins command shows loaded plugins, tool counts, status
- Added to COMMANDS dict for autocomplete
Docs:
- Getting started guide (build-a-hermes-plugin.md) — full tutorial
building a calculator plugin step by step
- Reference page (features/plugins.md) — quick overview + tables
- Covers: file structure, schemas, handlers, hooks, data files,
bundled skills, env var gating, pip distribution, common mistakes
Tests: 16 tests covering discovery, loading, hooks, tool visibility.
* fix: hermes update causes dual gateways on macOS (launchd)
Three bugs worked together to create the dual-gateway problem:
1. cmd_update only checked systemd for gateway restart, completely
ignoring launchd on macOS. After killing the PID it would print
'Restart it with: hermes gateway run' even when launchd was about
to auto-respawn the process.
2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
after SIGTERM (non-zero exit), so the user's manual restart
created a second instance.
3. The launchd plist lacked --replace (systemd had it), so the
respawned gateway didn't kill stale instances on startup.
Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint
* fix: add launchd plist auto-refresh + explicit restart in cmd_update
Two integration issues with the initial fix:
1. Existing macOS users with old plist (no --replace) would never
get the fix until manual uninstall/reinstall. Added
refresh_launchd_plist_if_needed() — mirrors the existing
refresh_systemd_unit_if_needed(). Called from launchd_start(),
launchd_restart(), and cmd_update.
2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
explicit launchctl stop/start. This caused races: launchd would
respawn the old process before the PID file was cleaned up.
Now does explicit stop+start (matching how systemd gets an
explicit systemctl restart), with plist refresh first so the
new --replace flag is picked up.
---------
Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
def _resolve_turn_agent_config ( self , user_message : str , model : str , runtime_kwargs : dict ) - > dict :
from agent . smart_model_routing import resolve_turn_route
primary = {
" model " : model ,
" api_key " : runtime_kwargs . get ( " api_key " ) ,
" base_url " : runtime_kwargs . get ( " base_url " ) ,
" provider " : runtime_kwargs . get ( " provider " ) ,
" api_mode " : runtime_kwargs . get ( " api_mode " ) ,
2026-03-17 23:40:22 -07:00
" command " : runtime_kwargs . get ( " command " ) ,
" args " : list ( runtime_kwargs . get ( " args " ) or [ ] ) ,
fix: hermes update causes dual gateways on macOS (launchd) (#1567)
* feat: add optional smart model routing
Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.
* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s
* fix(gateway): avoid recursive ExecStop in user systemd unit
* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit
The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.
---------
Co-authored-by: Ninja <ninja@local>
* feat(skills): add blender-mcp optional skill for 3D modeling
Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.
Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.
* feat(acp): support slash commands in ACP adapter (#1532)
Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.
Unrecognized /commands fall through to the LLM as normal messages.
/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.
Fixes #1402
* fix(logging): improve error logging in session search tool (#1533)
* fix(gateway): restart on retryable startup failures (#1517)
* feat(email): add skip_attachments option via config.yaml
* feat(email): add skip_attachments option via config.yaml
Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.
Configure in config.yaml:
platforms:
email:
skip_attachments: true
Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.
* docs: document skip_attachments option for email adapter
* fix(telegram): retry on transient TLS failures during connect and send
Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.
Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.
Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.
Based on PR #1527 by cmd8. Closes #1526.
* feat: permissive block_anchor thresholds and unicode normalization (#1539)
Salvaged from PR #1528 by an420eth. Closes #517.
Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
non-breaking spaces → ASCII) so LLM-produced unicode artifacts
don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
multiple candidates — if first/last lines match exactly, the
block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
preserve correct character positions
Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
* feat(cli): add file path autocomplete in the input prompt (#1545)
When typing a path-like token (./ ../ ~/ / or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.
Triggered by tokens like:
edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ...
check ~/doc → shows ~/docs/, ~/documents/, ...
read /etc/hos → shows /etc/hosts, /etc/hostname, ...
open tools/reg → shows tools/registry.py
Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.
Inspired by OpenCode PR #145 (file path completion menu).
Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
tokens, _path_completions() yields filesystem Completions with
size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
path extraction, prefix filtering, directory markers, home
expansion, case-insensitivity, integration with slash commands
* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled
Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:
- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)
Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.
Inspired by OpenClaw PR #47959.
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)
Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.
Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.
* feat: smart approvals + /stop command (inspired by OpenAI Codex)
* feat: smart approvals — LLM-based risk assessment for dangerous commands
Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.
Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).
Config (config.yaml):
approvals:
mode: manual # manual (default), smart, off
Modes:
- manual — current behavior, always prompt the user
- smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
or ESCALATE (fall through to manual prompt)
- off — skip all approval prompts (equivalent to --yolo)
When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.
The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.
* feat: make smart approval model configurable via config.yaml
Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).
Config:
auxiliary:
approval:
provider: auto
model: '' # fast/cheap model recommended
base_url: ''
api_key: ''
Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.
* feat: add /stop command to kill all background processes
Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.
Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.
Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.
* feat: first-class plugin architecture + hide status bar cost by default (#1544)
The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:
display:
show_cost: true
in config.yaml, or: hermes config set display.show_cost true
The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.
Status bar without cost:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m
Status bar with show_cost: true:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m
* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)
* feat: improve memory prioritization — user preferences over procedural knowledge
Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.
Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'
Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
preferences and corrections over task-specific details
* feat: more aggressive skill creation and update prompting
Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.
Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers
* feat: first-class plugin architecture (#1555)
Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.
Core system (hermes_cli/plugins.py):
- Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
pip entry_points (hermes_agent.plugins group)
- PluginContext with register_tool() and register_hook()
- 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
on_session_start/end
- Namespace package handling for relative imports in plugins
- Graceful error isolation — broken plugins never crash the agent
Integration (model_tools.py):
- Plugin discovery runs after built-in + MCP tools
- Plugin tools bypass toolset filter via get_plugin_tool_names()
- Pre/post tool call hooks fire in handle_function_call()
CLI:
- /plugins command shows loaded plugins, tool counts, status
- Added to COMMANDS dict for autocomplete
Docs:
- Getting started guide (build-a-hermes-plugin.md) — full tutorial
building a calculator plugin step by step
- Reference page (features/plugins.md) — quick overview + tables
- Covers: file structure, schemas, handlers, hooks, data files,
bundled skills, env var gating, pip distribution, common mistakes
Tests: 16 tests covering discovery, loading, hooks, tool visibility.
* fix: hermes update causes dual gateways on macOS (launchd)
Three bugs worked together to create the dual-gateway problem:
1. cmd_update only checked systemd for gateway restart, completely
ignoring launchd on macOS. After killing the PID it would print
'Restart it with: hermes gateway run' even when launchd was about
to auto-respawn the process.
2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
after SIGTERM (non-zero exit), so the user's manual restart
created a second instance.
3. The launchd plist lacked --replace (systemd had it), so the
respawned gateway didn't kill stale instances on startup.
Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint
* fix: add launchd plist auto-refresh + explicit restart in cmd_update
Two integration issues with the initial fix:
1. Existing macOS users with old plist (no --replace) would never
get the fix until manual uninstall/reinstall. Added
refresh_launchd_plist_if_needed() — mirrors the existing
refresh_systemd_unit_if_needed(). Called from launchd_start(),
launchd_restart(), and cmd_update.
2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
explicit launchctl stop/start. This caused races: launchd would
respawn the old process before the PID file was cleaned up.
Now does explicit stop+start (matching how systemd gets an
explicit systemctl restart), with plist refresh first so the
new --replace flag is picked up.
---------
Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
}
return resolve_turn_route ( user_message , getattr ( self , " _smart_model_routing " , { } ) , primary )
2026-03-14 18:03:50 -07:00
async def _handle_adapter_fatal_error ( self , adapter : BasePlatformAdapter ) - > None :
2026-03-22 23:48:24 -07:00
""" React to an adapter failure after startup.
If the error is retryable ( e . g . network blip , DNS failure ) , queue the
platform for background reconnection instead of giving up permanently .
"""
2026-03-14 18:03:50 -07:00
logger . error (
" Fatal %s adapter error ( %s ): %s " ,
adapter . platform . value ,
adapter . fatal_error_code or " unknown " ,
adapter . fatal_error_message or " unknown error " ,
)
existing = self . adapters . get ( adapter . platform )
if existing is adapter :
try :
await adapter . disconnect ( )
finally :
self . adapters . pop ( adapter . platform , None )
self . delivery_router . adapters = self . adapters
2026-03-22 23:48:24 -07:00
# Queue retryable failures for background reconnection
if adapter . fatal_error_retryable :
platform_config = self . config . platforms . get ( adapter . platform )
if platform_config and adapter . platform not in self . _failed_platforms :
self . _failed_platforms [ adapter . platform ] = {
" config " : platform_config ,
" attempts " : 0 ,
" next_retry " : time . monotonic ( ) + 30 ,
}
logger . info (
" %s queued for background reconnection " ,
adapter . platform . value ,
)
if not self . adapters and not self . _failed_platforms :
2026-03-14 18:03:50 -07:00
self . _exit_reason = adapter . fatal_error_message or " All messaging adapters disconnected "
2026-03-21 09:38:52 -07:00
if adapter . fatal_error_retryable :
self . _exit_with_failure = True
logger . error ( " No connected messaging platforms remain. Shutting down gateway for service restart. " )
else :
logger . error ( " No connected messaging platforms remain. Shutting down gateway cleanly. " )
2026-03-14 18:03:50 -07:00
await self . stop ( )
2026-03-22 23:48:24 -07:00
elif not self . adapters and self . _failed_platforms :
2026-03-28 14:25:12 -07:00
# All platforms are down and queued for background reconnection.
# If the error is retryable, exit with failure so systemd Restart=on-failure
# can restart the process. Otherwise stay alive and keep retrying in background.
if adapter . fatal_error_retryable :
self . _exit_reason = adapter . fatal_error_message or " All messaging platforms failed with retryable errors "
self . _exit_with_failure = True
logger . error (
" All messaging platforms failed with retryable errors. "
" Shutting down gateway for service restart (systemd will retry). "
)
await self . stop ( )
else :
logger . warning (
" No connected messaging platforms remain, but %d platform(s) queued for reconnection " ,
len ( self . _failed_platforms ) ,
)
2026-03-14 18:03:50 -07:00
def _request_clean_exit ( self , reason : str ) - > None :
self . _exit_cleanly = True
self . _exit_reason = reason
self . _shutdown_event . set ( )
2026-02-02 19:01:51 -08:00
2026-02-23 23:55:42 -08:00
@staticmethod
def _load_prefill_messages ( ) - > List [ Dict [ str , Any ] ] :
""" Load ephemeral prefill messages from config or env var.
Checks HERMES_PREFILL_MESSAGES_FILE env var first , then falls back to
the prefill_messages_file key in ~ / . hermes / config . yaml .
Relative paths are resolved from ~ / . hermes / .
"""
import json as _json
file_path = os . getenv ( " HERMES_PREFILL_MESSAGES_FILE " , " " )
if not file_path :
try :
import yaml as _y
2026-02-26 18:51:46 +11:00
cfg_path = _hermes_home / " config.yaml "
2026-02-23 23:55:42 -08:00
if cfg_path . exists ( ) :
2026-03-05 17:04:33 -05:00
with open ( cfg_path , encoding = " utf-8 " ) as _f :
2026-02-23 23:55:42 -08:00
cfg = _y . safe_load ( _f ) or { }
file_path = cfg . get ( " prefill_messages_file " , " " )
except Exception :
pass
if not file_path :
return [ ]
path = Path ( file_path ) . expanduser ( )
if not path . is_absolute ( ) :
2026-02-26 18:51:46 +11:00
path = _hermes_home / path
2026-02-23 23:55:42 -08:00
if not path . exists ( ) :
logger . warning ( " Prefill messages file not found: %s " , path )
return [ ]
try :
with open ( path , " r " , encoding = " utf-8 " ) as f :
data = _json . load ( f )
if not isinstance ( data , list ) :
logger . warning ( " Prefill messages file must contain a JSON array: %s " , path )
return [ ]
return data
except Exception as e :
logger . warning ( " Failed to load prefill messages from %s : %s " , path , e )
return [ ]
@staticmethod
def _load_ephemeral_system_prompt ( ) - > str :
""" Load ephemeral system prompt from config or env var.
Checks HERMES_EPHEMERAL_SYSTEM_PROMPT env var first , then falls back to
agent . system_prompt in ~ / . hermes / config . yaml .
"""
prompt = os . getenv ( " HERMES_EPHEMERAL_SYSTEM_PROMPT " , " " )
if prompt :
return prompt
try :
import yaml as _y
2026-02-26 18:51:46 +11:00
cfg_path = _hermes_home / " config.yaml "
2026-02-23 23:55:42 -08:00
if cfg_path . exists ( ) :
2026-03-05 17:04:33 -05:00
with open ( cfg_path , encoding = " utf-8 " ) as _f :
2026-02-23 23:55:42 -08:00
cfg = _y . safe_load ( _f ) or { }
return ( cfg . get ( " agent " , { } ) . get ( " system_prompt " , " " ) or " " ) . strip ( )
except Exception :
pass
return " "
2026-02-24 03:30:19 -08:00
@staticmethod
def _load_reasoning_config ( ) - > dict | None :
2026-03-11 22:12:11 +08:00
""" Load reasoning effort from config with env fallback.
Checks agent . reasoning_effort in config . yaml first , then
HERMES_REASONING_EFFORT as a fallback . Valid : " xhigh " , " high " ,
" medium " , " low " , " minimal " , " none " . Returns None to use default
( medium ) .
2026-02-24 03:30:19 -08:00
"""
refactor: consolidate get_hermes_home() and parse_reasoning_effort() (#3062)
Centralizes two widely-duplicated patterns into hermes_constants.py:
1. get_hermes_home() — Path resolution for ~/.hermes (HERMES_HOME env var)
- Was copy-pasted inline across 30+ files as:
Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
- Now defined once in hermes_constants.py (zero-dependency module)
- hermes_cli/config.py re-exports it for backward compatibility
- Removed local wrapper functions in honcho_integration/client.py,
tools/website_policy.py, tools/tirith_security.py, hermes_cli/uninstall.py
2. parse_reasoning_effort() — Reasoning effort string validation
- Was copy-pasted in cli.py, gateway/run.py, cron/scheduler.py
- Same validation logic: check against (xhigh, high, medium, low, minimal, none)
- Now defined once in hermes_constants.py, called from all 3 locations
- Warning log for unknown values kept at call sites (context-specific)
31 files changed, net +31 lines (125 insertions, 94 deletions)
Full test suite: 6179 passed, 0 failed
2026-03-25 15:54:28 -07:00
from hermes_constants import parse_reasoning_effort
2026-03-11 22:12:11 +08:00
effort = " "
try :
import yaml as _y
cfg_path = _hermes_home / " config.yaml "
if cfg_path . exists ( ) :
with open ( cfg_path , encoding = " utf-8 " ) as _f :
cfg = _y . safe_load ( _f ) or { }
effort = str ( cfg . get ( " agent " , { } ) . get ( " reasoning_effort " , " " ) or " " ) . strip ( )
except Exception :
pass
2026-02-24 03:30:19 -08:00
if not effort :
2026-03-11 22:12:11 +08:00
effort = os . getenv ( " HERMES_REASONING_EFFORT " , " " )
refactor: consolidate get_hermes_home() and parse_reasoning_effort() (#3062)
Centralizes two widely-duplicated patterns into hermes_constants.py:
1. get_hermes_home() — Path resolution for ~/.hermes (HERMES_HOME env var)
- Was copy-pasted inline across 30+ files as:
Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
- Now defined once in hermes_constants.py (zero-dependency module)
- hermes_cli/config.py re-exports it for backward compatibility
- Removed local wrapper functions in honcho_integration/client.py,
tools/website_policy.py, tools/tirith_security.py, hermes_cli/uninstall.py
2. parse_reasoning_effort() — Reasoning effort string validation
- Was copy-pasted in cli.py, gateway/run.py, cron/scheduler.py
- Same validation logic: check against (xhigh, high, medium, low, minimal, none)
- Now defined once in hermes_constants.py, called from all 3 locations
- Warning log for unknown values kept at call sites (context-specific)
31 files changed, net +31 lines (125 insertions, 94 deletions)
Full test suite: 6179 passed, 0 failed
2026-03-25 15:54:28 -07:00
result = parse_reasoning_effort ( effort )
if effort and effort . strip ( ) and result is None :
logger . warning ( " Unknown reasoning_effort ' %s ' , using default (medium) " , effort )
return result
2026-02-24 03:30:19 -08:00
fix: /reasoning command — add gateway support, fix display, persist settings (#1031)
* fix: /reasoning command output ordering, display, and inline think extraction
Three issues with the /reasoning command:
1. Output interleaving: The command echo used print() while feedback
used _cprint(), causing them to render out-of-order under
prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
all output renders through the same path in correct order.
2. Reasoning display not working: /reasoning show toggled a flag
but reasoning never appeared for models that embed thinking in
inline <think> blocks rather than structured API fields. Added
fallback extraction in _build_assistant_message to capture
<think> block content as reasoning when no structured reasoning
fields (reasoning, reasoning_content, reasoning_details) are
present. This feeds into both the reasoning callback (during
tool loops) and the post-response reasoning box display.
3. Feedback clarity: Added checkmarks to confirm actions, persisted
show/hide to config (was session-only before), and aligned the
status display for readability.
Tests: 7 new tests for inline think block extraction (41 total).
* feat: add /reasoning command to gateway (Telegram/Discord/etc)
The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:
1. /reasoning command handler in the gateway:
- No args: shows current effort level and display state
- /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
- /reasoning show|hide: toggles reasoning display in responses
- All changes saved to config.yaml immediately
2. Reasoning display in gateway responses:
- When show_reasoning is enabled, prepends a 'Reasoning' block
with the model's last_reasoning content before the response
- Collapses long reasoning (>15 lines) to keep messages readable
- Uses last_reasoning from run_conversation result dict
3. Plumbing:
- Added _show_reasoning attribute loaded from config at startup
- Propagated last_reasoning through _run_agent return dict
- Added /reasoning to help text and known_commands set
- Uses getattr for _show_reasoning to handle test stubs
2026-03-12 05:38:19 -07:00
@staticmethod
def _load_show_reasoning ( ) - > bool :
""" Load show_reasoning toggle from config.yaml display section. """
try :
import yaml as _y
cfg_path = _hermes_home / " config.yaml "
if cfg_path . exists ( ) :
with open ( cfg_path , encoding = " utf-8 " ) as _f :
cfg = _y . safe_load ( _f ) or { }
return bool ( cfg . get ( " display " , { } ) . get ( " show_reasoning " , False ) )
except Exception :
pass
return False
2026-03-10 04:12:39 -07:00
@staticmethod
def _load_background_notifications_mode ( ) - > str :
""" Load background process notification mode from config or env var.
Modes :
- ` ` all ` ` — push running - output updates * and * the final message ( default )
- ` ` result ` ` — only the final completion message ( regardless of exit code )
- ` ` error ` ` — only the final message when exit code is non - zero
- ` ` off ` ` — no watcher messages at all
"""
mode = os . getenv ( " HERMES_BACKGROUND_NOTIFICATIONS " , " " )
if not mode :
try :
import yaml as _y
cfg_path = _hermes_home / " config.yaml "
if cfg_path . exists ( ) :
with open ( cfg_path , encoding = " utf-8 " ) as _f :
cfg = _y . safe_load ( _f ) or { }
raw = cfg . get ( " display " , { } ) . get ( " background_process_notifications " )
if raw is False :
mode = " off "
elif raw not in ( None , " " ) :
mode = str ( raw )
except Exception :
pass
mode = ( mode or " all " ) . strip ( ) . lower ( )
valid = { " all " , " result " , " error " , " off " }
if mode not in valid :
logger . warning (
" Unknown background_process_notifications ' %s ' , defaulting to ' all ' " ,
mode ,
)
return " all "
return mode
2026-03-01 18:24:27 -08:00
@staticmethod
def _load_provider_routing ( ) - > dict :
""" Load OpenRouter provider routing preferences from config.yaml. """
try :
import yaml as _y
cfg_path = _hermes_home / " config.yaml "
if cfg_path . exists ( ) :
2026-03-05 17:04:33 -05:00
with open ( cfg_path , encoding = " utf-8 " ) as _f :
2026-03-01 18:24:27 -08:00
cfg = _y . safe_load ( _f ) or { }
return cfg . get ( " provider_routing " , { } ) or { }
except Exception :
pass
return { }
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
@staticmethod
def _load_fallback_model ( ) - > dict | None :
""" Load fallback model config from config.yaml.
Returns a dict with ' provider ' and ' model ' keys , or None if
not configured / both fields empty .
"""
try :
import yaml as _y
cfg_path = _hermes_home / " config.yaml "
if cfg_path . exists ( ) :
2026-03-09 21:19:20 -07:00
with open ( cfg_path , encoding = " utf-8 " ) as _f :
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
cfg = _y . safe_load ( _f ) or { }
fb = cfg . get ( " fallback_model " , { } ) or { }
if fb . get ( " provider " ) and fb . get ( " model " ) :
return fb
except Exception :
pass
return None
fix: hermes update causes dual gateways on macOS (launchd) (#1567)
* feat: add optional smart model routing
Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.
* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s
* fix(gateway): avoid recursive ExecStop in user systemd unit
* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit
The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.
---------
Co-authored-by: Ninja <ninja@local>
* feat(skills): add blender-mcp optional skill for 3D modeling
Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.
Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.
* feat(acp): support slash commands in ACP adapter (#1532)
Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.
Unrecognized /commands fall through to the LLM as normal messages.
/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.
Fixes #1402
* fix(logging): improve error logging in session search tool (#1533)
* fix(gateway): restart on retryable startup failures (#1517)
* feat(email): add skip_attachments option via config.yaml
* feat(email): add skip_attachments option via config.yaml
Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.
Configure in config.yaml:
platforms:
email:
skip_attachments: true
Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.
* docs: document skip_attachments option for email adapter
* fix(telegram): retry on transient TLS failures during connect and send
Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.
Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.
Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.
Based on PR #1527 by cmd8. Closes #1526.
* feat: permissive block_anchor thresholds and unicode normalization (#1539)
Salvaged from PR #1528 by an420eth. Closes #517.
Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
non-breaking spaces → ASCII) so LLM-produced unicode artifacts
don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
multiple candidates — if first/last lines match exactly, the
block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
preserve correct character positions
Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
* feat(cli): add file path autocomplete in the input prompt (#1545)
When typing a path-like token (./ ../ ~/ / or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.
Triggered by tokens like:
edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ...
check ~/doc → shows ~/docs/, ~/documents/, ...
read /etc/hos → shows /etc/hosts, /etc/hostname, ...
open tools/reg → shows tools/registry.py
Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.
Inspired by OpenCode PR #145 (file path completion menu).
Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
tokens, _path_completions() yields filesystem Completions with
size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
path extraction, prefix filtering, directory markers, home
expansion, case-insensitivity, integration with slash commands
* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled
Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:
- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)
Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.
Inspired by OpenClaw PR #47959.
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)
Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.
Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.
* feat: smart approvals + /stop command (inspired by OpenAI Codex)
* feat: smart approvals — LLM-based risk assessment for dangerous commands
Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.
Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).
Config (config.yaml):
approvals:
mode: manual # manual (default), smart, off
Modes:
- manual — current behavior, always prompt the user
- smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
or ESCALATE (fall through to manual prompt)
- off — skip all approval prompts (equivalent to --yolo)
When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.
The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.
* feat: make smart approval model configurable via config.yaml
Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).
Config:
auxiliary:
approval:
provider: auto
model: '' # fast/cheap model recommended
base_url: ''
api_key: ''
Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.
* feat: add /stop command to kill all background processes
Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.
Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.
Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.
* feat: first-class plugin architecture + hide status bar cost by default (#1544)
The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:
display:
show_cost: true
in config.yaml, or: hermes config set display.show_cost true
The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.
Status bar without cost:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m
Status bar with show_cost: true:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m
* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)
* feat: improve memory prioritization — user preferences over procedural knowledge
Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.
Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'
Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
preferences and corrections over task-specific details
* feat: more aggressive skill creation and update prompting
Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.
Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers
* feat: first-class plugin architecture (#1555)
Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.
Core system (hermes_cli/plugins.py):
- Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
pip entry_points (hermes_agent.plugins group)
- PluginContext with register_tool() and register_hook()
- 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
on_session_start/end
- Namespace package handling for relative imports in plugins
- Graceful error isolation — broken plugins never crash the agent
Integration (model_tools.py):
- Plugin discovery runs after built-in + MCP tools
- Plugin tools bypass toolset filter via get_plugin_tool_names()
- Pre/post tool call hooks fire in handle_function_call()
CLI:
- /plugins command shows loaded plugins, tool counts, status
- Added to COMMANDS dict for autocomplete
Docs:
- Getting started guide (build-a-hermes-plugin.md) — full tutorial
building a calculator plugin step by step
- Reference page (features/plugins.md) — quick overview + tables
- Covers: file structure, schemas, handlers, hooks, data files,
bundled skills, env var gating, pip distribution, common mistakes
Tests: 16 tests covering discovery, loading, hooks, tool visibility.
* fix: hermes update causes dual gateways on macOS (launchd)
Three bugs worked together to create the dual-gateway problem:
1. cmd_update only checked systemd for gateway restart, completely
ignoring launchd on macOS. After killing the PID it would print
'Restart it with: hermes gateway run' even when launchd was about
to auto-respawn the process.
2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
after SIGTERM (non-zero exit), so the user's manual restart
created a second instance.
3. The launchd plist lacked --replace (systemd had it), so the
respawned gateway didn't kill stale instances on startup.
Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint
* fix: add launchd plist auto-refresh + explicit restart in cmd_update
Two integration issues with the initial fix:
1. Existing macOS users with old plist (no --replace) would never
get the fix until manual uninstall/reinstall. Added
refresh_launchd_plist_if_needed() — mirrors the existing
refresh_systemd_unit_if_needed(). Called from launchd_start(),
launchd_restart(), and cmd_update.
2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
explicit launchctl stop/start. This caused races: launchd would
respawn the old process before the PID file was cleaned up.
Now does explicit stop+start (matching how systemd gets an
explicit systemctl restart), with plist refresh first so the
new --replace flag is picked up.
---------
Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
@staticmethod
def _load_smart_model_routing ( ) - > dict :
""" Load optional smart cheap-vs-strong model routing config. """
try :
import yaml as _y
cfg_path = _hermes_home / " config.yaml "
if cfg_path . exists ( ) :
with open ( cfg_path , encoding = " utf-8 " ) as _f :
cfg = _y . safe_load ( _f ) or { }
return cfg . get ( " smart_model_routing " , { } ) or { }
except Exception :
pass
return { }
2026-02-02 19:01:51 -08:00
async def start ( self ) - > bool :
"""
Start the gateway and all configured platform adapters .
Returns True if at least one adapter connected successfully .
"""
2026-02-21 03:11:11 -08:00
logger . info ( " Starting Hermes Gateway... " )
logger . info ( " Session storage: %s " , self . config . sessions_dir )
2026-03-14 18:03:50 -07:00
try :
from gateway . status import write_runtime_status
write_runtime_status ( gateway_state = " starting " , exit_reason = None )
except Exception :
pass
2026-02-02 19:01:51 -08:00
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
# Warn if no user allowlists are configured and open access is not opted in
_any_allowlist = any (
os . getenv ( v )
for v in ( " TELEGRAM_ALLOWED_USERS " , " DISCORD_ALLOWED_USERS " ,
" WHATSAPP_ALLOWED_USERS " , " SLACK_ALLOWED_USERS " ,
2026-03-26 18:23:49 -07:00
" SIGNAL_ALLOWED_USERS " , " SIGNAL_GROUP_ALLOWED_USERS " ,
" EMAIL_ALLOWED_USERS " ,
2026-03-23 07:19:14 -07:00
" SMS_ALLOWED_USERS " , " MATTERMOST_ALLOWED_USERS " ,
" MATRIX_ALLOWED_USERS " , " DINGTALK_ALLOWED_USERS " ,
2026-03-17 02:53:30 -07:00
" GATEWAY_ALLOWED_USERS " )
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
)
2026-03-26 18:23:49 -07:00
_allow_all = os . getenv ( " GATEWAY_ALLOW_ALL_USERS " , " " ) . lower ( ) in ( " true " , " 1 " , " yes " ) or any (
os . getenv ( v , " " ) . lower ( ) in ( " true " , " 1 " , " yes " )
for v in ( " TELEGRAM_ALLOW_ALL_USERS " , " DISCORD_ALLOW_ALL_USERS " ,
" WHATSAPP_ALLOW_ALL_USERS " , " SLACK_ALLOW_ALL_USERS " ,
" SIGNAL_ALLOW_ALL_USERS " , " EMAIL_ALLOW_ALL_USERS " ,
" SMS_ALLOW_ALL_USERS " , " MATTERMOST_ALLOW_ALL_USERS " ,
" MATRIX_ALLOW_ALL_USERS " , " DINGTALK_ALLOW_ALL_USERS " )
)
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
if not _any_allowlist and not _allow_all :
logger . warning (
" No user allowlists configured. All unauthorized users will be denied. "
" Set GATEWAY_ALLOW_ALL_USERS=true in ~/.hermes/.env to allow open access, "
" or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id). "
)
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# Discover and load event hooks
self . hooks . discover_and_load ( )
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
# Recover background processes from checkpoint (crash recovery)
try :
from tools . process_registry import process_registry
recovered = process_registry . recover_from_checkpoint ( )
if recovered :
2026-02-21 03:11:11 -08:00
logger . info ( " Recovered %s background process(es) from previous run " , recovered )
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
except Exception as e :
2026-02-21 03:11:11 -08:00
logger . warning ( " Process checkpoint recovery: %s " , e )
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
2026-02-02 19:01:51 -08:00
connected_count = 0
2026-03-16 17:56:31 +05:30
enabled_platform_count = 0
2026-03-14 18:03:50 -07:00
startup_nonretryable_errors : list [ str ] = [ ]
2026-03-16 17:56:31 +05:30
startup_retryable_errors : list [ str ] = [ ]
2026-02-02 19:01:51 -08:00
# Initialize and connect each configured platform
for platform , platform_config in self . config . platforms . items ( ) :
if not platform_config . enabled :
continue
2026-03-16 17:56:31 +05:30
enabled_platform_count + = 1
2026-02-02 19:01:51 -08:00
adapter = self . _create_adapter ( platform , platform_config )
if not adapter :
2026-02-21 03:11:11 -08:00
logger . warning ( " No adapter available for %s " , platform . value )
2026-02-02 19:01:51 -08:00
continue
2026-03-14 18:03:50 -07:00
# Set up message + fatal error handlers
2026-02-02 19:01:51 -08:00
adapter . set_message_handler ( self . _handle_message )
2026-03-14 18:03:50 -07:00
adapter . set_fatal_error_handler ( self . _handle_adapter_fatal_error )
2026-02-02 19:01:51 -08:00
# Try to connect
2026-02-21 03:11:11 -08:00
logger . info ( " Connecting to %s ... " , platform . value )
2026-02-02 19:01:51 -08:00
try :
success = await adapter . connect ( )
if success :
self . adapters [ platform ] = adapter
2026-03-14 06:14:22 -07:00
self . _sync_voice_mode_state_to_adapter ( adapter )
2026-02-02 19:01:51 -08:00
connected_count + = 1
2026-02-21 03:11:11 -08:00
logger . info ( " ✓ %s connected " , platform . value )
2026-02-02 19:01:51 -08:00
else :
2026-02-21 03:11:11 -08:00
logger . warning ( " ✗ %s failed to connect " , platform . value )
2026-03-16 17:56:31 +05:30
if adapter . has_fatal_error :
target = (
startup_retryable_errors
if adapter . fatal_error_retryable
else startup_nonretryable_errors
)
target . append (
2026-03-14 18:03:50 -07:00
f " { platform . value } : { adapter . fatal_error_message } "
)
2026-03-22 23:48:24 -07:00
# Queue for reconnection if the error is retryable
if adapter . fatal_error_retryable :
self . _failed_platforms [ platform ] = {
" config " : platform_config ,
" attempts " : 1 ,
" next_retry " : time . monotonic ( ) + 30 ,
}
2026-03-16 17:56:31 +05:30
else :
startup_retryable_errors . append (
f " { platform . value } : failed to connect "
)
2026-03-22 23:48:24 -07:00
# No fatal error info means likely a transient issue — queue for retry
self . _failed_platforms [ platform ] = {
" config " : platform_config ,
" attempts " : 1 ,
" next_retry " : time . monotonic ( ) + 30 ,
}
2026-02-02 19:01:51 -08:00
except Exception as e :
2026-02-21 03:11:11 -08:00
logger . error ( " ✗ %s error: %s " , platform . value , e )
2026-03-16 17:56:31 +05:30
startup_retryable_errors . append ( f " { platform . value } : { e } " )
2026-03-22 23:48:24 -07:00
# Unexpected exceptions are typically transient — queue for retry
self . _failed_platforms [ platform ] = {
" config " : platform_config ,
" attempts " : 1 ,
" next_retry " : time . monotonic ( ) + 30 ,
}
2026-02-02 19:01:51 -08:00
if connected_count == 0 :
2026-03-14 18:03:50 -07:00
if startup_nonretryable_errors :
reason = " ; " . join ( startup_nonretryable_errors )
logger . error ( " Gateway hit a non-retryable startup conflict: %s " , reason )
try :
from gateway . status import write_runtime_status
write_runtime_status ( gateway_state = " startup_failed " , exit_reason = reason )
except Exception :
pass
self . _request_clean_exit ( reason )
return True
2026-03-16 17:56:31 +05:30
if enabled_platform_count > 0 :
reason = " ; " . join ( startup_retryable_errors ) or " all configured messaging platforms failed to connect "
logger . error ( " Gateway failed to connect any configured messaging platform: %s " , reason )
try :
from gateway . status import write_runtime_status
write_runtime_status ( gateway_state = " startup_failed " , exit_reason = reason )
except Exception :
pass
return False
logger . warning ( " No messaging platforms enabled. " )
2026-02-21 16:21:19 -08:00
logger . info ( " Gateway will continue running for cron job execution. " )
2026-02-02 19:01:51 -08:00
# Update delivery router with adapters
self . delivery_router . adapters = self . adapters
self . _running = True
2026-03-14 18:03:50 -07:00
try :
from gateway . status import write_runtime_status
write_runtime_status ( gateway_state = " running " , exit_reason = None )
except Exception :
pass
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# Emit gateway:startup hook
hook_count = len ( self . hooks . loaded_hooks )
if hook_count :
2026-02-21 03:11:11 -08:00
logger . info ( " %s hook(s) loaded " , hook_count )
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
await self . hooks . emit ( " gateway:startup " , {
" platforms " : [ p . value for p in self . adapters . keys ( ) ] ,
} )
2026-02-21 16:21:19 -08:00
if connected_count > 0 :
logger . info ( " Gateway running with %s platform(s) " , connected_count )
2026-02-22 20:44:15 -08:00
# Build initial channel directory for send_message name resolution
try :
from gateway . channel_directory import build_channel_directory
directory = build_channel_directory ( self . adapters )
ch_count = sum ( len ( chs ) for chs in directory . get ( " platforms " , { } ) . values ( ) )
logger . info ( " Channel directory built: %d target(s) " , ch_count )
except Exception as e :
logger . warning ( " Channel directory build failed: %s " , e )
2026-03-11 20:43:31 +01:00
# Check if we're restarting after a /update command. If the update is
# still running, keep watching so we notify once it actually finishes.
notified = await self . _send_update_notification ( )
if not notified and any (
path . exists ( )
for path in (
_hermes_home / " .update_pending.json " ,
_hermes_home / " .update_pending.claimed.json " ,
)
) :
self . _schedule_update_notification_watch ( )
feat: add /update slash command for gateway platforms
Adds a /update command to Telegram, Discord, and other gateway platforms
that runs `hermes update` to pull the latest code, update dependencies,
sync skills, and restart the gateway.
Implementation:
- Spawns `hermes update` in a separate systemd scope (systemd-run --user
--scope) so the process survives the gateway restart that hermes update
triggers at the end. Falls back to nohup if systemd-run is unavailable.
- Writes a marker file (.update_pending.json) with the originating
platform and chat_id before spawning the update.
- On gateway startup, _send_update_notification() checks for the marker,
reads the captured update output, sends the results back to the user,
and cleans up.
Also:
- Registers /update as a Discord slash command
- Updates README.md, docs/messaging.md, docs/slash-commands.md
- Adds 18 tests covering handler, notification, and edge cases
2026-03-05 01:20:58 -08:00
2026-03-17 03:52:15 -07:00
# Drain any recovered process watchers (from crash recovery checkpoint)
try :
from tools . process_registry import process_registry
while process_registry . pending_watchers :
watcher = process_registry . pending_watchers . pop ( 0 )
asyncio . create_task ( self . _run_process_watcher ( watcher ) )
logger . info ( " Resumed watcher for recovered process %s " , watcher . get ( " session_id " ) )
except Exception as e :
logger . error ( " Recovered watcher setup error: %s " , e )
2026-03-07 11:27:50 -08:00
# Start background session expiry watcher for proactive memory flushing
asyncio . create_task ( self . _session_expiry_watcher ( ) )
2026-03-22 23:48:24 -07:00
# Start background reconnection watcher for platforms that failed at startup
if self . _failed_platforms :
logger . info (
" Starting reconnection watcher for %d failed platform(s): %s " ,
len ( self . _failed_platforms ) ,
" , " . join ( p . value for p in self . _failed_platforms ) ,
)
asyncio . create_task ( self . _platform_reconnect_watcher ( ) )
2026-02-21 03:11:11 -08:00
logger . info ( " Press Ctrl+C to stop " )
2026-02-02 19:01:51 -08:00
return True
2026-03-07 11:27:50 -08:00
async def _session_expiry_watcher ( self , interval : int = 300 ) :
""" Background task that proactively flushes memories for expired sessions.
Runs every ` interval ` seconds ( default 5 min ) . For each session that
has expired according to its reset policy , flushes memories in a thread
pool and marks the session so it won ' t be flushed again.
This means memories are already saved by the time the user sends their
next message , so there ' s no blocking delay.
"""
await asyncio . sleep ( 60 ) # initial delay — let the gateway fully start
while self . _running :
try :
self . session_store . _ensure_loaded ( )
for key , entry in list ( self . session_store . _entries . items ( ) ) :
if entry . session_id in self . session_store . _pre_flushed_sessions :
continue # already flushed this session
if not self . session_store . _is_session_expired ( entry ) :
continue # session still active
# Session has expired — flush memories in the background
logger . info (
" Session %s expired (key= %s ), flushing memories proactively " ,
entry . session_id , key ,
)
try :
2026-03-16 00:23:47 -07:00
await self . _async_flush_memories ( entry . session_id , key )
2026-03-10 02:06:17 -07:00
self . _shutdown_gateway_honcho ( key )
2026-03-07 11:27:50 -08:00
self . session_store . _pre_flushed_sessions . add ( entry . session_id )
except Exception as e :
logger . debug ( " Proactive memory flush failed for %s : %s " , entry . session_id , e )
except Exception as e :
logger . debug ( " Session expiry watcher error: %s " , e )
# Sleep in small increments so we can stop quickly
for _ in range ( interval ) :
if not self . _running :
break
await asyncio . sleep ( 1 )
2026-03-22 23:48:24 -07:00
async def _platform_reconnect_watcher ( self ) - > None :
""" Background task that periodically retries connecting failed platforms.
Uses exponential backoff : 30 s → 60 s → 120 s → 240 s → 300 s ( cap ) .
Stops retrying a platform after 20 failed attempts or if the error
is non - retryable ( e . g . bad auth token ) .
"""
_MAX_ATTEMPTS = 20
_BACKOFF_CAP = 300 # 5 minutes max between retries
await asyncio . sleep ( 10 ) # initial delay — let startup finish
while self . _running :
if not self . _failed_platforms :
# Nothing to reconnect — sleep and check again
for _ in range ( 30 ) :
if not self . _running :
return
await asyncio . sleep ( 1 )
continue
now = time . monotonic ( )
for platform in list ( self . _failed_platforms . keys ( ) ) :
if not self . _running :
return
info = self . _failed_platforms [ platform ]
if now < info [ " next_retry " ] :
continue # not time yet
if info [ " attempts " ] > = _MAX_ATTEMPTS :
logger . warning (
" Giving up reconnecting %s after %d attempts " ,
platform . value , info [ " attempts " ] ,
)
del self . _failed_platforms [ platform ]
continue
platform_config = info [ " config " ]
attempt = info [ " attempts " ] + 1
logger . info (
" Reconnecting %s (attempt %d / %d )... " ,
platform . value , attempt , _MAX_ATTEMPTS ,
)
try :
adapter = self . _create_adapter ( platform , platform_config )
if not adapter :
logger . warning (
" Reconnect %s : adapter creation returned None, removing from retry queue " ,
platform . value ,
)
del self . _failed_platforms [ platform ]
continue
adapter . set_message_handler ( self . _handle_message )
adapter . set_fatal_error_handler ( self . _handle_adapter_fatal_error )
success = await adapter . connect ( )
if success :
self . adapters [ platform ] = adapter
self . _sync_voice_mode_state_to_adapter ( adapter )
self . delivery_router . adapters = self . adapters
del self . _failed_platforms [ platform ]
logger . info ( " ✓ %s reconnected successfully " , platform . value )
# Rebuild channel directory with the new adapter
try :
from gateway . channel_directory import build_channel_directory
build_channel_directory ( self . adapters )
except Exception :
pass
else :
# Check if the failure is non-retryable
if adapter . has_fatal_error and not adapter . fatal_error_retryable :
logger . warning (
" Reconnect %s : non-retryable error ( %s ), removing from retry queue " ,
platform . value , adapter . fatal_error_message ,
)
del self . _failed_platforms [ platform ]
else :
backoff = min ( 30 * ( 2 * * ( attempt - 1 ) ) , _BACKOFF_CAP )
info [ " attempts " ] = attempt
info [ " next_retry " ] = time . monotonic ( ) + backoff
logger . info (
" Reconnect %s failed, next retry in %d s " ,
platform . value , backoff ,
)
except Exception as e :
backoff = min ( 30 * ( 2 * * ( attempt - 1 ) ) , _BACKOFF_CAP )
info [ " attempts " ] = attempt
info [ " next_retry " ] = time . monotonic ( ) + backoff
logger . warning (
" Reconnect %s error: %s , next retry in %d s " ,
platform . value , e , backoff ,
)
# Check every 10 seconds for platforms that need reconnection
for _ in range ( 10 ) :
if not self . _running :
return
await asyncio . sleep ( 1 )
2026-02-02 19:01:51 -08:00
async def stop ( self ) - > None :
""" Stop the gateway and disconnect all adapters. """
2026-02-21 03:11:11 -08:00
logger . info ( " Stopping gateway... " )
2026-02-02 19:01:51 -08:00
self . _running = False
2026-03-15 04:21:50 -07:00
for session_key , agent in list ( self . _running_agents . items ( ) ) :
2026-03-19 18:26:09 -07:00
if agent is _AGENT_PENDING_SENTINEL :
continue
2026-03-15 04:21:50 -07:00
try :
agent . interrupt ( " Gateway shutting down " )
logger . debug ( " Interrupted running agent for session %s during shutdown " , session_key [ : 20 ] )
except Exception as e :
logger . debug ( " Failed interrupting agent during shutdown: %s " , e )
2026-03-13 16:59:03 +03:00
for platform , adapter in list ( self . adapters . items ( ) ) :
2026-03-15 04:21:50 -07:00
try :
await adapter . cancel_background_tasks ( )
except Exception as e :
logger . debug ( " ✗ %s background-task cancel error: %s " , platform . value , e )
2026-02-02 19:01:51 -08:00
try :
await adapter . disconnect ( )
2026-02-21 03:11:11 -08:00
logger . info ( " ✓ %s disconnected " , platform . value )
2026-02-02 19:01:51 -08:00
except Exception as e :
2026-02-21 03:11:11 -08:00
logger . error ( " ✗ %s disconnect error: %s " , platform . value , e )
2026-03-10 02:06:17 -07:00
2026-03-26 14:33:48 -07:00
# Cancel any pending background tasks
for _task in list ( self . _background_tasks ) :
_task . cancel ( )
self . _background_tasks . clear ( )
2026-02-02 19:01:51 -08:00
self . adapters . clear ( )
2026-03-15 04:21:50 -07:00
self . _running_agents . clear ( )
self . _pending_messages . clear ( )
self . _pending_approvals . clear ( )
2026-03-10 02:06:17 -07:00
self . _shutdown_all_gateway_honcho ( )
2026-02-02 19:01:51 -08:00
self . _shutdown_event . set ( )
2026-02-22 20:44:15 -08:00
2026-03-14 18:03:50 -07:00
from gateway . status import remove_pid_file , write_runtime_status
2026-02-22 20:44:15 -08:00
remove_pid_file ( )
2026-03-14 18:03:50 -07:00
try :
write_runtime_status ( gateway_state = " stopped " , exit_reason = self . _exit_reason )
except Exception :
pass
2026-02-22 20:44:15 -08:00
2026-02-21 03:11:11 -08:00
logger . info ( " Gateway stopped " )
2026-02-02 19:01:51 -08:00
async def wait_for_shutdown ( self ) - > None :
""" Wait for shutdown signal. """
await self . _shutdown_event . wait ( )
def _create_adapter (
self ,
platform : Platform ,
config : Any
) - > Optional [ BasePlatformAdapter ] :
""" Create the appropriate adapter for a platform. """
2026-03-16 00:22:23 -07:00
if hasattr ( config , " extra " ) and isinstance ( config . extra , dict ) :
config . extra . setdefault (
" group_sessions_per_user " ,
self . config . group_sessions_per_user ,
)
2026-02-02 19:01:51 -08:00
if platform == Platform . TELEGRAM :
from gateway . platforms . telegram import TelegramAdapter , check_telegram_requirements
if not check_telegram_requirements ( ) :
2026-02-21 03:11:11 -08:00
logger . warning ( " Telegram: python-telegram-bot not installed " )
2026-02-02 19:01:51 -08:00
return None
return TelegramAdapter ( config )
elif platform == Platform . DISCORD :
from gateway . platforms . discord import DiscordAdapter , check_discord_requirements
if not check_discord_requirements ( ) :
2026-02-21 03:11:11 -08:00
logger . warning ( " Discord: discord.py not installed " )
2026-02-02 19:01:51 -08:00
return None
return DiscordAdapter ( config )
elif platform == Platform . WHATSAPP :
from gateway . platforms . whatsapp import WhatsAppAdapter , check_whatsapp_requirements
if not check_whatsapp_requirements ( ) :
2026-02-21 03:11:11 -08:00
logger . warning ( " WhatsApp: Node.js not installed or bridge not configured " )
2026-02-02 19:01:51 -08:00
return None
return WhatsAppAdapter ( config )
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
elif platform == Platform . SLACK :
from gateway . platforms . slack import SlackAdapter , check_slack_requirements
if not check_slack_requirements ( ) :
2026-02-21 03:11:11 -08:00
logger . warning ( " Slack: slack-bolt not installed. Run: pip install ' hermes-agent[slack] ' " )
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
return None
return SlackAdapter ( config )
2026-02-28 13:32:48 +03:00
feat: add Signal messenger gateway platform (#405)
Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.
Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py
Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support
Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance
Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)
No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).
Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.
Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>
2026-03-08 20:20:35 -07:00
elif platform == Platform . SIGNAL :
from gateway . platforms . signal import SignalAdapter , check_signal_requirements
if not check_signal_requirements ( ) :
logger . warning ( " Signal: SIGNAL_HTTP_URL or SIGNAL_ACCOUNT not configured " )
return None
return SignalAdapter ( config )
2026-02-28 13:32:48 +03:00
elif platform == Platform . HOMEASSISTANT :
from gateway . platforms . homeassistant import HomeAssistantAdapter , check_ha_requirements
if not check_ha_requirements ( ) :
logger . warning ( " HomeAssistant: aiohttp not installed or HASS_TOKEN not set " )
return None
return HomeAssistantAdapter ( config )
feat: add email gateway platform (IMAP/SMTP)
Allow users to interact with Hermes by sending and receiving emails.
Uses IMAP polling for incoming messages and SMTP for replies with
proper threading (In-Reply-To, References headers).
Integrates with all 14 gateway extension points: config, adapter
factory, authorization, send_message tool, cron delivery, toolsets,
prompt hints, channel directory, setup wizard, status display, and
env example.
65 tests covering config, parsing, dispatch, threading, IMAP fetch,
SMTP send, attachments, and all integration points.
2026-03-10 03:15:38 +03:00
elif platform == Platform . EMAIL :
from gateway . platforms . email import EmailAdapter , check_email_requirements
if not check_email_requirements ( ) :
logger . warning ( " Email: EMAIL_ADDRESS, EMAIL_PASSWORD, EMAIL_IMAP_HOST, or EMAIL_SMTP_HOST not set " )
return None
return EmailAdapter ( config )
feat: add SMS (Telnyx) platform adapter
Implement SMS as a first-class messaging platform following
ADDING_A_PLATFORM.md checklist. All 16 integration points covered:
- gateway/platforms/sms.py: Core adapter with aiohttp webhook server,
Telnyx REST API send, markdown stripping, 1600-char chunking,
echo loop prevention, multi-number reply-from tracking
- gateway/config.py: Platform.SMS enum + env override block
- gateway/run.py: Adapter factory + auth maps (SMS_ALLOWED_USERS,
SMS_ALLOW_ALL_USERS)
- toolsets.py: hermes-sms toolset + included in hermes-gateway
- cron/scheduler.py: SMS in platform_map for cron delivery
- tools/send_message_tool.py: SMS routing + _send_sms() standalone sender
- tools/cronjob_tools.py: 'sms' in deliver description
- gateway/channel_directory.py: SMS in session-based discovery
- agent/prompt_builder.py: SMS platform hint (plain text, concise)
- hermes_cli/status.py: SMS in platforms status display
- hermes_cli/gateway.py: SMS in setup wizard with Telnyx instructions
- pyproject.toml: sms optional dependency group (aiohttp>=3.9.0)
- tests/gateway/test_sms.py: Unit tests for config, format, truncate,
echo prevention, requirements, toolset integration
Co-authored-by: sunsakis <teo@sunsakis.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:52:34 -07:00
elif platform == Platform . SMS :
from gateway . platforms . sms import SmsAdapter , check_sms_requirements
if not check_sms_requirements ( ) :
feat: add SMS (Twilio) platform adapter
Add SMS as a first-class messaging platform via the Twilio API.
Shares credentials with the existing telephony skill — same
TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN, TWILIO_PHONE_NUMBER env vars.
Adapter (gateway/platforms/sms.py):
- aiohttp webhook server for inbound (Twilio form-encoded POSTs)
- Twilio REST API with Basic auth for outbound
- Markdown stripping, smart chunking at 1600 chars
- Echo loop prevention, phone number redaction in logs
Integration (13 files):
- gateway config, run, channel_directory
- agent prompt_builder (SMS platform hint)
- cron scheduler, cronjob tools
- send_message_tool (_send_sms via Twilio API)
- toolsets (hermes-sms + hermes-gateway)
- gateway setup wizard, status display
- pyproject.toml (sms optional extra)
- 21 tests
Docs:
- website/docs/user-guide/messaging/sms.md (full setup guide)
- Updated messaging index (architecture, toolsets, security, links)
- Updated environment-variables.md reference
Inspired by PR #1575 (@sunsakis), rewritten for Twilio.
2026-03-17 03:14:53 -07:00
logger . warning ( " SMS: aiohttp not installed or TWILIO_ACCOUNT_SID/TWILIO_AUTH_TOKEN not set " )
feat: add SMS (Telnyx) platform adapter
Implement SMS as a first-class messaging platform following
ADDING_A_PLATFORM.md checklist. All 16 integration points covered:
- gateway/platforms/sms.py: Core adapter with aiohttp webhook server,
Telnyx REST API send, markdown stripping, 1600-char chunking,
echo loop prevention, multi-number reply-from tracking
- gateway/config.py: Platform.SMS enum + env override block
- gateway/run.py: Adapter factory + auth maps (SMS_ALLOWED_USERS,
SMS_ALLOW_ALL_USERS)
- toolsets.py: hermes-sms toolset + included in hermes-gateway
- cron/scheduler.py: SMS in platform_map for cron delivery
- tools/send_message_tool.py: SMS routing + _send_sms() standalone sender
- tools/cronjob_tools.py: 'sms' in deliver description
- gateway/channel_directory.py: SMS in session-based discovery
- agent/prompt_builder.py: SMS platform hint (plain text, concise)
- hermes_cli/status.py: SMS in platforms status display
- hermes_cli/gateway.py: SMS in setup wizard with Telnyx instructions
- pyproject.toml: sms optional dependency group (aiohttp>=3.9.0)
- tests/gateway/test_sms.py: Unit tests for config, format, truncate,
echo prevention, requirements, toolset integration
Co-authored-by: sunsakis <teo@sunsakis.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:52:34 -07:00
return None
return SmsAdapter ( config )
2026-03-17 03:04:58 -07:00
elif platform == Platform . DINGTALK :
from gateway . platforms . dingtalk import DingTalkAdapter , check_dingtalk_requirements
if not check_dingtalk_requirements ( ) :
logger . warning ( " DingTalk: dingtalk-stream not installed or DINGTALK_CLIENT_ID/SECRET not set " )
return None
return DingTalkAdapter ( config )
2026-03-17 02:59:36 -07:00
elif platform == Platform . MATTERMOST :
from gateway . platforms . mattermost import MattermostAdapter , check_mattermost_requirements
if not check_mattermost_requirements ( ) :
logger . warning ( " Mattermost: MATTERMOST_TOKEN or MATTERMOST_URL not set, or aiohttp missing " )
return None
return MattermostAdapter ( config )
elif platform == Platform . MATRIX :
from gateway . platforms . matrix import MatrixAdapter , check_matrix_requirements
if not check_matrix_requirements ( ) :
logger . warning ( " Matrix: matrix-nio not installed or credentials not set. Run: pip install ' matrix-nio[e2e] ' " )
return None
return MatrixAdapter ( config )
feat: OpenAI-compatible API server + WhatsApp configurable reply prefix (#1756)
* feat: OpenAI-compatible API server platform adapter
Salvaged from PR #956, updated for current main.
Adds an HTTP API server as a gateway platform adapter that exposes
hermes-agent via the OpenAI Chat Completions and Responses APIs.
Any OpenAI-compatible frontend (Open WebUI, LobeChat, LibreChat,
AnythingLLM, NextChat, ChatBox, etc.) can connect by pointing at
http://localhost:8642/v1.
Endpoints:
- POST /v1/chat/completions — stateless Chat Completions API
- POST /v1/responses — stateful Responses API with chaining
- GET /v1/responses/{id} — retrieve stored response
- DELETE /v1/responses/{id} — delete stored response
- GET /v1/models — list hermes-agent as available model
- GET /health — health check
Features:
- Real SSE streaming via stream_delta_callback (uses main's streaming)
- In-memory LRU response store for Responses API conversation chaining
- Named conversations via 'conversation' parameter
- Bearer token auth (optional, via API_SERVER_KEY)
- CORS support for browser-based frontends
- System prompt layering (frontend system messages on top of core)
- Real token usage tracking in responses
Integration points:
- Platform.API_SERVER in gateway/config.py
- _create_adapter() branch in gateway/run.py
- API_SERVER_* env vars in hermes_cli/config.py
- Env var overrides in gateway/config.py _apply_env_overrides()
Changes vs original PR #956:
- Removed streaming infrastructure (already on main via stream_consumer.py)
- Removed Telegram reply_to_mode (separate feature, not included)
- Updated _resolve_model() -> _resolve_gateway_model()
- Updated stream_callback -> stream_delta_callback
- Updated connect()/disconnect() to use _mark_connected()/_mark_disconnected()
- Adapted to current Platform enum (includes MATTERMOST, MATRIX, DINGTALK)
Tests: 72 new tests, all passing
Docs: API server guide, Open WebUI integration guide, env var reference
* feat(whatsapp): make reply prefix configurable via config.yaml
Reworked from PR #1764 (ifrederico) to use config.yaml instead of .env.
The WhatsApp bridge prepends a header to every outgoing message.
This was hardcoded to '⚕ *Hermes Agent*'. Users can now customize
or disable it via config.yaml:
whatsapp:
reply_prefix: '' # disable header
reply_prefix: '🤖 *My Bot*\n───\n' # custom prefix
How it works:
- load_gateway_config() reads whatsapp.reply_prefix from config.yaml
and stores it in PlatformConfig.extra['reply_prefix']
- WhatsAppAdapter reads it from config.extra at init
- When spawning bridge.js, the adapter passes it as
WHATSAPP_REPLY_PREFIX in the subprocess environment
- bridge.js handles undefined (default), empty (no header),
or custom values with \\n escape support
- Self-chat echo suppression uses the configured prefix
Also fixes _config_version: was 9 but ENV_VARS_BY_VERSION had a
key 10 (TAVILY_API_KEY), so existing users at v9 would never be
prompted for Tavily. Bumped to 10 to close the gap. Added a
regression test to prevent this from happening again.
Credit: ifrederico (PR #1764) for the bridge.js implementation
and the config version gap discovery.
---------
Co-authored-by: Test <test@test.com>
2026-03-17 10:44:37 -07:00
elif platform == Platform . API_SERVER :
from gateway . platforms . api_server import APIServerAdapter , check_api_server_requirements
if not check_api_server_requirements ( ) :
logger . warning ( " API Server: aiohttp not installed " )
return None
return APIServerAdapter ( config )
feat(gateway): add webhook platform adapter for external event triggers
Add a generic webhook platform adapter that receives HTTP POSTs from
external services (GitHub, GitLab, JIRA, Stripe, etc.), validates HMAC
signatures, transforms payloads into agent prompts, and routes responses
back to the source or to another platform.
Features:
- Configurable routes with per-route HMAC secrets, event filters,
prompt templates with dot-notation payload access, skill loading,
and pluggable delivery (github_comment, telegram, discord, log)
- HMAC signature validation (GitHub SHA-256, GitLab token, generic)
- Rate limiting (30 req/min per route, configurable)
- Idempotency cache (1hr TTL, prevents duplicate runs on retries)
- Body size limits (1MB default, checked before reading payload)
- Setup wizard integration with security warnings and docs links
- 33 tests (29 unit + 4 integration), all passing
Security:
- HMAC secret required per route (startup validation)
- Setup wizard warns about internet exposure for webhook/SMS platforms
- Sandboxing (Docker/VM) recommended in docs for public-facing deployments
Files changed:
- gateway/config.py — Platform.WEBHOOK enum + env var overrides
- gateway/platforms/webhook.py — WebhookAdapter (~420 lines)
- gateway/run.py — factory wiring + auth bypass for webhook events
- hermes_cli/config.py — WEBHOOK_* env var definitions
- hermes_cli/setup.py — webhook section in setup_gateway()
- tests/gateway/test_webhook_adapter.py — 29 unit tests
- tests/gateway/test_webhook_integration.py — 4 integration tests
- website/docs/user-guide/messaging/webhooks.md — full user docs
- website/docs/reference/environment-variables.md — WEBHOOK_* vars
- website/sidebars.ts — nav entry
2026-03-20 06:33:36 -07:00
elif platform == Platform . WEBHOOK :
from gateway . platforms . webhook import WebhookAdapter , check_webhook_requirements
if not check_webhook_requirements ( ) :
logger . warning ( " Webhook: aiohttp not installed " )
return None
adapter = WebhookAdapter ( config )
adapter . gateway_runner = self # For cross-platform delivery
return adapter
2026-02-02 19:01:51 -08:00
return None
2026-02-03 10:46:23 -08:00
def _is_user_authorized ( self , source : SessionSource ) - > bool :
"""
Check if a user is authorized to use the bot .
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
Checks in order :
2026-02-22 16:32:08 -08:00
1. Per - platform allow - all flag ( e . g . , DISCORD_ALLOW_ALL_USERS = true )
2. Environment variable allowlists ( TELEGRAM_ALLOWED_USERS , etc . )
3. DM pairing approved list
4. Global allow - all ( GATEWAY_ALLOW_ALL_USERS = true )
5. Default : deny
2026-02-03 10:46:23 -08:00
"""
2026-02-28 15:12:18 +03:00
# Home Assistant events are system-generated (state changes), not
# user-initiated messages. The HASS_TOKEN already authenticates the
# connection, so HA events are always authorized.
feat(gateway): add webhook platform adapter for external event triggers
Add a generic webhook platform adapter that receives HTTP POSTs from
external services (GitHub, GitLab, JIRA, Stripe, etc.), validates HMAC
signatures, transforms payloads into agent prompts, and routes responses
back to the source or to another platform.
Features:
- Configurable routes with per-route HMAC secrets, event filters,
prompt templates with dot-notation payload access, skill loading,
and pluggable delivery (github_comment, telegram, discord, log)
- HMAC signature validation (GitHub SHA-256, GitLab token, generic)
- Rate limiting (30 req/min per route, configurable)
- Idempotency cache (1hr TTL, prevents duplicate runs on retries)
- Body size limits (1MB default, checked before reading payload)
- Setup wizard integration with security warnings and docs links
- 33 tests (29 unit + 4 integration), all passing
Security:
- HMAC secret required per route (startup validation)
- Setup wizard warns about internet exposure for webhook/SMS platforms
- Sandboxing (Docker/VM) recommended in docs for public-facing deployments
Files changed:
- gateway/config.py — Platform.WEBHOOK enum + env var overrides
- gateway/platforms/webhook.py — WebhookAdapter (~420 lines)
- gateway/run.py — factory wiring + auth bypass for webhook events
- hermes_cli/config.py — WEBHOOK_* env var definitions
- hermes_cli/setup.py — webhook section in setup_gateway()
- tests/gateway/test_webhook_adapter.py — 29 unit tests
- tests/gateway/test_webhook_integration.py — 4 integration tests
- website/docs/user-guide/messaging/webhooks.md — full user docs
- website/docs/reference/environment-variables.md — WEBHOOK_* vars
- website/sidebars.ts — nav entry
2026-03-20 06:33:36 -07:00
# Webhook events are authenticated via HMAC signature validation in
# the adapter itself — no user allowlist applies.
if source . platform in ( Platform . HOMEASSISTANT , Platform . WEBHOOK ) :
2026-02-28 15:12:18 +03:00
return True
2026-02-03 10:46:23 -08:00
user_id = source . user_id
if not user_id :
2026-02-22 16:32:08 -08:00
return False
2026-02-03 10:46:23 -08:00
platform_env_map = {
Platform . TELEGRAM : " TELEGRAM_ALLOWED_USERS " ,
Platform . DISCORD : " DISCORD_ALLOWED_USERS " ,
Platform . WHATSAPP : " WHATSAPP_ALLOWED_USERS " ,
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
Platform . SLACK : " SLACK_ALLOWED_USERS " ,
feat: add Signal messenger gateway platform (#405)
Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.
Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py
Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support
Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance
Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)
No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).
Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.
Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>
2026-03-08 20:20:35 -07:00
Platform . SIGNAL : " SIGNAL_ALLOWED_USERS " ,
feat: add email gateway platform (IMAP/SMTP)
Allow users to interact with Hermes by sending and receiving emails.
Uses IMAP polling for incoming messages and SMTP for replies with
proper threading (In-Reply-To, References headers).
Integrates with all 14 gateway extension points: config, adapter
factory, authorization, send_message tool, cron delivery, toolsets,
prompt hints, channel directory, setup wizard, status display, and
env example.
65 tests covering config, parsing, dispatch, threading, IMAP fetch,
SMTP send, attachments, and all integration points.
2026-03-10 03:15:38 +03:00
Platform . EMAIL : " EMAIL_ALLOWED_USERS " ,
feat: add SMS (Telnyx) platform adapter
Implement SMS as a first-class messaging platform following
ADDING_A_PLATFORM.md checklist. All 16 integration points covered:
- gateway/platforms/sms.py: Core adapter with aiohttp webhook server,
Telnyx REST API send, markdown stripping, 1600-char chunking,
echo loop prevention, multi-number reply-from tracking
- gateway/config.py: Platform.SMS enum + env override block
- gateway/run.py: Adapter factory + auth maps (SMS_ALLOWED_USERS,
SMS_ALLOW_ALL_USERS)
- toolsets.py: hermes-sms toolset + included in hermes-gateway
- cron/scheduler.py: SMS in platform_map for cron delivery
- tools/send_message_tool.py: SMS routing + _send_sms() standalone sender
- tools/cronjob_tools.py: 'sms' in deliver description
- gateway/channel_directory.py: SMS in session-based discovery
- agent/prompt_builder.py: SMS platform hint (plain text, concise)
- hermes_cli/status.py: SMS in platforms status display
- hermes_cli/gateway.py: SMS in setup wizard with Telnyx instructions
- pyproject.toml: sms optional dependency group (aiohttp>=3.9.0)
- tests/gateway/test_sms.py: Unit tests for config, format, truncate,
echo prevention, requirements, toolset integration
Co-authored-by: sunsakis <teo@sunsakis.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:52:34 -07:00
Platform . SMS : " SMS_ALLOWED_USERS " ,
2026-03-17 02:59:36 -07:00
Platform . MATTERMOST : " MATTERMOST_ALLOWED_USERS " ,
Platform . MATRIX : " MATRIX_ALLOWED_USERS " ,
2026-03-17 03:19:45 -07:00
Platform . DINGTALK : " DINGTALK_ALLOWED_USERS " ,
2026-02-03 10:46:23 -08:00
}
2026-02-22 16:32:08 -08:00
platform_allow_all_map = {
Platform . TELEGRAM : " TELEGRAM_ALLOW_ALL_USERS " ,
Platform . DISCORD : " DISCORD_ALLOW_ALL_USERS " ,
Platform . WHATSAPP : " WHATSAPP_ALLOW_ALL_USERS " ,
Platform . SLACK : " SLACK_ALLOW_ALL_USERS " ,
feat: add Signal messenger gateway platform (#405)
Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.
Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py
Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support
Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance
Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)
No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).
Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.
Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>
2026-03-08 20:20:35 -07:00
Platform . SIGNAL : " SIGNAL_ALLOW_ALL_USERS " ,
feat: add email gateway platform (IMAP/SMTP)
Allow users to interact with Hermes by sending and receiving emails.
Uses IMAP polling for incoming messages and SMTP for replies with
proper threading (In-Reply-To, References headers).
Integrates with all 14 gateway extension points: config, adapter
factory, authorization, send_message tool, cron delivery, toolsets,
prompt hints, channel directory, setup wizard, status display, and
env example.
65 tests covering config, parsing, dispatch, threading, IMAP fetch,
SMTP send, attachments, and all integration points.
2026-03-10 03:15:38 +03:00
Platform . EMAIL : " EMAIL_ALLOW_ALL_USERS " ,
feat: add SMS (Telnyx) platform adapter
Implement SMS as a first-class messaging platform following
ADDING_A_PLATFORM.md checklist. All 16 integration points covered:
- gateway/platforms/sms.py: Core adapter with aiohttp webhook server,
Telnyx REST API send, markdown stripping, 1600-char chunking,
echo loop prevention, multi-number reply-from tracking
- gateway/config.py: Platform.SMS enum + env override block
- gateway/run.py: Adapter factory + auth maps (SMS_ALLOWED_USERS,
SMS_ALLOW_ALL_USERS)
- toolsets.py: hermes-sms toolset + included in hermes-gateway
- cron/scheduler.py: SMS in platform_map for cron delivery
- tools/send_message_tool.py: SMS routing + _send_sms() standalone sender
- tools/cronjob_tools.py: 'sms' in deliver description
- gateway/channel_directory.py: SMS in session-based discovery
- agent/prompt_builder.py: SMS platform hint (plain text, concise)
- hermes_cli/status.py: SMS in platforms status display
- hermes_cli/gateway.py: SMS in setup wizard with Telnyx instructions
- pyproject.toml: sms optional dependency group (aiohttp>=3.9.0)
- tests/gateway/test_sms.py: Unit tests for config, format, truncate,
echo prevention, requirements, toolset integration
Co-authored-by: sunsakis <teo@sunsakis.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:52:34 -07:00
Platform . SMS : " SMS_ALLOW_ALL_USERS " ,
2026-03-17 02:59:36 -07:00
Platform . MATTERMOST : " MATTERMOST_ALLOW_ALL_USERS " ,
Platform . MATRIX : " MATRIX_ALLOW_ALL_USERS " ,
2026-03-17 03:19:45 -07:00
Platform . DINGTALK : " DINGTALK_ALLOW_ALL_USERS " ,
2026-02-22 16:32:08 -08:00
}
# Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
platform_allow_all_var = platform_allow_all_map . get ( source . platform , " " )
if platform_allow_all_var and os . getenv ( platform_allow_all_var , " " ) . lower ( ) in ( " true " , " 1 " , " yes " ) :
return True
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# Check pairing store (always checked, regardless of allowlists)
platform_name = source . platform . value if source . platform else " "
if self . pairing_store . is_approved ( platform_name , user_id ) :
return True
2026-02-22 16:32:08 -08:00
# Check platform-specific and global allowlists
platform_allowlist = os . getenv ( platform_env_map . get ( source . platform , " " ) , " " ) . strip ( )
global_allowlist = os . getenv ( " GATEWAY_ALLOWED_USERS " , " " ) . strip ( )
2026-02-03 10:46:23 -08:00
if not platform_allowlist and not global_allowlist :
2026-02-22 16:32:08 -08:00
# No allowlists configured -- check global allow-all flag
return os . getenv ( " GATEWAY_ALLOW_ALL_USERS " , " " ) . lower ( ) in ( " true " , " 1 " , " yes " )
2026-02-03 10:46:23 -08:00
# Check if user is in any allowlist
allowed_ids = set ( )
if platform_allowlist :
2026-02-22 16:32:08 -08:00
allowed_ids . update ( uid . strip ( ) for uid in platform_allowlist . split ( " , " ) if uid . strip ( ) )
2026-02-03 10:46:23 -08:00
if global_allowlist :
2026-02-22 16:32:08 -08:00
allowed_ids . update ( uid . strip ( ) for uid in global_allowlist . split ( " , " ) if uid . strip ( ) )
2026-02-25 21:04:36 -08:00
# WhatsApp JIDs have @s.whatsapp.net suffix — strip it for comparison
check_ids = { user_id }
if " @ " in user_id :
check_ids . add ( user_id . split ( " @ " ) [ 0 ] )
return bool ( check_ids & allowed_ids )
2026-03-18 04:06:08 -07:00
def _get_unauthorized_dm_behavior ( self , platform : Optional [ Platform ] ) - > str :
""" Return how unauthorized DMs should be handled for a platform. """
config = getattr ( self , " config " , None )
if config and hasattr ( config , " get_unauthorized_dm_behavior " ) :
return config . get_unauthorized_dm_behavior ( platform )
return " pair "
2026-02-03 10:46:23 -08:00
2026-02-02 19:01:51 -08:00
async def _handle_message ( self , event : MessageEvent ) - > Optional [ str ] :
"""
Handle an incoming message from any platform .
This is the core message processing pipeline :
2026-02-03 10:46:23 -08:00
1. Check user authorization
2. Check for commands ( / new , / reset , etc . )
2026-02-03 16:15:49 -08:00
3. Check for running agent and interrupt if needed
4. Get or create session
5. Build context for agent
6. Run agent conversation
7. Return response
2026-02-02 19:01:51 -08:00
"""
source = event . source
2026-03-10 22:55:36 +03:00
2026-02-03 10:46:23 -08:00
# Check if user is authorized
if not self . _is_user_authorized ( source ) :
2026-02-21 03:11:11 -08:00
logger . warning ( " Unauthorized user: %s ( %s ) on %s " , source . user_id , source . user_name , source . platform . value )
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# In DMs: offer pairing code. In groups: silently ignore.
2026-03-18 04:06:08 -07:00
if source . chat_type == " dm " and self . _get_unauthorized_dm_behavior ( source . platform ) == " pair " :
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
platform_name = source . platform . value if source . platform else " unknown "
code = self . pairing_store . generate_code (
platform_name , source . user_id , source . user_name or " "
)
if code :
adapter = self . adapters . get ( source . platform )
if adapter :
await adapter . send (
source . chat_id ,
f " Hi~ I don ' t recognize you yet! \n \n "
f " Here ' s your pairing code: ` { code } ` \n \n "
f " Ask the bot owner to run: \n "
f " `hermes pairing approve { platform_name } { code } ` "
)
else :
adapter = self . adapters . get ( source . platform )
if adapter :
await adapter . send (
source . chat_id ,
" Too many pairing requests right now~ "
" Please try again later! "
)
return None
2026-02-03 10:46:23 -08:00
2026-03-15 11:58:19 +05:30
# PRIORITY handling when an agent is already running for this session.
# Default behavior is to interrupt immediately so user text/stop messages
# are handled with minimal latency.
#
# Special case: Telegram/photo bursts often arrive as multiple near-
# simultaneous updates. Do NOT interrupt for photo-only follow-ups here;
# let the adapter-level batching/queueing logic absorb them.
2026-03-16 00:22:23 -07:00
_quick_key = self . _session_key_for_source ( source )
2026-02-23 02:11:33 -08:00
if _quick_key in self . _running_agents :
2026-03-15 19:18:58 -07:00
if event . get_command ( ) == " status " :
return await self . _handle_status_command ( event )
2026-03-25 18:46:50 -07:00
# Resolve the command once for all early-intercept checks below.
from hermes_cli . commands import resolve_command as _resolve_cmd_inner
_evt_cmd = event . get_command ( )
_cmd_def_inner = _resolve_cmd_inner ( _evt_cmd ) if _evt_cmd else None
# /stop must hard-kill the session when an agent is running.
# A soft interrupt (agent.interrupt()) doesn't help when the agent
# is truly hung — the executor thread is blocked and never checks
# _interrupt_requested. Force-clean _running_agents so the session
# is unlocked and subsequent messages are processed normally.
if _cmd_def_inner and _cmd_def_inner . name == " stop " :
running_agent = self . _running_agents . get ( _quick_key )
if running_agent and running_agent is not _AGENT_PENDING_SENTINEL :
running_agent . interrupt ( " Stop requested " )
# Force-clean: remove the session lock regardless of agent state
adapter = self . adapters . get ( source . platform )
if adapter and hasattr ( adapter , ' get_pending_message ' ) :
adapter . get_pending_message ( _quick_key ) # consume and discard
self . _pending_messages . pop ( _quick_key , None )
if _quick_key in self . _running_agents :
del self . _running_agents [ _quick_key ]
logger . info ( " HARD STOP for session %s — session lock released " , _quick_key [ : 20 ] )
return " ⚡ Force-stopped. The session is unlocked — you can send a new message. "
2026-03-20 08:39:49 -07:00
# /reset and /new must bypass the running-agent guard so they
# actually dispatch as commands instead of being queued as user
# text (which would be fed back to the agent with the same
# broken history — #2170). Interrupt the agent first, then
# clear the adapter's pending queue so the stale "/reset" text
# doesn't get re-processed as a user message after the
# interrupt completes.
if _cmd_def_inner and _cmd_def_inner . name == " new " :
running_agent = self . _running_agents . get ( _quick_key )
if running_agent and running_agent is not _AGENT_PENDING_SENTINEL :
running_agent . interrupt ( " Session reset requested " )
# Clear any pending messages so the old text doesn't replay
adapter = self . adapters . get ( source . platform )
if adapter and hasattr ( adapter , ' get_pending_message ' ) :
adapter . get_pending_message ( _quick_key ) # consume and discard
self . _pending_messages . pop ( _quick_key , None )
# Clean up the running agent entry so the reset handler
# doesn't think an agent is still active.
if _quick_key in self . _running_agents :
del self . _running_agents [ _quick_key ]
return await self . _handle_reset_command ( event )
2026-03-20 09:44:27 -07:00
# /queue <prompt> — queue without interrupting
if event . get_command ( ) in ( " queue " , " q " ) :
queued_text = event . get_command_args ( ) . strip ( )
if not queued_text :
return " Usage: /queue <prompt> "
adapter = self . adapters . get ( source . platform )
if adapter :
from gateway . platforms . base import MessageEvent as _ME , MessageType as _MT
queued_event = _ME (
text = queued_text ,
message_type = _MT . TEXT ,
source = event . source ,
message_id = event . message_id ,
)
adapter . _pending_messages [ _quick_key ] = queued_event
return " Queued for the next turn. "
2026-03-15 11:58:19 +05:30
if event . message_type == MessageType . PHOTO :
logger . debug ( " PRIORITY photo follow-up for session %s — queueing without interrupt " , _quick_key [ : 20 ] )
adapter = self . adapters . get ( source . platform )
if adapter :
# Reuse adapter queue semantics so photo bursts merge cleanly.
if _quick_key in adapter . _pending_messages :
existing = adapter . _pending_messages [ _quick_key ]
if getattr ( existing , " message_type " , None ) == MessageType . PHOTO :
existing . media_urls . extend ( event . media_urls )
existing . media_types . extend ( event . media_types )
if event . text :
if not existing . text :
existing . text = event . text
elif event . text not in existing . text :
existing . text = f " { existing . text } \n \n { event . text } " . strip ( )
else :
adapter . _pending_messages [ _quick_key ] = event
else :
adapter . _pending_messages [ _quick_key ] = event
return None
2026-03-19 22:32:37 +03:00
running_agent = self . _running_agents . get ( _quick_key )
if running_agent is _AGENT_PENDING_SENTINEL :
2026-03-19 18:26:09 -07:00
# Agent is being set up but not ready yet.
if event . get_command ( ) == " stop " :
2026-03-25 18:46:50 -07:00
# Force-clean the sentinel so the session is unlocked.
if _quick_key in self . _running_agents :
del self . _running_agents [ _quick_key ]
logger . info ( " HARD STOP (pending) for session %s — sentinel cleared " , _quick_key [ : 20 ] )
return " ⚡ Force-stopped. The agent was still starting — session unlocked. "
2026-03-19 18:26:09 -07:00
# Queue the message so it will be picked up after the
# agent starts.
2026-03-19 22:32:37 +03:00
adapter = self . adapters . get ( source . platform )
if adapter :
adapter . _pending_messages [ _quick_key ] = event
return None
2026-02-23 02:11:33 -08:00
logger . debug ( " PRIORITY interrupt for session %s " , _quick_key [ : 20 ] )
running_agent . interrupt ( event . text )
if _quick_key in self . _pending_messages :
self . _pending_messages [ _quick_key ] + = " \n " + event . text
else :
self . _pending_messages [ _quick_key ] = event . text
return None
2026-03-19 22:32:37 +03:00
2026-02-03 16:15:49 -08:00
# Check for commands
2026-02-02 19:01:51 -08:00
command = event . get_command ( )
2026-02-28 17:09:26 -08:00
2026-03-16 23:21:03 -07:00
# Emit command:* hook for any recognized slash command.
# GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY
# in hermes_cli/commands.py — no hardcoded set to maintain here.
from hermes_cli . commands import GATEWAY_KNOWN_COMMANDS , resolve_command as _resolve_cmd
if command and command in GATEWAY_KNOWN_COMMANDS :
2026-02-28 17:09:26 -08:00
await self . hooks . emit ( f " command: { command } " , {
" platform " : source . platform . value if source . platform else " " ,
" user_id " : source . user_id ,
" command " : command ,
" args " : event . get_command_args ( ) . strip ( ) ,
} )
2026-03-16 23:21:03 -07:00
# Resolve aliases to canonical name so dispatch only checks canonicals.
_cmd_def = _resolve_cmd ( command ) if command else None
canonical = _cmd_def . name if _cmd_def else command
if canonical == " new " :
2026-02-02 19:01:51 -08:00
return await self . _handle_reset_command ( event )
2026-03-16 23:21:03 -07:00
if canonical == " help " :
2026-02-19 14:31:53 -08:00
return await self . _handle_help_command ( event )
2026-03-16 23:21:03 -07:00
if canonical == " status " :
2026-02-02 19:01:51 -08:00
return await self . _handle_status_command ( event )
2026-03-16 23:21:03 -07:00
if canonical == " stop " :
2026-02-03 16:15:49 -08:00
return await self . _handle_stop_command ( event )
2026-03-16 23:21:03 -07:00
if canonical == " reasoning " :
2026-03-11 22:12:11 +08:00
return await self . _handle_reasoning_command ( event )
2026-03-26 14:41:04 -07:00
if canonical == " verbose " :
return await self . _handle_verbose_command ( event )
2026-03-16 23:21:03 -07:00
if canonical == " provider " :
2026-03-08 06:09:36 -07:00
return await self . _handle_provider_command ( event )
2026-03-16 23:21:03 -07:00
if canonical == " personality " :
2026-02-19 14:31:53 -08:00
return await self . _handle_personality_command ( event )
2026-03-14 21:18:17 -07:00
2026-03-16 23:21:03 -07:00
if canonical == " plan " :
2026-03-14 21:18:17 -07:00
try :
from agent . skill_commands import build_plan_path , build_skill_invocation_message
user_instruction = event . get_command_args ( ) . strip ( )
plan_path = build_plan_path ( user_instruction )
event . text = build_skill_invocation_message (
" /plan " ,
user_instruction ,
task_id = _quick_key ,
runtime_note = (
2026-03-14 21:28:51 -07:00
" Save the markdown plan with write_file to this exact relative path "
f " inside the active workspace/backend cwd: { plan_path } "
2026-03-14 21:18:17 -07:00
) ,
)
if not event . text :
return " Failed to load the bundled /plan skill. "
2026-03-16 23:21:03 -07:00
canonical = None
2026-03-14 21:18:17 -07:00
except Exception as e :
logger . exception ( " Failed to prepare /plan command " )
return f " Failed to enter plan mode: { e } "
2026-02-19 14:31:53 -08:00
2026-03-16 23:21:03 -07:00
if canonical == " retry " :
2026-02-19 14:31:53 -08:00
return await self . _handle_retry_command ( event )
2026-03-16 23:21:03 -07:00
if canonical == " undo " :
2026-02-19 14:31:53 -08:00
return await self . _handle_undo_command ( event )
2026-03-16 23:21:03 -07:00
if canonical == " sethome " :
2026-02-22 20:44:15 -08:00
return await self . _handle_set_home_command ( event )
2026-03-01 00:25:44 -08:00
2026-03-16 23:21:03 -07:00
if canonical == " compress " :
2026-03-01 00:25:44 -08:00
return await self . _handle_compress_command ( event )
2026-03-16 23:21:03 -07:00
if canonical == " usage " :
2026-03-01 00:25:44 -08:00
return await self . _handle_usage_command ( event )
feat(mcp): banner integration, /reload-mcp command, resources & prompts
Banner integration:
- MCP Servers section in CLI startup banner between Tools and Skills
- Shows each server with transport type, tool count, connection status
- Failed servers shown in red; section hidden when no MCP configured
- Summary line includes MCP server count
- Removed raw print() calls from discovery (banner handles display)
/reload-mcp command:
- New slash command in both CLI and gateway
- Disconnects all MCP servers, re-reads config.yaml, reconnects
- Reports what changed (added/removed/reconnected servers)
- Allows adding/removing MCP servers without restarting
Resources & Prompts support:
- 4 utility tools registered per server: list_resources, read_resource,
list_prompts, get_prompt
- Exposes MCP Resources (data sources) and Prompts (templates) as tools
- Proper parameter schemas (uri for read_resource, name for get_prompt)
- Handles text and binary resource content
- 23 new tests covering schemas, handlers, and registration
Test coverage: 74 MCP tests total, 1186 tests pass overall.
2026-03-02 19:15:59 -08:00
2026-03-16 23:21:03 -07:00
if canonical == " insights " :
feat: add /insights command with usage analytics and cost estimation
Inspired by Claude Code's /insights, adapted for Hermes Agent's multi-platform
architecture. Analyzes session history from state.db to produce comprehensive
usage insights.
Features:
- Overview stats: sessions, messages, tokens, estimated cost, active time
- Model breakdown: per-model sessions, tokens, and cost estimation
- Platform breakdown: CLI vs Telegram vs Discord etc. (unique to Hermes)
- Tool usage ranking: most-used tools with percentages
- Activity patterns: day-of-week chart, peak hours, streaks
- Notable sessions: longest, most messages, most tokens, most tool calls
- Cost estimation: real pricing data for 25+ models (OpenAI, Anthropic,
DeepSeek, Google, Meta) with fuzzy model name matching
- Configurable time window: --days flag (default 30)
- Source filtering: --source flag to filter by platform
Three entry points:
- /insights slash command in CLI (supports --days and --source flags)
- /insights slash command in gateway (compact markdown format)
- hermes insights CLI subcommand (standalone)
Includes 56 tests covering pricing helpers, format helpers, empty DB,
populated DB with multi-platform data, filtering, formatting, and edge cases.
2026-03-06 14:04:59 -08:00
return await self . _handle_insights_command ( event )
2026-03-16 23:21:03 -07:00
if canonical == " reload-mcp " :
feat(mcp): banner integration, /reload-mcp command, resources & prompts
Banner integration:
- MCP Servers section in CLI startup banner between Tools and Skills
- Shows each server with transport type, tool count, connection status
- Failed servers shown in red; section hidden when no MCP configured
- Summary line includes MCP server count
- Removed raw print() calls from discovery (banner handles display)
/reload-mcp command:
- New slash command in both CLI and gateway
- Disconnects all MCP servers, re-reads config.yaml, reconnects
- Reports what changed (added/removed/reconnected servers)
- Allows adding/removing MCP servers without restarting
Resources & Prompts support:
- 4 utility tools registered per server: list_resources, read_resource,
list_prompts, get_prompt
- Exposes MCP Resources (data sources) and Prompts (templates) as tools
- Proper parameter schemas (uri for read_resource, name for get_prompt)
- Handles text and binary resource content
- 23 new tests covering schemas, handlers, and registration
Test coverage: 74 MCP tests total, 1186 tests pass overall.
2026-03-02 19:15:59 -08:00
return await self . _handle_reload_mcp_command ( event )
feat: add /update slash command for gateway platforms
Adds a /update command to Telegram, Discord, and other gateway platforms
that runs `hermes update` to pull the latest code, update dependencies,
sync skills, and restart the gateway.
Implementation:
- Spawns `hermes update` in a separate systemd scope (systemd-run --user
--scope) so the process survives the gateway restart that hermes update
triggers at the end. Falls back to nohup if systemd-run is unavailable.
- Writes a marker file (.update_pending.json) with the originating
platform and chat_id before spawning the update.
- On gateway startup, _send_update_notification() checks for the marker,
reads the captured update output, sends the results back to the user,
and cleans up.
Also:
- Registers /update as a Discord slash command
- Updates README.md, docs/messaging.md, docs/slash-commands.md
- Adds 18 tests covering handler, notification, and edge cases
2026-03-05 01:20:58 -08:00
fix(gateway): replace bare text approval with /approve and /deny commands (#2002)
The gateway approval system previously intercepted bare 'yes'/'no' text
from the user's next message to approve/deny dangerous commands. This was
fragile and dangerous — if the agent asked a clarify question and the user
said 'yes' to answer it, the gateway would execute the pending dangerous
command instead. (Fixes #1888)
Changes:
- Remove bare text matching ('yes', 'y', 'approve', 'ok', etc.) from
_handle_message approval check
- Add /approve and /deny as gateway-only slash commands in the command
registry
- /approve supports scoping: /approve (one-time), /approve session,
/approve always (permanent)
- Add 5-minute timeout for stale approvals
- Gateway appends structured instructions to the agent response when a
dangerous command is pending, telling the user exactly how to respond
- 9 tests covering approve, deny, timeout, scoping, and verification
that bare 'yes' no longer triggers execution
Credit to @solo386 and @FlyByNight69420 for identifying and reporting
this security issue in PR #1971 and issue #1888.
Co-authored-by: Test <test@test.com>
2026-03-18 16:58:20 -07:00
if canonical == " approve " :
return await self . _handle_approve_command ( event )
if canonical == " deny " :
return await self . _handle_deny_command ( event )
2026-03-16 23:21:03 -07:00
if canonical == " update " :
feat: add /update slash command for gateway platforms
Adds a /update command to Telegram, Discord, and other gateway platforms
that runs `hermes update` to pull the latest code, update dependencies,
sync skills, and restart the gateway.
Implementation:
- Spawns `hermes update` in a separate systemd scope (systemd-run --user
--scope) so the process survives the gateway restart that hermes update
triggers at the end. Falls back to nohup if systemd-run is unavailable.
- Writes a marker file (.update_pending.json) with the originating
platform and chat_id before spawning the update.
- On gateway startup, _send_update_notification() checks for the marker,
reads the captured update output, sends the results back to the user,
and cleans up.
Also:
- Registers /update as a Discord slash command
- Updates README.md, docs/messaging.md, docs/slash-commands.md
- Adds 18 tests covering handler, notification, and edge cases
2026-03-05 01:20:58 -08:00
return await self . _handle_update_command ( event )
2026-03-08 15:48:09 -07:00
2026-03-16 23:21:03 -07:00
if canonical == " title " :
2026-03-08 15:48:09 -07:00
return await self . _handle_title_command ( event )
2026-03-08 17:09:00 -07:00
2026-03-16 23:21:03 -07:00
if canonical == " resume " :
2026-03-08 17:09:00 -07:00
return await self . _handle_resume_command ( event )
feat: filesystem checkpoints and /rollback command
Automatic filesystem snapshots before destructive file operations,
with user-facing rollback. Inspired by PR #559 (by @alireza78a).
Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot
Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)
Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git
Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference
2026-03-10 00:49:15 -07:00
2026-03-16 23:21:03 -07:00
if canonical == " rollback " :
feat: filesystem checkpoints and /rollback command
Automatic filesystem snapshots before destructive file operations,
with user-facing rollback. Inspired by PR #559 (by @alireza78a).
Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot
Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)
Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git
Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference
2026-03-10 00:49:15 -07:00
return await self . _handle_rollback_command ( event )
feat: add /background command to gateway and CLI commands registry
Add /background <prompt> to the gateway, allowing users on Telegram,
Discord, Slack, etc. to fire off a prompt in a separate agent session.
The result is delivered back to the same chat when done, without
modifying the active conversation history.
Implementation:
- _handle_background_command: validates input, spawns asyncio task
- _run_background_task: creates AIAgent in executor thread, delivers
result (text, images, media files) back via the platform adapter
- Inherits model, toolsets, provider routing from gateway config
- Error handling with user-visible failure messages
Also adds /background to hermes_cli/commands.py registry so it
appears in /help and autocomplete.
Tests: 15 new tests covering usage, task creation, uniqueness,
multi-platform, error paths, and help/autocomplete integration.
2026-03-11 02:41:36 -07:00
2026-03-16 23:21:03 -07:00
if canonical == " background " :
feat: add /background command to gateway and CLI commands registry
Add /background <prompt> to the gateway, allowing users on Telegram,
Discord, Slack, etc. to fire off a prompt in a separate agent session.
The result is delivered back to the same chat when done, without
modifying the active conversation history.
Implementation:
- _handle_background_command: validates input, spawns asyncio task
- _run_background_task: creates AIAgent in executor thread, delivers
result (text, images, media files) back via the platform adapter
- Inherits model, toolsets, provider routing from gateway config
- Error handling with user-visible failure messages
Also adds /background to hermes_cli/commands.py registry so it
appears in /help and autocomplete.
Tests: 15 new tests covering usage, task creation, uniqueness,
multi-platform, error paths, and help/autocomplete integration.
2026-03-11 02:41:36 -07:00
return await self . _handle_background_command ( event )
fix: /reasoning command — add gateway support, fix display, persist settings (#1031)
* fix: /reasoning command output ordering, display, and inline think extraction
Three issues with the /reasoning command:
1. Output interleaving: The command echo used print() while feedback
used _cprint(), causing them to render out-of-order under
prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
all output renders through the same path in correct order.
2. Reasoning display not working: /reasoning show toggled a flag
but reasoning never appeared for models that embed thinking in
inline <think> blocks rather than structured API fields. Added
fallback extraction in _build_assistant_message to capture
<think> block content as reasoning when no structured reasoning
fields (reasoning, reasoning_content, reasoning_details) are
present. This feeds into both the reasoning callback (during
tool loops) and the post-response reasoning box display.
3. Feedback clarity: Added checkmarks to confirm actions, persisted
show/hide to config (was session-only before), and aligned the
status display for readability.
Tests: 7 new tests for inline think block extraction (41 total).
* feat: add /reasoning command to gateway (Telegram/Discord/etc)
The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:
1. /reasoning command handler in the gateway:
- No args: shows current effort level and display state
- /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
- /reasoning show|hide: toggles reasoning display in responses
- All changes saved to config.yaml immediately
2. Reasoning display in gateway responses:
- When show_reasoning is enabled, prepends a 'Reasoning' block
with the model's last_reasoning content before the response
- Collapses long reasoning (>15 lines) to keep messages readable
- Uses last_reasoning from run_conversation result dict
3. Plumbing:
- Added _show_reasoning attribute loaded from config at startup
- Propagated last_reasoning through _run_agent return dict
- Added /reasoning to help text and known_commands set
- Uses getattr for _show_reasoning to handle test stubs
2026-03-12 05:38:19 -07:00
2026-03-16 23:21:03 -07:00
if canonical == " voice " :
2026-03-10 22:55:36 +03:00
return await self . _handle_voice_command ( event )
2026-03-09 07:38:06 +03:00
# User-defined quick commands (bypass agent loop, no LLM call)
if command :
2026-03-11 12:46:56 -07:00
if isinstance ( self . config , dict ) :
quick_commands = self . config . get ( " quick_commands " , { } ) or { }
else :
quick_commands = getattr ( self . config , " quick_commands " , { } ) or { }
2026-03-14 03:57:25 -07:00
if not isinstance ( quick_commands , dict ) :
quick_commands = { }
2026-03-09 07:38:06 +03:00
if command in quick_commands :
qcmd = quick_commands [ command ]
if qcmd . get ( " type " ) == " exec " :
exec_cmd = qcmd . get ( " command " , " " )
if exec_cmd :
try :
proc = await asyncio . create_subprocess_shell (
exec_cmd ,
stdout = asyncio . subprocess . PIPE ,
stderr = asyncio . subprocess . PIPE ,
)
stdout , stderr = await asyncio . wait_for ( proc . communicate ( ) , timeout = 30 )
output = ( stdout or stderr ) . decode ( ) . strip ( )
return output if output else " Command returned no output. "
except asyncio . TimeoutError :
return " Quick command timed out (30s). "
except Exception as e :
return f " Quick command error: { e } "
else :
return f " Quick command ' / { command } ' has no command defined. "
2026-03-17 02:53:33 -07:00
elif qcmd . get ( " type " ) == " alias " :
target = qcmd . get ( " target " , " " ) . strip ( )
if target :
target = target if target . startswith ( " / " ) else f " / { target } "
target_command = target . lstrip ( " / " )
user_args = event . get_command_args ( ) . strip ( )
event . text = f " { target } { user_args } " . strip ( )
command = target_command
# Fall through to normal command dispatch below
else :
return f " Quick command ' / { command } ' has no target defined. "
2026-03-09 07:38:06 +03:00
else :
2026-03-17 02:53:33 -07:00
return f " Quick command ' / { command } ' has unsupported type (supported: ' exec ' , ' alias ' ). "
2026-03-09 07:38:06 +03:00
2026-03-21 16:00:30 -07:00
# Plugin-registered slash commands
if command :
try :
from hermes_cli . plugins import get_plugin_command_handler
plugin_handler = get_plugin_command_handler ( command )
if plugin_handler :
user_args = event . get_command_args ( ) . strip ( )
import asyncio as _aio
result = plugin_handler ( user_args )
if _aio . iscoroutine ( result ) :
result = await result
return str ( result ) if result else None
except Exception as e :
logger . debug ( " Plugin command dispatch failed (non-fatal): %s " , e )
2026-02-28 11:18:50 -08:00
# Skill slash commands: /skill-name loads the skill and sends to agent
if command :
try :
from agent . skill_commands import get_skill_commands , build_skill_invocation_message
skill_cmds = get_skill_commands ( )
cmd_key = f " / { command } "
if cmd_key in skill_cmds :
user_instruction = event . get_command_args ( ) . strip ( )
2026-03-13 03:14:04 -07:00
msg = build_skill_invocation_message (
2026-03-17 03:42:15 -07:00
cmd_key , user_instruction , task_id = _quick_key
2026-03-13 03:14:04 -07:00
)
2026-02-28 11:18:50 -08:00
if msg :
event . text = msg
# Fall through to normal message processing with skill content
except Exception as e :
logger . debug ( " Skill command check failed (non-fatal): %s " , e )
2026-02-22 20:44:15 -08:00
fix(gateway): replace bare text approval with /approve and /deny commands (#2002)
The gateway approval system previously intercepted bare 'yes'/'no' text
from the user's next message to approve/deny dangerous commands. This was
fragile and dangerous — if the agent asked a clarify question and the user
said 'yes' to answer it, the gateway would execute the pending dangerous
command instead. (Fixes #1888)
Changes:
- Remove bare text matching ('yes', 'y', 'approve', 'ok', etc.) from
_handle_message approval check
- Add /approve and /deny as gateway-only slash commands in the command
registry
- /approve supports scoping: /approve (one-time), /approve session,
/approve always (permanent)
- Add 5-minute timeout for stale approvals
- Gateway appends structured instructions to the agent response when a
dangerous command is pending, telling the user exactly how to respond
- 9 tests covering approve, deny, timeout, scoping, and verification
that bare 'yes' no longer triggers execution
Credit to @solo386 and @FlyByNight69420 for identifying and reporting
this security issue in PR #1971 and issue #1888.
Co-authored-by: Test <test@test.com>
2026-03-18 16:58:20 -07:00
# Pending exec approvals are handled by /approve and /deny commands above.
# No bare text matching — "yes" in normal conversation must not trigger
# execution of a dangerous command.
2026-03-19 22:32:37 +03:00
# ── Claim this session before any await ───────────────────────
# Between here and _run_agent registering the real AIAgent, there
# are numerous await points (hooks, vision enrichment, STT,
# session hygiene compression). Without this sentinel a second
# message arriving during any of those yields would pass the
# "already running" guard and spin up a duplicate agent for the
# same session — corrupting the transcript.
self . _running_agents [ _quick_key ] = _AGENT_PENDING_SENTINEL
try :
return await self . _handle_message_with_agent ( event , source , _quick_key )
finally :
# If _run_agent replaced the sentinel with a real agent and
# then cleaned it up, this is a no-op. If we exited early
# (exception, command fallthrough, etc.) the sentinel must
# not linger or the session would be permanently locked out.
if self . _running_agents . get ( _quick_key ) is _AGENT_PENDING_SENTINEL :
del self . _running_agents [ _quick_key ]
async def _handle_message_with_agent ( self , event , source , _quick_key : str ) :
""" Inner handler that runs under the _running_agents sentinel guard. """
2026-02-02 19:01:51 -08:00
# Get or create session
session_entry = self . session_store . get_or_create_session ( source )
2026-02-03 16:15:49 -08:00
session_key = session_entry . session_key
2026-02-28 17:09:26 -08:00
# Emit session:start for new or auto-reset sessions
_is_new_session = (
session_entry . created_at == session_entry . updated_at
or getattr ( session_entry , " was_auto_reset " , False )
)
if _is_new_session :
await self . hooks . emit ( " session:start " , {
" platform " : source . platform . value if source . platform else " " ,
" user_id " : source . user_id ,
" session_id " : session_entry . session_id ,
" session_key " : session_key ,
} )
2026-02-02 19:01:51 -08:00
# Build session context
context = build_session_context ( source , self . config , session_entry )
# Set environment variables for tools
self . _set_session_env ( context )
2026-03-16 05:48:45 -07:00
# Read privacy.redact_pii from config (re-read per message)
_redact_pii = False
try :
2026-03-17 03:48:15 -07:00
import yaml as _pii_yaml
2026-03-16 05:48:45 -07:00
with open ( _config_path , encoding = " utf-8 " ) as _pf :
2026-03-17 03:48:15 -07:00
_pcfg = _pii_yaml . safe_load ( _pf ) or { }
2026-03-16 05:48:45 -07:00
_redact_pii = bool ( ( _pcfg . get ( " privacy " ) or { } ) . get ( " redact_pii " , False ) )
except Exception :
pass
2026-02-02 19:01:51 -08:00
# Build the context prompt to inject
2026-03-16 05:48:45 -07:00
context_prompt = build_session_context_prompt ( context , redact_pii = _redact_pii )
2026-02-02 19:01:51 -08:00
2026-02-22 02:16:11 -08:00
# If the previous session expired and was auto-reset, prepend a notice
# so the agent knows this is a fresh conversation (not an intentional /reset).
if getattr ( session_entry , ' was_auto_reset ' , False ) :
2026-03-22 09:33:39 -07:00
reset_reason = getattr ( session_entry , ' auto_reset_reason ' , None ) or ' idle '
if reset_reason == " daily " :
context_note = " [System note: The user ' s session was automatically reset by the daily schedule. This is a fresh conversation with no prior context.] "
else :
context_note = " [System note: The user ' s previous session expired due to inactivity. This is a fresh conversation with no prior context.] "
context_prompt = context_note + " \n \n " + context_prompt
# Send a user-facing notification explaining the reset, unless:
# - notifications are disabled in config
# - the platform is excluded (e.g. api_server, webhook)
# - the expired session had no activity (nothing was cleared)
try :
policy = self . session_store . config . get_reset_policy (
platform = source . platform ,
session_type = getattr ( source , ' chat_type ' , ' dm ' ) ,
)
platform_name = source . platform . value if source . platform else " "
had_activity = getattr ( session_entry , ' reset_had_activity ' , False )
should_notify = (
policy . notify
and had_activity
and platform_name not in policy . notify_exclude_platforms
)
if should_notify :
adapter = self . adapters . get ( source . platform )
if adapter :
if reset_reason == " daily " :
reason_text = f " daily schedule at { policy . at_hour } :00 "
else :
hours = policy . idle_minutes / / 60
mins = policy . idle_minutes % 60
duration = f " { hours } h " if not mins else f " { hours } h { mins } m " if hours else f " { mins } m "
reason_text = f " inactive for { duration } "
notice = (
f " ◐ Session automatically reset ( { reason_text } ). "
f " Conversation history cleared. \n "
f " Use /resume to browse and restore a previous session. \n "
f " Adjust reset timing in config.yaml under session_reset. "
)
feat(gateway): surface session config on /new, /reset, and auto-reset (#3321)
When a new session starts in the gateway (via /new, /reset, or
auto-reset), send the user a summary of the detected configuration:
✨ Session reset! Starting fresh.
◆ Model: qwen3.5:27b-q4_K_M
◆ Provider: custom
◆ Context: 8K tokens (config)
◆ Endpoint: http://localhost:11434/v1
This makes misconfigured context length immediately visible — a user
running a local 8K model that falls to the 128K default will see:
◆ Context: 128K tokens (default — set model.context_length in config to override)
Instead of silently getting no compression and degrading responses.
- _format_session_info() resolves model, provider, context length,
and endpoint from config + runtime, matching the hygiene code's
resolution chain
- Local/custom endpoints shown; cloud endpoints hidden (not useful)
- Context source annotated: config, detected, or default with hint
- Appended to /new and /reset responses, and auto-reset notifications
- 9 tests covering all formatting paths and failure resilience
Addresses the user-facing side of #2708 — instead of trying to fix
every edge case in context detection, surface the values so users
can immediately see when something is wrong.
2026-03-26 19:27:58 -07:00
try :
session_info = self . _format_session_info ( )
if session_info :
notice = f " { notice } \n \n { session_info } "
except Exception :
pass
2026-03-22 09:33:39 -07:00
await adapter . send (
source . chat_id , notice ,
metadata = getattr ( event , ' metadata ' , None ) ,
)
except Exception as e :
logger . debug ( " Auto-reset notification failed (non-fatal): %s " , e )
2026-02-22 02:16:11 -08:00
session_entry . was_auto_reset = False
2026-03-22 09:33:39 -07:00
session_entry . auto_reset_reason = None
2026-03-26 02:04:11 -07:00
# Auto-load skill for DM topic bindings (e.g., Telegram Private Chat Topics)
# Only inject on NEW sessions — for ongoing conversations the skill content
# is already in the conversation history from the first message.
if _is_new_session and getattr ( event , " auto_skill " , None ) :
try :
from agent . skill_commands import _load_skill_payload , _build_skill_message
_skill_name = event . auto_skill
_loaded = _load_skill_payload ( _skill_name , task_id = _quick_key )
if _loaded :
_loaded_skill , _skill_dir , _display_name = _loaded
_activation_note = (
f ' [SYSTEM: This conversation is in a topic with the " { _display_name } " skill '
f " auto-loaded. Follow its instructions for the duration of this session.] "
)
_skill_msg = _build_skill_message (
_loaded_skill , _skill_dir , _activation_note ,
user_instruction = event . text ,
)
if _skill_msg :
event . text = _skill_msg
logger . info (
" [Gateway] Auto-loaded skill ' %s ' for DM topic session %s " ,
_skill_name , session_key ,
)
else :
logger . warning (
" [Gateway] DM topic skill ' %s ' not found in available skills " ,
_skill_name ,
)
except Exception as e :
logger . warning ( " [Gateway] Failed to auto-load topic skill ' %s ' : %s " , event . auto_skill , e )
2026-02-02 19:01:51 -08:00
# Load conversation history from transcript
history = self . session_store . load_transcript ( session_entry . session_id )
2026-03-07 20:09:48 -08:00
# -----------------------------------------------------------------
# Session hygiene: auto-compress pathologically large transcripts
#
# Long-lived gateway sessions can accumulate enough history that
# every new message rehydrates an oversized transcript, causing
# repeated truncation/context failures. Detect this early and
# compress proactively — before the agent even starts. (#628)
2026-03-08 20:08:02 -07:00
#
2026-03-10 23:28:18 -07:00
# Token source priority:
# 1. Actual API-reported prompt_tokens from the last turn
# (stored in session_entry.last_prompt_tokens)
2026-03-22 15:21:18 -07:00
# 2. Rough char-based estimate (str(msg)//4). Overestimates
# by 30-50% on code/JSON-heavy sessions, but that just
# means hygiene fires a bit early — safe and harmless.
2026-03-07 20:09:48 -08:00
# -----------------------------------------------------------------
if history and len ( history ) > = 4 :
2026-03-08 20:08:02 -07:00
from agent . model_metadata import (
estimate_messages_tokens_rough ,
get_model_context_length ,
)
2026-03-07 20:09:48 -08:00
2026-03-13 04:17:45 -07:00
# Read model + compression config from config.yaml.
# NOTE: hygiene threshold is intentionally HIGHER than the agent's
# own compressor (0.85 vs 0.50). Hygiene is a safety net for
# sessions that grew too large between turns — it fires pre-agent
# to prevent API failures. The agent's own compressor handles
# normal context management during its tool loop with accurate
# real token counts. Having hygiene at 0.50 caused premature
# compression on every turn in long gateway sessions.
2026-03-08 20:08:02 -07:00
_hyg_model = " anthropic/claude-sonnet-4.6 "
2026-03-13 04:17:45 -07:00
_hyg_threshold_pct = 0.85
2026-03-08 20:08:02 -07:00
_hyg_compression_enabled = True
2026-03-22 15:15:25 -07:00
_hyg_config_context_length = None
_hyg_provider = None
_hyg_base_url = None
_hyg_api_key = None
2026-03-07 20:09:48 -08:00
try :
_hyg_cfg_path = _hermes_home / " config.yaml "
if _hyg_cfg_path . exists ( ) :
import yaml as _hyg_yaml
2026-03-09 21:19:20 -07:00
with open ( _hyg_cfg_path , encoding = " utf-8 " ) as _hyg_f :
2026-03-07 20:09:48 -08:00
_hyg_data = _hyg_yaml . safe_load ( _hyg_f ) or { }
2026-03-08 20:08:02 -07:00
# Resolve model name (same logic as run_sync)
_model_cfg = _hyg_data . get ( " model " , { } )
if isinstance ( _model_cfg , str ) :
_hyg_model = _model_cfg
elif isinstance ( _model_cfg , dict ) :
_hyg_model = _model_cfg . get ( " default " , _hyg_model )
2026-03-22 15:15:25 -07:00
# Read explicit context_length override from model config
# (same as run_agent.py lines 995-1005)
_raw_ctx = _model_cfg . get ( " context_length " )
if _raw_ctx is not None :
try :
_hyg_config_context_length = int ( _raw_ctx )
except ( TypeError , ValueError ) :
pass
# Read provider for accurate context detection
_hyg_provider = _model_cfg . get ( " provider " ) or None
_hyg_base_url = _model_cfg . get ( " base_url " ) or None
2026-03-08 20:08:02 -07:00
2026-03-13 04:17:45 -07:00
# Read compression settings — only use enabled flag.
# The threshold is intentionally separate from the agent's
# compression.threshold (hygiene runs higher).
2026-03-08 20:08:02 -07:00
_comp_cfg = _hyg_data . get ( " compression " , { } )
if isinstance ( _comp_cfg , dict ) :
_hyg_compression_enabled = str (
_comp_cfg . get ( " enabled " , True )
) . lower ( ) in ( " true " , " 1 " , " yes " )
2026-03-22 15:15:25 -07:00
# Resolve provider/base_url from runtime if not in config
if not _hyg_provider or not _hyg_base_url :
try :
_hyg_runtime = _resolve_runtime_agent_kwargs ( )
_hyg_provider = _hyg_provider or _hyg_runtime . get ( " provider " )
_hyg_base_url = _hyg_base_url or _hyg_runtime . get ( " base_url " )
_hyg_api_key = _hyg_runtime . get ( " api_key " )
except Exception :
pass
2026-03-07 20:09:48 -08:00
except Exception :
pass
2026-03-08 20:08:02 -07:00
if _hyg_compression_enabled :
2026-03-22 15:15:25 -07:00
_hyg_context_length = get_model_context_length (
_hyg_model ,
base_url = _hyg_base_url or " " ,
api_key = _hyg_api_key or " " ,
config_context_length = _hyg_config_context_length ,
provider = _hyg_provider or " " ,
)
2026-03-08 20:08:02 -07:00
_compress_token_threshold = int (
2026-03-10 23:28:18 -07:00
_hyg_context_length * _hyg_threshold_pct
2026-03-08 20:08:02 -07:00
)
2026-03-10 23:28:18 -07:00
_warn_token_threshold = int ( _hyg_context_length * 0.95 )
2026-03-07 20:09:48 -08:00
2026-03-08 20:08:02 -07:00
_msg_count = len ( history )
2026-03-10 23:28:18 -07:00
# Prefer actual API-reported tokens from the last turn
# (stored in session entry) over the rough char-based estimate.
_stored_tokens = session_entry . last_prompt_tokens
if _stored_tokens > 0 :
_approx_tokens = _stored_tokens
_token_source = " actual "
else :
_approx_tokens = estimate_messages_tokens_rough ( history )
_token_source = " estimated "
2026-03-22 15:21:18 -07:00
# Note: rough estimates overestimate by 30-50% for code/JSON-heavy
# sessions, but that just means hygiene fires a bit early — which
# is safe and harmless. The 85% threshold already provides ample
# headroom (agent's own compressor runs at 50%). A previous 1.4x
# multiplier tried to compensate by inflating the threshold, but
# 85% * 1.4 = 119% of context — which exceeds the model's limit
# and prevented hygiene from ever firing for ~200K models (GLM-5).
2026-03-07 20:09:48 -08:00
2026-03-08 20:08:02 -07:00
_needs_compress = _approx_tokens > = _compress_token_threshold
2026-03-07 20:09:48 -08:00
2026-03-08 20:08:02 -07:00
if _needs_compress :
logger . info (
2026-03-10 23:28:18 -07:00
" Session hygiene: %s messages, ~ %s tokens ( %s ) — auto-compressing "
2026-03-08 20:08:02 -07:00
" (threshold: %s %% of %s = %s tokens) " ,
2026-03-10 23:28:18 -07:00
_msg_count , f " { _approx_tokens : , } " , _token_source ,
2026-03-08 20:08:02 -07:00
int ( _hyg_threshold_pct * 100 ) ,
f " { _hyg_context_length : , } " ,
f " { _compress_token_threshold : , } " ,
)
2026-03-07 20:09:48 -08:00
2026-03-08 20:08:02 -07:00
_hyg_adapter = self . adapters . get ( source . platform )
2026-03-11 09:15:34 +01:00
_hyg_meta = { " thread_id " : source . thread_id } if source . thread_id else None
2026-03-08 20:08:02 -07:00
if _hyg_adapter :
try :
await _hyg_adapter . send (
source . chat_id ,
f " 🗜️ Session is large ( { _msg_count } messages, "
2026-03-11 09:15:34 +01:00
f " ~ { _approx_tokens : , } tokens). Auto-compressing... " ,
metadata = _hyg_meta ,
2026-03-07 20:09:48 -08:00
)
2026-03-08 20:08:02 -07:00
except Exception :
pass
2026-03-07 20:09:48 -08:00
2026-03-08 20:08:02 -07:00
try :
from run_agent import AIAgent
_hyg_runtime = _resolve_runtime_agent_kwargs ( )
if _hyg_runtime . get ( " api_key " ) :
_hyg_msgs = [
{ " role " : m . get ( " role " ) , " content " : m . get ( " content " ) }
for m in history
if m . get ( " role " ) in ( " user " , " assistant " )
and m . get ( " content " )
]
if len ( _hyg_msgs ) > = 4 :
_hyg_agent = AIAgent (
* * _hyg_runtime ,
2026-03-11 00:09:37 +01:00
model = _hyg_model ,
2026-03-08 20:08:02 -07:00
max_iterations = 4 ,
quiet_mode = True ,
enabled_toolsets = [ " memory " ] ,
session_id = session_entry . session_id ,
)
2026-03-26 17:40:31 -07:00
_hyg_agent . _print_fn = lambda * a , * * kw : None
2026-03-07 20:09:48 -08:00
2026-03-08 20:08:02 -07:00
loop = asyncio . get_event_loop ( )
_compressed , _ = await loop . run_in_executor (
None ,
lambda : _hyg_agent . _compress_context (
_hyg_msgs , " " ,
approx_tokens = _approx_tokens ,
) ,
)
2026-03-07 20:09:48 -08:00
2026-03-28 12:23:43 -07:00
# _compress_context ends the old session and creates
# a new session_id. Write compressed messages into
# the NEW session so the old transcript stays intact
# and searchable via session_search.
_hyg_new_sid = _hyg_agent . session_id
if _hyg_new_sid != session_entry . session_id :
session_entry . session_id = _hyg_new_sid
self . session_store . _save ( )
2026-03-08 20:08:02 -07:00
self . session_store . rewrite_transcript (
session_entry . session_id , _compressed
)
2026-03-10 23:40:24 -07:00
# Reset stored token count — transcript was rewritten
session_entry . last_prompt_tokens = 0
2026-03-08 20:08:02 -07:00
history = _compressed
_new_count = len ( _compressed )
_new_tokens = estimate_messages_tokens_rough (
_compressed
)
2026-03-07 20:09:48 -08:00
2026-03-08 20:08:02 -07:00
logger . info (
" Session hygiene: compressed %s → %s msgs, "
" ~ %s → ~ %s tokens " ,
_msg_count , _new_count ,
f " { _approx_tokens : , } " , f " { _new_tokens : , } " ,
2026-03-07 20:09:48 -08:00
)
2026-03-08 20:08:02 -07:00
2026-03-07 20:09:48 -08:00
if _hyg_adapter :
try :
await _hyg_adapter . send (
source . chat_id ,
2026-03-08 20:08:02 -07:00
f " 🗜️ Compressed: { _msg_count } → "
f " { _new_count } messages, "
f " ~ { _approx_tokens : , } → "
2026-03-11 09:15:34 +01:00
f " ~ { _new_tokens : , } tokens " ,
metadata = _hyg_meta ,
2026-03-07 20:09:48 -08:00
)
except Exception :
pass
2026-03-08 20:08:02 -07:00
# Still too large after compression — warn user
if _new_tokens > = _warn_token_threshold :
logger . warning (
" Session hygiene: still ~ %s tokens after "
" compression — suggesting /reset " ,
f " { _new_tokens : , } " ,
)
if _hyg_adapter :
try :
await _hyg_adapter . send (
source . chat_id ,
" ⚠️ Session is still very large "
" after compression "
f " (~ { _new_tokens : , } tokens). "
" Consider using /reset to start "
2026-03-11 09:15:34 +01:00
" fresh if you experience issues. " ,
metadata = _hyg_meta ,
2026-03-08 20:08:02 -07:00
)
except Exception :
pass
except Exception as e :
logger . warning (
" Session hygiene auto-compress failed: %s " , e
)
# Compression failed and session is dangerously large
if _approx_tokens > = _warn_token_threshold :
_hyg_adapter = self . adapters . get ( source . platform )
2026-03-11 09:15:34 +01:00
_hyg_meta = { " thread_id " : source . thread_id } if source . thread_id else None
2026-03-08 20:08:02 -07:00
if _hyg_adapter :
try :
await _hyg_adapter . send (
source . chat_id ,
f " ⚠️ Session is very large "
f " ( { _msg_count } messages, "
f " ~ { _approx_tokens : , } tokens) and "
" auto-compression failed. Consider "
" using /compress or /reset to avoid "
2026-03-11 09:15:34 +01:00
" issues. " ,
metadata = _hyg_meta ,
2026-03-08 20:08:02 -07:00
)
except Exception :
pass
2026-03-07 20:09:48 -08:00
2026-02-22 20:44:15 -08:00
# First-message onboarding -- only on the very first interaction ever
if not history and not self . session_store . has_any_sessions ( ) :
2026-02-22 02:16:11 -08:00
context_prompt + = (
2026-02-22 20:44:15 -08:00
" \n \n [System note: This is the user ' s very first message ever. "
2026-02-22 02:16:11 -08:00
" Briefly introduce yourself and mention that /help shows available commands. "
" Keep the introduction concise -- one or two sentences max.] "
)
2026-02-22 20:44:15 -08:00
# One-time prompt if no home channel is set for this platform
if not history and source . platform and source . platform != Platform . LOCAL :
platform_name = source . platform . value
env_key = f " { platform_name . upper ( ) } _HOME_CHANNEL "
if not os . getenv ( env_key ) :
adapter = self . adapters . get ( source . platform )
if adapter :
await adapter . send (
source . chat_id ,
f " 📬 No home channel is set for { platform_name . title ( ) } . "
f " A home channel is where Hermes delivers cron job results "
f " and cross-platform messages. \n \n "
2026-02-23 15:01:22 -08:00
f " Type /sethome to make this chat your home channel, "
2026-02-22 20:44:15 -08:00
f " or ignore to skip. "
)
2026-03-14 02:14:34 +03:00
# -----------------------------------------------------------------
# Voice channel awareness — inject current voice channel state
# into context so the agent knows who is in the channel and who
# is speaking, without needing a separate tool call.
# -----------------------------------------------------------------
if source . platform == Platform . DISCORD :
adapter = self . adapters . get ( Platform . DISCORD )
guild_id = self . _get_guild_id ( event )
if guild_id and adapter and hasattr ( adapter , " get_voice_channel_context " ) :
vc_context = adapter . get_voice_channel_context ( guild_id )
if vc_context :
context_prompt + = f " \n \n { vc_context } "
2026-02-15 16:10:50 -08:00
# -----------------------------------------------------------------
# Auto-analyze images sent by the user
#
# If the user attached image(s), we run the vision tool eagerly so
# the conversation model always receives a text description. The
# local file path is also included so the model can re-examine the
# image later with a more targeted question via vision_analyze.
#
# We filter to image paths only (by media_type) so that non-image
# attachments (documents, audio, etc.) are not sent to the vision
# tool even when they appear in the same message.
# -----------------------------------------------------------------
message_text = event . text or " "
if event . media_urls :
image_paths = [ ]
for i , path in enumerate ( event . media_urls ) :
# Check media_types if available; otherwise infer from message type
mtype = event . media_types [ i ] if i < len ( event . media_types ) else " "
is_image = (
mtype . startswith ( " image/ " )
or event . message_type == MessageType . PHOTO
)
if is_image :
image_paths . append ( path )
if image_paths :
message_text = await self . _enrich_message_with_vision (
message_text , image_paths
)
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# -----------------------------------------------------------------
# Auto-transcribe voice/audio messages sent by the user
# -----------------------------------------------------------------
if event . media_urls :
audio_paths = [ ]
for i , path in enumerate ( event . media_urls ) :
mtype = event . media_types [ i ] if i < len ( event . media_types ) else " "
is_audio = (
mtype . startswith ( " audio/ " )
or event . message_type in ( MessageType . VOICE , MessageType . AUDIO )
)
if is_audio :
audio_paths . append ( path )
if audio_paths :
message_text = await self . _enrich_message_with_transcription (
message_text , audio_paths
)
2026-03-18 03:01:41 -07:00
# If STT failed, send a direct message to the user so they
# know voice isn't configured — don't rely on the agent to
# relay the error clearly.
_stt_fail_markers = (
" No STT provider " ,
" STT is disabled " ,
" can ' t listen " ,
" VOICE_TOOLS_OPENAI_KEY " ,
)
if any ( m in message_text for m in _stt_fail_markers ) :
_stt_adapter = self . adapters . get ( source . platform )
_stt_meta = { " thread_id " : source . thread_id } if source . thread_id else None
if _stt_adapter :
try :
2026-03-18 03:17:23 -07:00
_stt_msg = (
2026-03-18 03:01:41 -07:00
" 🎤 I received your voice message but can ' t transcribe it — "
" no speech-to-text provider is configured. \n \n "
" To enable voice: install faster-whisper "
" (`pip install faster-whisper` in the Hermes venv) "
" and set `stt.enabled: true` in config.yaml, "
2026-03-18 03:17:23 -07:00
" then /restart the gateway. "
)
# Point to setup skill if it's installed
if self . _has_setup_skill ( ) :
_stt_msg + = " \n \n For full setup instructions, type: `/skill hermes-agent-setup` "
await _stt_adapter . send (
source . chat_id , _stt_msg ,
2026-03-18 03:01:41 -07:00
metadata = _stt_meta ,
)
except Exception :
pass
feat(telegram): add document file processing for PDF, text, and Office files
Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 11:44:57 -05:00
# -----------------------------------------------------------------
# Enrich document messages with context notes for the agent
# -----------------------------------------------------------------
if event . media_urls and event . message_type == MessageType . DOCUMENT :
for i , path in enumerate ( event . media_urls ) :
mtype = event . media_types [ i ] if i < len ( event . media_types ) else " "
if not ( mtype . startswith ( " application/ " ) or mtype . startswith ( " text/ " ) ) :
continue
# Extract display filename by stripping the doc_{uuid12}_ prefix
import os as _os
basename = _os . path . basename ( path )
# Format: doc_<12hex>_<original_filename>
parts = basename . split ( " _ " , 2 )
display_name = parts [ 2 ] if len ( parts ) > = 3 else basename
2026-02-27 11:53:46 -05:00
# Sanitize to prevent prompt injection via filenames
import re as _re
display_name = _re . sub ( r ' [^ \ w. \ - ] ' , ' _ ' , display_name )
feat(telegram): add document file processing for PDF, text, and Office files
Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 11:44:57 -05:00
if mtype . startswith ( " text/ " ) :
context_note = (
f " [The user sent a text document: ' { display_name } ' . "
f " Its content has been included below. "
f " The file is also saved at: { path } ] "
)
else :
context_note = (
f " [The user sent a document: ' { display_name } ' . "
f " The file is saved at: { path } . "
f " Ask the user what they ' d like you to do with it.] "
)
message_text = f " { context_note } \n \n { message_text } "
feat(gateway): inject reply-to message context for out-of-session replies (#1594)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.
- Apply truncate_message() chunking in _send_to_platform() before
dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement
Cherry-picked from PR #1557 by llbn.
* fix(approval): show full command in dangerous command approval (#1553)
Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:
- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests
Cherry-picked from PR #1566 by crazywriter1.
* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)
The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.
Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)
When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.
Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.
Cherry-picked from PR #1593 by ygd58.
* fix(security): block sandbox backend creds from subprocess env (#1264)
Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.
Cherry-picked from PR #1571 by ygd58.
* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.
Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.
The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).
* fix(gateway): /model shows active fallback model instead of config default (#1615)
When the agent falls back to a different model (e.g. due to rate
limiting), /model still showed the config default. Now tracks the
effective model/provider after each agent run and displays it.
Cleared when the primary model succeeds again or the user explicitly
switches via /model.
Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for
test compatibility.
* feat(gateway): inject reply-to message context for out-of-session replies (#1594)
When a user replies to a Telegram message, check if the quoted text
exists in the current session transcript. If missing (from cron jobs,
background tasks, or old sessions), prepend [Replying to: "..."] to
the message so the agent has context about what's being referenced.
- Add reply_to_text field to MessageEvent (base.py)
- Populate from Telegram's reply_to_message (text or caption)
- Inject context in _handle_message when not found in history
Based on PR #1596 by anpicasso (cherry-picked reply-to feature only,
excluded unrelated /server command and background delegation changes).
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
Co-authored-by: Angello Picasso <angello.picasso@devsu.com>
2026-03-17 02:31:27 -07:00
# -----------------------------------------------------------------
# Inject reply context when user replies to a message not in history.
# Telegram (and other platforms) let users reply to specific messages,
# but if the quoted message is from a previous session, cron delivery,
# or background task, the agent has no context about what's being
# referenced. Prepend the quoted text so the agent understands. (#1594)
# -----------------------------------------------------------------
if getattr ( event , ' reply_to_text ' , None ) and event . reply_to_message_id :
reply_snippet = event . reply_to_text [ : 500 ]
found_in_history = any (
reply_snippet [ : 200 ] in ( msg . get ( " content " ) or " " )
for msg in history
if msg . get ( " role " ) in ( " assistant " , " user " , " tool " )
)
if not found_in_history :
message_text = f ' [Replying to: " { reply_snippet } " ] \n \n { message_text } '
2026-02-02 19:01:51 -08:00
try :
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# Emit agent:start hook
hook_ctx = {
" platform " : source . platform . value if source . platform else " " ,
" user_id " : source . user_id ,
" session_id " : session_entry . session_id ,
" message " : message_text [ : 500 ] ,
}
await self . hooks . emit ( " agent:start " , hook_ctx )
feat: @ context references — inline file, folder, diff, git, and URL injection
Add @file:path, @folder:dir, @diff, @staged, @git:N, and @url:
references that expand inline before the message reaches the LLM.
Supports line ranges (@file:main.py:10-50), token budget enforcement
(soft warn at 25%, hard block at 50%), and path sandboxing for gateway.
Core module from PR #2090 by @kshitijk4poor. CLI and gateway wiring
rewritten against current main. Fixed asyncio.run() crash when called
from inside a running event loop (gateway).
Closes #682.
2026-03-21 15:57:13 -07:00
# Expand @ context references (@file:, @folder:, @diff, etc.)
if " @ " in message_text :
try :
from agent . context_references import preprocess_context_references_async
from agent . model_metadata import get_model_context_length
_msg_cwd = os . environ . get ( " MESSAGING_CWD " , os . path . expanduser ( " ~ " ) )
_msg_ctx_len = get_model_context_length (
self . _model , base_url = self . _base_url or " " )
_ctx_result = await preprocess_context_references_async (
message_text , cwd = _msg_cwd ,
context_length = _msg_ctx_len , allowed_root = _msg_cwd )
if _ctx_result . blocked :
_adapter = self . adapters . get ( source . platform )
if _adapter :
await _adapter . send (
source . chat_id ,
" \n " . join ( _ctx_result . warnings ) or " Context injection refused. " ,
)
return
if _ctx_result . expanded :
message_text = _ctx_result . message
except Exception as exc :
logger . debug ( " @ context reference expansion failed: %s " , exc )
2026-02-02 19:01:51 -08:00
# Run the agent
2026-02-16 00:53:17 -08:00
agent_result = await self . _run_agent (
2026-02-15 16:10:50 -08:00
message = message_text ,
2026-02-02 19:01:51 -08:00
context_prompt = context_prompt ,
history = history ,
source = source ,
2026-02-03 16:15:49 -08:00
session_id = session_entry . session_id ,
2026-03-25 15:51:15 -07:00
session_key = session_key ,
event_message_id = event . message_id ,
2026-02-02 19:01:51 -08:00
)
2026-03-22 04:47:53 -07:00
# Stop persistent typing indicator now that the agent is done
try :
_typing_adapter = self . adapters . get ( source . platform )
if _typing_adapter and hasattr ( _typing_adapter , " stop_typing " ) :
await _typing_adapter . stop_typing ( source . chat_id )
except Exception :
pass
2026-03-17 00:30:26 +03:00
response = agent_result . get ( " final_response " ) or " "
2026-02-16 00:53:17 -08:00
agent_messages = agent_result . get ( " messages " , [ ] )
fix: /reasoning command — add gateway support, fix display, persist settings (#1031)
* fix: /reasoning command output ordering, display, and inline think extraction
Three issues with the /reasoning command:
1. Output interleaving: The command echo used print() while feedback
used _cprint(), causing them to render out-of-order under
prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
all output renders through the same path in correct order.
2. Reasoning display not working: /reasoning show toggled a flag
but reasoning never appeared for models that embed thinking in
inline <think> blocks rather than structured API fields. Added
fallback extraction in _build_assistant_message to capture
<think> block content as reasoning when no structured reasoning
fields (reasoning, reasoning_content, reasoning_details) are
present. This feeds into both the reasoning callback (during
tool loops) and the post-response reasoning box display.
3. Feedback clarity: Added checkmarks to confirm actions, persisted
show/hide to config (was session-only before), and aligned the
status display for readability.
Tests: 7 new tests for inline think block extraction (41 total).
* feat: add /reasoning command to gateway (Telegram/Discord/etc)
The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:
1. /reasoning command handler in the gateway:
- No args: shows current effort level and display state
- /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
- /reasoning show|hide: toggles reasoning display in responses
- All changes saved to config.yaml immediately
2. Reasoning display in gateway responses:
- When show_reasoning is enabled, prepends a 'Reasoning' block
with the model's last_reasoning content before the response
- Collapses long reasoning (>15 lines) to keep messages readable
- Uses last_reasoning from run_conversation result dict
3. Plumbing:
- Added _show_reasoning attribute loaded from config at startup
- Propagated last_reasoning through _run_agent return dict
- Added /reasoning to help text and known_commands set
- Uses getattr for _show_reasoning to handle test stubs
2026-03-12 05:38:19 -07:00
2026-03-17 00:30:26 +03:00
# Surface error details when the agent failed silently (final_response=None)
if not response and agent_result . get ( " failed " ) :
error_detail = agent_result . get ( " error " , " unknown error " )
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
error_str = str ( error_detail ) . lower ( )
# Detect context-overflow failures and give specific guidance.
# Generic 400 "Error" from Anthropic with large sessions is the
# most common cause of this (#1630).
_is_ctx_fail = any ( p in error_str for p in (
" context " , " token " , " too large " , " too long " ,
" exceed " , " payload " ,
) ) or (
" 400 " in error_str
and len ( history ) > 50
2026-03-17 00:30:26 +03:00
)
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
if _is_ctx_fail :
response = (
" ⚠️ Session too large for the model ' s context window. \n "
" Use /compact to compress the conversation, or "
" /reset to start fresh. "
)
else :
response = (
f " The request failed: { str ( error_detail ) [ : 300 ] } \n "
" Try again or use /reset to start a fresh session. "
)
2026-03-13 04:14:35 -07:00
# If the agent's session_id changed during compression, update
# session_entry so transcript writes below go to the right session.
if agent_result . get ( " session_id " ) and agent_result [ " session_id " ] != session_entry . session_id :
session_entry . session_id = agent_result [ " session_id " ]
fix: /reasoning command — add gateway support, fix display, persist settings (#1031)
* fix: /reasoning command output ordering, display, and inline think extraction
Three issues with the /reasoning command:
1. Output interleaving: The command echo used print() while feedback
used _cprint(), causing them to render out-of-order under
prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
all output renders through the same path in correct order.
2. Reasoning display not working: /reasoning show toggled a flag
but reasoning never appeared for models that embed thinking in
inline <think> blocks rather than structured API fields. Added
fallback extraction in _build_assistant_message to capture
<think> block content as reasoning when no structured reasoning
fields (reasoning, reasoning_content, reasoning_details) are
present. This feeds into both the reasoning callback (during
tool loops) and the post-response reasoning box display.
3. Feedback clarity: Added checkmarks to confirm actions, persisted
show/hide to config (was session-only before), and aligned the
status display for readability.
Tests: 7 new tests for inline think block extraction (41 total).
* feat: add /reasoning command to gateway (Telegram/Discord/etc)
The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:
1. /reasoning command handler in the gateway:
- No args: shows current effort level and display state
- /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
- /reasoning show|hide: toggles reasoning display in responses
- All changes saved to config.yaml immediately
2. Reasoning display in gateway responses:
- When show_reasoning is enabled, prepends a 'Reasoning' block
with the model's last_reasoning content before the response
- Collapses long reasoning (>15 lines) to keep messages readable
- Uses last_reasoning from run_conversation result dict
3. Plumbing:
- Added _show_reasoning attribute loaded from config at startup
- Propagated last_reasoning through _run_agent return dict
- Added /reasoning to help text and known_commands set
- Uses getattr for _show_reasoning to handle test stubs
2026-03-12 05:38:19 -07:00
# Prepend reasoning/thinking if display is enabled
if getattr ( self , " _show_reasoning " , False ) and response :
last_reasoning = agent_result . get ( " last_reasoning " )
if last_reasoning :
# Collapse long reasoning to keep messages readable
lines = last_reasoning . strip ( ) . splitlines ( )
if len ( lines ) > 15 :
display_reasoning = " \n " . join ( lines [ : 15 ] )
display_reasoning + = f " \n _... ( { len ( lines ) - 15 } more lines)_ "
else :
display_reasoning = last_reasoning . strip ( )
response = f " 💭 **Reasoning:** \n ``` \n { display_reasoning } \n ``` \n \n { response } "
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# Emit agent:end hook
await self . hooks . emit ( " agent:end " , {
* * hook_ctx ,
" response " : ( response or " " ) [ : 500 ] ,
} )
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
# Check for pending process watchers (check_interval on background processes)
try :
from tools . process_registry import process_registry
while process_registry . pending_watchers :
watcher = process_registry . pending_watchers . pop ( 0 )
asyncio . create_task ( self . _run_process_watcher ( watcher ) )
except Exception as e :
2026-02-21 03:11:11 -08:00
logger . error ( " Process watcher setup error: %s " , e )
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
2026-02-12 10:05:08 -08:00
# Check if the agent encountered a dangerous command needing approval
try :
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
from tools . approval import pop_pending
fix(gateway): replace bare text approval with /approve and /deny commands (#2002)
The gateway approval system previously intercepted bare 'yes'/'no' text
from the user's next message to approve/deny dangerous commands. This was
fragile and dangerous — if the agent asked a clarify question and the user
said 'yes' to answer it, the gateway would execute the pending dangerous
command instead. (Fixes #1888)
Changes:
- Remove bare text matching ('yes', 'y', 'approve', 'ok', etc.) from
_handle_message approval check
- Add /approve and /deny as gateway-only slash commands in the command
registry
- /approve supports scoping: /approve (one-time), /approve session,
/approve always (permanent)
- Add 5-minute timeout for stale approvals
- Gateway appends structured instructions to the agent response when a
dangerous command is pending, telling the user exactly how to respond
- 9 tests covering approve, deny, timeout, scoping, and verification
that bare 'yes' no longer triggers execution
Credit to @solo386 and @FlyByNight69420 for identifying and reporting
this security issue in PR #1971 and issue #1888.
Co-authored-by: Test <test@test.com>
2026-03-18 16:58:20 -07:00
import time as _time
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
pending = pop_pending ( session_key )
if pending :
fix(gateway): replace bare text approval with /approve and /deny commands (#2002)
The gateway approval system previously intercepted bare 'yes'/'no' text
from the user's next message to approve/deny dangerous commands. This was
fragile and dangerous — if the agent asked a clarify question and the user
said 'yes' to answer it, the gateway would execute the pending dangerous
command instead. (Fixes #1888)
Changes:
- Remove bare text matching ('yes', 'y', 'approve', 'ok', etc.) from
_handle_message approval check
- Add /approve and /deny as gateway-only slash commands in the command
registry
- /approve supports scoping: /approve (one-time), /approve session,
/approve always (permanent)
- Add 5-minute timeout for stale approvals
- Gateway appends structured instructions to the agent response when a
dangerous command is pending, telling the user exactly how to respond
- 9 tests covering approve, deny, timeout, scoping, and verification
that bare 'yes' no longer triggers execution
Credit to @solo386 and @FlyByNight69420 for identifying and reporting
this security issue in PR #1971 and issue #1888.
Co-authored-by: Test <test@test.com>
2026-03-18 16:58:20 -07:00
pending [ " timestamp " ] = _time . time ( )
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
self . _pending_approvals [ session_key ] = pending
fix(gateway): replace bare text approval with /approve and /deny commands (#2002)
The gateway approval system previously intercepted bare 'yes'/'no' text
from the user's next message to approve/deny dangerous commands. This was
fragile and dangerous — if the agent asked a clarify question and the user
said 'yes' to answer it, the gateway would execute the pending dangerous
command instead. (Fixes #1888)
Changes:
- Remove bare text matching ('yes', 'y', 'approve', 'ok', etc.) from
_handle_message approval check
- Add /approve and /deny as gateway-only slash commands in the command
registry
- /approve supports scoping: /approve (one-time), /approve session,
/approve always (permanent)
- Add 5-minute timeout for stale approvals
- Gateway appends structured instructions to the agent response when a
dangerous command is pending, telling the user exactly how to respond
- 9 tests covering approve, deny, timeout, scoping, and verification
that bare 'yes' no longer triggers execution
Credit to @solo386 and @FlyByNight69420 for identifying and reporting
this security issue in PR #1971 and issue #1888.
Co-authored-by: Test <test@test.com>
2026-03-18 16:58:20 -07:00
# Append structured instructions so the user knows how to respond
cmd_preview = pending . get ( " command " , " " )
if len ( cmd_preview ) > 200 :
cmd_preview = cmd_preview [ : 200 ] + " ... "
approval_hint = (
f " \n \n ⚠️ **Dangerous command requires approval:** \n "
f " ``` \n { cmd_preview } \n ``` \n "
f " Reply `/approve` to execute, `/approve session` to approve this pattern "
f " for the session, or `/deny` to cancel. "
)
response = ( response or " " ) + approval_hint
2026-02-21 03:32:11 -08:00
except Exception as e :
logger . debug ( " Failed to check pending approvals: %s " , e )
2026-02-12 10:05:08 -08:00
2026-02-16 00:53:17 -08:00
# Save the full conversation to the transcript, including tool calls.
# This preserves the complete agent loop (tool_calls, tool results,
# intermediate reasoning) so sessions can be resumed with full context
# and transcripts are useful for debugging and training data.
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
#
# IMPORTANT: When the agent failed before producing any response
# (e.g. context-overflow 400), do NOT persist the user's message.
# Persisting it would make the session even larger, causing the
# same failure on the next attempt — an infinite loop. (#1630)
agent_failed_early = (
agent_result . get ( " failed " )
and not agent_result . get ( " final_response " )
)
if agent_failed_early :
logger . info (
" Skipping transcript persistence for failed request in "
" session %s to prevent session growth loop. " ,
session_entry . session_id ,
)
2026-02-16 00:53:17 -08:00
ts = datetime . now ( ) . isoformat ( )
# If this is a fresh session (no history), write the full tool
# definitions as the first entry so the transcript is self-describing
# -- the same list of dicts sent as tools=[...] in the API request.
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
if agent_failed_early :
pass # Skip all transcript writes — don't grow a broken session
elif not history :
2026-02-16 00:55:18 -08:00
tool_defs = agent_result . get ( " tools " , [ ] )
2026-02-16 00:53:17 -08:00
self . session_store . append_to_transcript (
session_entry . session_id ,
{
" role " : " session_meta " ,
" tools " : tool_defs or [ ] ,
" model " : os . getenv ( " HERMES_MODEL " , " " ) ,
" platform " : source . platform . value if source . platform else " " ,
" timestamp " : ts ,
}
)
2026-03-04 21:34:40 +03:00
# Find only the NEW messages from this turn (skip history we loaded).
# Use the filtered history length (history_offset) that was actually
# passed to the agent, not len(history) which includes session_meta
# entries that were stripped before the agent saw them.
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
if not agent_failed_early :
history_len = agent_result . get ( " history_offset " , len ( history ) )
new_messages = agent_messages [ history_len : ] if len ( agent_messages ) > history_len else [ ]
# If no new messages found (edge case), fall back to simple user/assistant
if not new_messages :
2026-02-16 00:53:17 -08:00
self . session_store . append_to_transcript (
session_entry . session_id ,
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
{ " role " : " user " , " content " : message_text , " timestamp " : ts }
2026-02-16 00:53:17 -08:00
)
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
if response :
self . session_store . append_to_transcript (
session_entry . session_id ,
{ " role " : " assistant " , " content " : response , " timestamp " : ts }
)
else :
# The agent already persisted these messages to SQLite via
# _flush_messages_to_session_db(), so skip the DB write here
# to prevent the duplicate-write bug (#860). We still write
# to JSONL for backward compatibility and as a backup.
agent_persisted = self . _session_db is not None
for msg in new_messages :
# Skip system messages (they're rebuilt each run)
if msg . get ( " role " ) == " system " :
continue
# Add timestamp to each message for debugging
entry = { * * msg , " timestamp " : ts }
self . session_store . append_to_transcript (
session_entry . session_id , entry ,
skip_db = agent_persisted ,
)
2026-02-02 19:01:51 -08:00
2026-03-11 17:44:37 -07:00
# Update session with actual prompt token count and model from the agent
2026-03-10 23:28:18 -07:00
self . session_store . update_session (
session_entry . session_key ,
2026-03-15 19:18:58 -07:00
input_tokens = agent_result . get ( " input_tokens " , 0 ) ,
output_tokens = agent_result . get ( " output_tokens " , 0 ) ,
2026-03-17 03:44:44 -07:00
cache_read_tokens = agent_result . get ( " cache_read_tokens " , 0 ) ,
cache_write_tokens = agent_result . get ( " cache_write_tokens " , 0 ) ,
2026-03-10 23:28:18 -07:00
last_prompt_tokens = agent_result . get ( " last_prompt_tokens " , 0 ) ,
2026-03-11 17:44:37 -07:00
model = agent_result . get ( " model " ) ,
2026-03-17 03:44:44 -07:00
estimated_cost_usd = agent_result . get ( " estimated_cost_usd " ) ,
cost_status = agent_result . get ( " cost_status " ) ,
cost_source = agent_result . get ( " cost_source " ) ,
provider = agent_result . get ( " provider " ) ,
base_url = agent_result . get ( " base_url " ) ,
2026-03-10 23:28:18 -07:00
)
2026-03-10 22:55:36 +03:00
# Auto voice reply: send TTS audio before the text response
2026-03-21 08:08:37 -07:00
_already_sent = bool ( agent_result . get ( " already_sent " ) )
if self . _should_send_voice_reply ( event , response , agent_messages , already_sent = _already_sent ) :
2026-03-11 23:18:49 +03:00
await self . _send_voice_reply ( event , response )
2026-03-10 22:55:36 +03:00
2026-03-21 16:01:25 -07:00
# If streaming already delivered the response, extract and
# deliver any MEDIA: files before returning None. Streaming
# sends raw text chunks that include MEDIA: tags — the normal
# post-processing in _process_message_background is skipped
# when already_sent is True, so media files would never be
# delivered without this.
feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent
Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:
- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally
Config (config.yaml):
streaming:
enabled: true
transport: edit # progressive editMessageText
edit_interval: 0.3 # seconds between edits
buffer_threshold: 40 # chars before forcing flush
cursor: ' ▉'
Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).
2026-03-16 05:52:42 -07:00
if agent_result . get ( " already_sent " ) :
2026-03-21 16:01:25 -07:00
if response :
2026-03-22 04:35:27 -07:00
_media_adapter = self . adapters . get ( source . platform )
if _media_adapter :
await self . _deliver_media_from_response (
response , event , _media_adapter ,
)
feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent
Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:
- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally
Config (config.yaml):
streaming:
enabled: true
transport: edit # progressive editMessageText
edit_interval: 0.3 # seconds between edits
buffer_threshold: 40 # chars before forcing flush
cursor: ' ▉'
Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).
2026-03-16 05:52:42 -07:00
return None
2026-02-02 19:01:51 -08:00
return response
except Exception as e :
2026-03-22 04:47:53 -07:00
# Stop typing indicator on error too
try :
_err_adapter = self . adapters . get ( source . platform )
if _err_adapter and hasattr ( _err_adapter , " stop_typing " ) :
await _err_adapter . stop_typing ( source . chat_id )
except Exception :
pass
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
logger . exception ( " Agent error in session %s " , session_key )
2026-03-17 00:30:26 +03:00
error_type = type ( e ) . __name__
error_detail = str ( e ) [ : 300 ] if str ( e ) else " no details available "
status_hint = " "
status_code = getattr ( e , " status_code " , None )
2026-03-17 13:25:20 -07:00
_hist_len = len ( history ) if ' history ' in locals ( ) else 0
2026-03-17 00:30:26 +03:00
if status_code == 401 :
status_hint = " Check your API key or run `claude /login` to refresh OAuth credentials. "
elif status_code == 429 :
2026-03-17 13:25:20 -07:00
# Check if this is a plan usage limit (resets on a schedule) vs a transient rate limit
_err_body = getattr ( e , " response " , None )
_err_json = { }
try :
if _err_body is not None :
_err_json = _err_body . json ( ) . get ( " error " , { } )
except Exception :
pass
if _err_json . get ( " type " ) == " usage_limit_reached " :
_resets_in = _err_json . get ( " resets_in_seconds " )
if _resets_in and _resets_in > 0 :
import math
_hours = math . ceil ( _resets_in / 3600 )
status_hint = f " Your plan ' s usage limit has been reached. It resets in ~ { _hours } h. "
else :
status_hint = " Your plan ' s usage limit has been reached. Please wait until it resets. "
else :
status_hint = " You are being rate-limited. Please wait a moment and try again. "
2026-03-17 00:30:26 +03:00
elif status_code == 529 :
status_hint = " The API is temporarily overloaded. Please try again shortly. "
2026-03-17 13:25:20 -07:00
elif status_code in ( 400 , 500 ) :
# 400 with a large session is context overflow.
# 500 with a large session often means the payload is too large
# for the API to process — treat it the same way.
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
if _hist_len > 50 :
return (
" ⚠️ Session too large for the model ' s context window. \n "
" Use /compact to compress the conversation, or "
" /reset to start fresh. "
)
2026-03-17 13:25:20 -07:00
elif status_code == 400 :
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
status_hint = " The request was rejected by the API. "
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
return (
2026-03-17 00:30:26 +03:00
f " Sorry, I encountered an error ( { error_type } ). \n "
f " { error_detail } \n "
f " { status_hint } "
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
" Try again or use /reset to start a fresh session. "
)
2026-02-02 19:01:51 -08:00
finally :
# Clear session env
self . _clear_session_env ( )
feat(gateway): surface session config on /new, /reset, and auto-reset (#3321)
When a new session starts in the gateway (via /new, /reset, or
auto-reset), send the user a summary of the detected configuration:
✨ Session reset! Starting fresh.
◆ Model: qwen3.5:27b-q4_K_M
◆ Provider: custom
◆ Context: 8K tokens (config)
◆ Endpoint: http://localhost:11434/v1
This makes misconfigured context length immediately visible — a user
running a local 8K model that falls to the 128K default will see:
◆ Context: 128K tokens (default — set model.context_length in config to override)
Instead of silently getting no compression and degrading responses.
- _format_session_info() resolves model, provider, context length,
and endpoint from config + runtime, matching the hygiene code's
resolution chain
- Local/custom endpoints shown; cloud endpoints hidden (not useful)
- Context source annotated: config, detected, or default with hint
- Appended to /new and /reset responses, and auto-reset notifications
- 9 tests covering all formatting paths and failure resilience
Addresses the user-facing side of #2708 — instead of trying to fix
every edge case in context detection, surface the values so users
can immediately see when something is wrong.
2026-03-26 19:27:58 -07:00
def _format_session_info ( self ) - > str :
""" Resolve current model config and return a formatted info block.
Surfaces model , provider , context length , and endpoint so gateway
users can immediately see if context detection went wrong ( e . g .
local models falling to the 128 K default ) .
"""
from agent . model_metadata import get_model_context_length , DEFAULT_FALLBACK_CONTEXT
model = _resolve_gateway_model ( )
config_context_length = None
provider = None
base_url = None
api_key = None
try :
cfg_path = _hermes_home / " config.yaml "
if cfg_path . exists ( ) :
import yaml as _info_yaml
with open ( cfg_path , encoding = " utf-8 " ) as f :
data = _info_yaml . safe_load ( f ) or { }
model_cfg = data . get ( " model " , { } )
if isinstance ( model_cfg , dict ) :
raw_ctx = model_cfg . get ( " context_length " )
if raw_ctx is not None :
try :
config_context_length = int ( raw_ctx )
except ( TypeError , ValueError ) :
pass
provider = model_cfg . get ( " provider " ) or None
base_url = model_cfg . get ( " base_url " ) or None
except Exception :
pass
# Resolve runtime credentials for probing
try :
runtime = _resolve_runtime_agent_kwargs ( )
provider = provider or runtime . get ( " provider " )
base_url = base_url or runtime . get ( " base_url " )
api_key = runtime . get ( " api_key " )
except Exception :
pass
context_length = get_model_context_length (
model ,
base_url = base_url or " " ,
api_key = api_key or " " ,
config_context_length = config_context_length ,
provider = provider or " " ,
)
# Format context source hint
if config_context_length is not None :
ctx_source = " config "
elif context_length == DEFAULT_FALLBACK_CONTEXT :
ctx_source = " default — set model.context_length in config to override "
else :
ctx_source = " detected "
# Format context length for display
if context_length > = 1_000_000 :
ctx_display = f " { context_length / 1_000_000 : .1f } M "
elif context_length > = 1_000 :
ctx_display = f " { context_length / / 1_000 } K "
else :
ctx_display = str ( context_length )
lines = [
f " ◆ Model: ` { model } ` " ,
f " ◆ Provider: { provider or ' openrouter ' } " ,
f " ◆ Context: { ctx_display } tokens ( { ctx_source } ) " ,
]
# Show endpoint for local/custom setups
if base_url and ( " localhost " in base_url or " 127.0.0.1 " in base_url or " 0.0.0.0 " in base_url ) :
lines . append ( f " ◆ Endpoint: { base_url } " )
return " \n " . join ( lines )
2026-02-02 19:01:51 -08:00
async def _handle_reset_command ( self , event : MessageEvent ) - > str :
""" Handle /new or /reset command. """
source = event . source
# Get existing session key
2026-03-16 00:22:23 -07:00
session_key = self . _session_key_for_source ( source )
2026-02-02 19:01:51 -08:00
2026-03-07 11:27:50 -08:00
# Flush memories in the background (fire-and-forget) so the user
# gets the "Session reset!" response immediately.
2026-02-22 13:28:13 -08:00
try :
2026-03-02 00:14:49 -08:00
old_entry = self . session_store . _entries . get ( session_key )
2026-02-22 13:28:13 -08:00
if old_entry :
2026-03-26 14:33:48 -07:00
_flush_task = asyncio . create_task (
2026-03-16 00:23:47 -07:00
self . _async_flush_memories ( old_entry . session_id , session_key )
)
2026-03-26 14:33:48 -07:00
self . _background_tasks . add ( _flush_task )
_flush_task . add_done_callback ( self . _background_tasks . discard )
2026-02-22 13:28:13 -08:00
except Exception as e :
logger . debug ( " Gateway memory flush on reset failed: %s " , e )
2026-03-10 02:06:17 -07:00
self . _shutdown_gateway_honcho ( session_key )
feat(gateway): cache AIAgent per session for prompt caching
The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.
Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents
Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.
This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.
2026-03-21 13:07:08 -07:00
self . _evict_cached_agent ( session_key )
2026-02-22 13:28:13 -08:00
2026-02-02 19:01:51 -08:00
# Reset the session
new_entry = self . session_store . reset_session ( session_key )
2026-03-17 04:17:44 -07:00
# Emit session:end hook (session is ending)
await self . hooks . emit ( " session:end " , {
" platform " : source . platform . value if source . platform else " " ,
" user_id " : source . user_id ,
" session_key " : session_key ,
} )
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# Emit session:reset hook
await self . hooks . emit ( " session:reset " , {
" platform " : source . platform . value if source . platform else " " ,
" user_id " : source . user_id ,
" session_key " : session_key ,
} )
feat(gateway): surface session config on /new, /reset, and auto-reset (#3321)
When a new session starts in the gateway (via /new, /reset, or
auto-reset), send the user a summary of the detected configuration:
✨ Session reset! Starting fresh.
◆ Model: qwen3.5:27b-q4_K_M
◆ Provider: custom
◆ Context: 8K tokens (config)
◆ Endpoint: http://localhost:11434/v1
This makes misconfigured context length immediately visible — a user
running a local 8K model that falls to the 128K default will see:
◆ Context: 128K tokens (default — set model.context_length in config to override)
Instead of silently getting no compression and degrading responses.
- _format_session_info() resolves model, provider, context length,
and endpoint from config + runtime, matching the hygiene code's
resolution chain
- Local/custom endpoints shown; cloud endpoints hidden (not useful)
- Context source annotated: config, detected, or default with hint
- Appended to /new and /reset responses, and auto-reset notifications
- 9 tests covering all formatting paths and failure resilience
Addresses the user-facing side of #2708 — instead of trying to fix
every edge case in context detection, surface the values so users
can immediately see when something is wrong.
2026-03-26 19:27:58 -07:00
# Resolve session config info to surface to the user
try :
session_info = self . _format_session_info ( )
except Exception :
session_info = " "
2026-02-02 19:01:51 -08:00
if new_entry :
feat(gateway): surface session config on /new, /reset, and auto-reset (#3321)
When a new session starts in the gateway (via /new, /reset, or
auto-reset), send the user a summary of the detected configuration:
✨ Session reset! Starting fresh.
◆ Model: qwen3.5:27b-q4_K_M
◆ Provider: custom
◆ Context: 8K tokens (config)
◆ Endpoint: http://localhost:11434/v1
This makes misconfigured context length immediately visible — a user
running a local 8K model that falls to the 128K default will see:
◆ Context: 128K tokens (default — set model.context_length in config to override)
Instead of silently getting no compression and degrading responses.
- _format_session_info() resolves model, provider, context length,
and endpoint from config + runtime, matching the hygiene code's
resolution chain
- Local/custom endpoints shown; cloud endpoints hidden (not useful)
- Context source annotated: config, detected, or default with hint
- Appended to /new and /reset responses, and auto-reset notifications
- 9 tests covering all formatting paths and failure resilience
Addresses the user-facing side of #2708 — instead of trying to fix
every edge case in context detection, surface the values so users
can immediately see when something is wrong.
2026-03-26 19:27:58 -07:00
header = " ✨ Session reset! Starting fresh. "
2026-02-02 19:01:51 -08:00
else :
# No existing session, just create one
self . session_store . get_or_create_session ( source , force_new = True )
feat(gateway): surface session config on /new, /reset, and auto-reset (#3321)
When a new session starts in the gateway (via /new, /reset, or
auto-reset), send the user a summary of the detected configuration:
✨ Session reset! Starting fresh.
◆ Model: qwen3.5:27b-q4_K_M
◆ Provider: custom
◆ Context: 8K tokens (config)
◆ Endpoint: http://localhost:11434/v1
This makes misconfigured context length immediately visible — a user
running a local 8K model that falls to the 128K default will see:
◆ Context: 128K tokens (default — set model.context_length in config to override)
Instead of silently getting no compression and degrading responses.
- _format_session_info() resolves model, provider, context length,
and endpoint from config + runtime, matching the hygiene code's
resolution chain
- Local/custom endpoints shown; cloud endpoints hidden (not useful)
- Context source annotated: config, detected, or default with hint
- Appended to /new and /reset responses, and auto-reset notifications
- 9 tests covering all formatting paths and failure resilience
Addresses the user-facing side of #2708 — instead of trying to fix
every edge case in context detection, surface the values so users
can immediately see when something is wrong.
2026-03-26 19:27:58 -07:00
header = " ✨ New session started! "
if session_info :
return f " { header } \n \n { session_info } "
return header
2026-02-02 19:01:51 -08:00
async def _handle_status_command ( self , event : MessageEvent ) - > str :
""" Handle /status command. """
source = event . source
session_entry = self . session_store . get_or_create_session ( source )
connected_platforms = [ p . value for p in self . adapters . keys ( ) ]
2026-02-03 16:15:49 -08:00
# Check if there's an active agent
session_key = session_entry . session_key
is_running = session_key in self . _running_agents
2026-02-02 19:01:51 -08:00
lines = [
" 📊 **Hermes Gateway Status** " ,
" " ,
f " **Session ID:** ` { session_entry . session_id [ : 12 ] } ...` " ,
f " **Created:** { session_entry . created_at . strftime ( ' % Y- % m- %d % H: % M ' ) } " ,
f " **Last Activity:** { session_entry . updated_at . strftime ( ' % Y- % m- %d % H: % M ' ) } " ,
f " **Tokens:** { session_entry . total_tokens : , } " ,
2026-02-03 16:15:49 -08:00
f " **Agent Running:** { ' Yes ⚡ ' if is_running else ' No ' } " ,
2026-02-02 19:01:51 -08:00
" " ,
f " **Connected Platforms:** { ' , ' . join ( connected_platforms ) } " ,
]
return " \n " . join ( lines )
2026-02-03 16:15:49 -08:00
async def _handle_stop_command ( self , event : MessageEvent ) - > str :
2026-03-25 18:46:50 -07:00
""" Handle /stop command - interrupt a running agent.
When an agent is truly hung ( blocked thread that never checks
_interrupt_requested ) , the early intercept in _handle_message ( )
handles / stop before this method is reached . This handler fires
only through normal command dispatch ( no running agent ) or as a
fallback . Force - clean the session lock in all cases for safety .
"""
2026-02-03 16:15:49 -08:00
source = event . source
session_entry = self . session_store . get_or_create_session ( source )
session_key = session_entry . session_key
2026-03-19 18:26:09 -07:00
agent = self . _running_agents . get ( session_key )
if agent is _AGENT_PENDING_SENTINEL :
2026-03-25 18:46:50 -07:00
# Force-clean the sentinel so the session is unlocked.
if session_key in self . _running_agents :
del self . _running_agents [ session_key ]
logger . info ( " HARD STOP (pending) for session %s — sentinel cleared " , session_key [ : 20 ] )
return " ⚡ Force-stopped. The agent was still starting — session unlocked. "
2026-03-19 18:26:09 -07:00
if agent :
2026-03-25 18:46:50 -07:00
agent . interrupt ( " Stop requested " )
# Force-clean the session lock so a truly hung agent doesn't
# keep it locked forever.
if session_key in self . _running_agents :
del self . _running_agents [ session_key ]
return " ⚡ Force-stopped. The session is unlocked — you can send a new message. "
2026-02-03 16:15:49 -08:00
else :
return " No active task to stop. "
2026-02-19 14:31:53 -08:00
async def _handle_help_command ( self , event : MessageEvent ) - > str :
""" Handle /help command - list available commands. """
2026-03-16 23:21:03 -07:00
from hermes_cli . commands import gateway_help_lines
2026-02-28 11:18:50 -08:00
lines = [
" 📖 **Hermes Commands** \n " ,
2026-03-16 23:21:03 -07:00
* gateway_help_lines ( ) ,
2026-02-28 11:18:50 -08:00
]
try :
from agent . skill_commands import get_skill_commands
skill_cmds = get_skill_commands ( )
if skill_cmds :
lines . append ( f " \n ⚡ **Skill Commands** ( { len ( skill_cmds ) } installed): " )
for cmd in sorted ( skill_cmds ) :
lines . append ( f " ` { cmd } ` — { skill_cmds [ cmd ] [ ' description ' ] } " )
except Exception :
pass
return " \n " . join ( lines )
2026-02-19 14:31:53 -08:00
2026-03-08 06:09:36 -07:00
async def _handle_provider_command ( self , event : MessageEvent ) - > str :
""" Handle /provider command - show available providers. """
import yaml
from hermes_cli . models import (
list_available_providers ,
normalize_provider ,
_PROVIDER_LABELS ,
)
# Resolve current provider from config
current_provider = " openrouter "
config_path = _hermes_home / ' config.yaml '
try :
if config_path . exists ( ) :
2026-03-05 17:04:33 -05:00
with open ( config_path , encoding = " utf-8 " ) as f :
2026-03-08 06:09:36 -07:00
cfg = yaml . safe_load ( f ) or { }
model_cfg = cfg . get ( " model " , { } )
if isinstance ( model_cfg , dict ) :
current_provider = model_cfg . get ( " provider " , current_provider )
except Exception :
pass
current_provider = normalize_provider ( current_provider )
if current_provider == " auto " :
try :
from hermes_cli . auth import resolve_provider as _resolve_provider
current_provider = _resolve_provider ( current_provider )
except Exception :
current_provider = " openrouter "
2026-03-09 02:38:34 -07:00
# Detect custom endpoint
if current_provider == " openrouter " and os . getenv ( " OPENAI_BASE_URL " , " " ) . strip ( ) :
current_provider = " custom "
2026-03-08 06:09:36 -07:00
current_label = _PROVIDER_LABELS . get ( current_provider , current_provider )
lines = [
f " 🔌 **Current provider:** { current_label } (` { current_provider } `) " ,
" " ,
" **Available providers:** " ,
]
providers = list_available_providers ( )
for p in providers :
marker = " ← active " if p [ " id " ] == current_provider else " "
auth = " ✅ " if p [ " authenticated " ] else " ❌ "
aliases = f " _(also: { ' , ' . join ( p [ ' aliases ' ] ) } )_ " if p [ " aliases " ] else " "
lines . append ( f " { auth } ` { p [ ' id ' ] } ` — { p [ ' label ' ] } { aliases } { marker } " )
lines . append ( " " )
lines . append ( " Switch: `/model provider:model-name` " )
lines . append ( " Setup: `hermes setup` " )
return " \n " . join ( lines )
2026-02-19 14:31:53 -08:00
async def _handle_personality_command ( self , event : MessageEvent ) - > str :
""" Handle /personality command - list or set a personality. """
2026-02-27 11:14:14 -05:00
import yaml
2026-02-19 14:31:53 -08:00
args = event . get_command_args ( ) . strip ( ) . lower ( )
2026-02-27 11:14:14 -05:00
config_path = _hermes_home / ' config.yaml '
2026-02-19 14:31:53 -08:00
try :
if config_path . exists ( ) :
2026-03-05 17:04:33 -05:00
with open ( config_path , ' r ' , encoding = " utf-8 " ) as f :
2026-02-19 14:31:53 -08:00
config = yaml . safe_load ( f ) or { }
personalities = config . get ( " agent " , { } ) . get ( " personalities " , { } )
else :
2026-02-27 11:14:14 -05:00
config = { }
2026-02-19 14:31:53 -08:00
personalities = { }
except Exception :
2026-02-27 11:14:14 -05:00
config = { }
2026-02-19 14:31:53 -08:00
personalities = { }
2026-02-27 11:14:14 -05:00
2026-02-19 14:31:53 -08:00
if not personalities :
return " No personalities configured in `~/.hermes/config.yaml` "
2026-02-27 11:14:14 -05:00
2026-02-19 14:31:53 -08:00
if not args :
lines = [ " 🎭 **Available Personalities** \n " ]
2026-03-09 17:18:09 +03:00
lines . append ( " • `none` — (no personality overlay) " )
2026-02-19 14:31:53 -08:00
for name , prompt in personalities . items ( ) :
2026-03-09 17:18:09 +03:00
if isinstance ( prompt , dict ) :
preview = prompt . get ( " description " ) or prompt . get ( " system_prompt " , " " ) [ : 50 ]
else :
preview = prompt [ : 50 ] + " ... " if len ( prompt ) > 50 else prompt
2026-02-19 14:31:53 -08:00
lines . append ( f " • ` { name } ` — { preview } " )
chore: fix 154 f-strings, simplify getattr/URL patterns, remove dead code (#3119)
Three categories of cleanup, all zero-behavioral-change:
1. F-strings without placeholders (154 fixes across 29 files)
- Converted f'...' to '...' where no {expression} was present
- Heaviest files: run_agent.py (24), cli.py (20), honcho_integration/cli.py (34)
2. Simplify defensive patterns in run_agent.py
- Added explicit self._is_anthropic_oauth = False in __init__ (before
the api_mode branch that conditionally sets it)
- Replaced 7x getattr(self, '_is_anthropic_oauth', False) with direct
self._is_anthropic_oauth (attribute always initialized now)
- Added _is_openrouter_url() and _is_anthropic_url() helper methods
- Replaced 3 inline 'openrouter' in self._base_url_lower checks
3. Remove dead code in small files
- hermes_cli/claw.py: removed unused 'total' computation
- tools/fuzzy_match.py: removed unused strip_indent() function and
pattern_stripped variable
Full test suite: 6184 passed, 0 failures
E2E PTY: banner clean, tool calls work, zero garbled ANSI
2026-03-25 19:47:58 -07:00
lines . append ( " \n Usage: `/personality <name>` " )
2026-02-19 14:31:53 -08:00
return " \n " . join ( lines )
2026-02-27 11:14:14 -05:00
2026-03-09 17:18:09 +03:00
def _resolve_prompt ( value ) :
if isinstance ( value , dict ) :
parts = [ value . get ( " system_prompt " , " " ) ]
if value . get ( " tone " ) :
parts . append ( f ' Tone: { value [ " tone " ] } ' )
if value . get ( " style " ) :
parts . append ( f ' Style: { value [ " style " ] } ' )
return " \n " . join ( p for p in parts if p )
return str ( value )
if args in ( " none " , " default " , " neutral " ) :
try :
if " agent " not in config or not isinstance ( config . get ( " agent " ) , dict ) :
config [ " agent " ] = { }
config [ " agent " ] [ " system_prompt " ] = " "
with open ( config_path , " w " ) as f :
yaml . dump ( config , f , default_flow_style = False , sort_keys = False )
except Exception as e :
return f " ⚠️ Failed to save personality change: { e } "
self . _ephemeral_system_prompt = " "
return " 🎭 Personality cleared — using base agent behavior. \n _(takes effect on next message)_ "
elif args in personalities :
new_prompt = _resolve_prompt ( personalities [ args ] )
2026-02-27 11:14:14 -05:00
# Write to config.yaml, same pattern as CLI save_config_value.
try :
if " agent " not in config or not isinstance ( config . get ( " agent " ) , dict ) :
config [ " agent " ] = { }
config [ " agent " ] [ " system_prompt " ] = new_prompt
2026-03-05 17:04:33 -05:00
with open ( config_path , ' w ' , encoding = " utf-8 " ) as f :
2026-02-27 11:14:14 -05:00
yaml . dump ( config , f , default_flow_style = False , sort_keys = False )
except Exception as e :
return f " ⚠️ Failed to save personality change: { e } "
# Update in-memory so it takes effect on the very next message.
self . _ephemeral_system_prompt = new_prompt
2026-02-19 14:31:53 -08:00
return f " 🎭 Personality set to ** { args } ** \n _(takes effect on next message)_ "
2026-02-27 11:14:14 -05:00
2026-03-09 17:18:09 +03:00
available = " `none`, " + " , " . join ( f " ` { n } ` " for n in personalities . keys ( ) )
2026-02-19 14:31:53 -08:00
return f " Unknown personality: ` { args } ` \n \n Available: { available } "
async def _handle_retry_command ( self , event : MessageEvent ) - > str :
""" Handle /retry command - re-send the last user message. """
source = event . source
session_entry = self . session_store . get_or_create_session ( source )
history = self . session_store . load_transcript ( session_entry . session_id )
# Find the last user message
last_user_msg = None
last_user_idx = None
for i in range ( len ( history ) - 1 , - 1 , - 1 ) :
if history [ i ] . get ( " role " ) == " user " :
last_user_msg = history [ i ] . get ( " content " , " " )
last_user_idx = i
break
if not last_user_msg :
return " No previous message to retry. "
2026-03-02 00:14:49 -08:00
# Truncate history to before the last user message and persist
2026-02-19 14:31:53 -08:00
truncated = history [ : last_user_idx ]
2026-03-02 00:14:49 -08:00
self . session_store . rewrite_transcript ( session_entry . session_id , truncated )
2026-03-10 23:40:24 -07:00
# Reset stored token count — transcript was truncated
session_entry . last_prompt_tokens = 0
2026-02-19 14:31:53 -08:00
# Re-send by creating a fake text event with the old message
retry_event = MessageEvent (
text = last_user_msg ,
message_type = MessageType . TEXT ,
source = source ,
raw_message = event . raw_message ,
)
# Let the normal message handler process it
2026-03-05 19:59:54 +03:00
return await self . _handle_message ( retry_event )
2026-02-19 14:31:53 -08:00
async def _handle_undo_command ( self , event : MessageEvent ) - > str :
""" Handle /undo command - remove the last user/assistant exchange. """
source = event . source
session_entry = self . session_store . get_or_create_session ( source )
2026-03-14 00:17:04 -07:00
history = self . session_store . load_transcript ( session_entry . session_id )
# Find the last user message and remove everything from it onward
last_user_idx = None
for i in range ( len ( history ) - 1 , - 1 , - 1 ) :
if history [ i ] . get ( " role " ) == " user " :
last_user_idx = i
break
if last_user_idx is None :
return " Nothing to undo. "
removed_msg = history [ last_user_idx ] . get ( " content " , " " )
removed_count = len ( history ) - last_user_idx
self . session_store . rewrite_transcript ( session_entry . session_id , history [ : last_user_idx ] )
# Reset stored token count — transcript was truncated
session_entry . last_prompt_tokens = 0
preview = removed_msg [ : 40 ] + " ... " if len ( removed_msg ) > 40 else removed_msg
return f " ↩️ Undid { removed_count } message(s). \n Removed: \" { preview } \" "
async def _handle_set_home_command ( self , event : MessageEvent ) - > str :
""" Handle /sethome command -- set the current chat as the platform ' s home channel. """
source = event . source
platform_name = source . platform . value if source . platform else " unknown "
chat_id = source . chat_id
chat_name = source . chat_name or chat_id
env_key = f " { platform_name . upper ( ) } _HOME_CHANNEL "
# Save to config.yaml
try :
import yaml
config_path = _hermes_home / ' config.yaml '
user_config = { }
if config_path . exists ( ) :
with open ( config_path , encoding = " utf-8 " ) as f :
user_config = yaml . safe_load ( f ) or { }
user_config [ env_key ] = chat_id
with open ( config_path , ' w ' , encoding = " utf-8 " ) as f :
yaml . dump ( user_config , f , default_flow_style = False )
# Also set in the current environment so it takes effect immediately
os . environ [ env_key ] = str ( chat_id )
except Exception as e :
return f " Failed to save home channel: { e } "
return (
f " ✅ Home channel set to ** { chat_name } ** (ID: { chat_id } ). \n "
f " Cron jobs and cross-platform messages will be delivered here. "
)
2026-03-11 02:13:43 +03:00
@staticmethod
def _get_guild_id ( event : MessageEvent ) - > Optional [ int ] :
""" Extract Discord guild_id from the raw message object. """
raw = getattr ( event , " raw_message " , None )
if raw is None :
return None
# Slash command interaction
if hasattr ( raw , " guild_id " ) and raw . guild_id :
return int ( raw . guild_id )
# Regular message
if hasattr ( raw , " guild " ) and raw . guild :
return raw . guild . id
return None
2026-03-10 22:55:36 +03:00
async def _handle_voice_command ( self , event : MessageEvent ) - > str :
2026-03-11 02:13:43 +03:00
""" Handle /voice [on|off|tts|channel|leave|status] command. """
2026-03-10 22:55:36 +03:00
args = event . get_command_args ( ) . strip ( ) . lower ( )
chat_id = event . source . chat_id
2026-03-14 10:31:49 +03:00
adapter = self . adapters . get ( event . source . platform )
2026-03-10 22:55:36 +03:00
if args in ( " on " , " enable " ) :
self . _voice_mode [ chat_id ] = " voice_only "
self . _save_voice_modes ( )
2026-03-14 10:31:49 +03:00
if adapter :
2026-03-14 06:14:22 -07:00
self . _set_adapter_auto_tts_disabled ( adapter , chat_id , disabled = False )
2026-03-10 22:55:36 +03:00
return (
" Voice mode enabled. \n "
" I ' ll reply with voice when you send voice messages. \n "
" Use /voice tts to get voice replies for all messages. "
)
elif args in ( " off " , " disable " ) :
2026-03-14 06:14:22 -07:00
self . _voice_mode [ chat_id ] = " off "
2026-03-10 22:55:36 +03:00
self . _save_voice_modes ( )
2026-03-14 10:31:49 +03:00
if adapter :
2026-03-14 06:14:22 -07:00
self . _set_adapter_auto_tts_disabled ( adapter , chat_id , disabled = True )
2026-03-10 22:55:36 +03:00
return " Voice mode disabled. Text-only replies. "
elif args == " tts " :
self . _voice_mode [ chat_id ] = " all "
self . _save_voice_modes ( )
2026-03-14 10:31:49 +03:00
if adapter :
2026-03-14 06:14:22 -07:00
self . _set_adapter_auto_tts_disabled ( adapter , chat_id , disabled = False )
2026-03-10 22:55:36 +03:00
return (
" Auto-TTS enabled. \n "
" All replies will include a voice message. "
)
2026-03-11 02:13:43 +03:00
elif args in ( " channel " , " join " ) :
return await self . _handle_voice_channel_join ( event )
elif args == " leave " :
return await self . _handle_voice_channel_leave ( event )
2026-03-10 22:55:36 +03:00
elif args == " status " :
mode = self . _voice_mode . get ( chat_id , " off " )
labels = {
" off " : " Off (text only) " ,
" voice_only " : " On (voice reply to voice messages) " ,
" all " : " TTS (voice reply to all messages) " ,
}
2026-03-11 02:13:43 +03:00
# Append voice channel info if connected
adapter = self . adapters . get ( event . source . platform )
guild_id = self . _get_guild_id ( event )
2026-03-14 02:14:34 +03:00
if guild_id and hasattr ( adapter , " get_voice_channel_info " ) :
info = adapter . get_voice_channel_info ( guild_id )
if info :
lines = [
f " Voice mode: { labels . get ( mode , mode ) } " ,
f " Voice channel: # { info [ ' channel_name ' ] } " ,
f " Participants: { info [ ' member_count ' ] } " ,
]
for m in info [ " members " ] :
status = " (speaking) " if m . get ( " is_speaking " ) else " "
lines . append ( f " - { m [ ' display_name ' ] } { status } " )
return " \n " . join ( lines )
2026-03-10 22:55:36 +03:00
return f " Voice mode: { labels . get ( mode , mode ) } "
else :
# Toggle: off → on, on/all → off
current = self . _voice_mode . get ( chat_id , " off " )
if current == " off " :
self . _voice_mode [ chat_id ] = " voice_only "
self . _save_voice_modes ( )
2026-03-14 10:31:49 +03:00
if adapter :
2026-03-14 06:14:22 -07:00
self . _set_adapter_auto_tts_disabled ( adapter , chat_id , disabled = False )
2026-03-10 22:55:36 +03:00
return " Voice mode enabled. "
else :
2026-03-14 06:14:22 -07:00
self . _voice_mode [ chat_id ] = " off "
2026-03-10 22:55:36 +03:00
self . _save_voice_modes ( )
2026-03-14 10:31:49 +03:00
if adapter :
2026-03-14 06:14:22 -07:00
self . _set_adapter_auto_tts_disabled ( adapter , chat_id , disabled = True )
2026-03-10 22:55:36 +03:00
return " Voice mode disabled. "
2026-03-11 02:13:43 +03:00
async def _handle_voice_channel_join ( self , event : MessageEvent ) - > str :
""" Join the user ' s current Discord voice channel. """
adapter = self . adapters . get ( event . source . platform )
if not hasattr ( adapter , " join_voice_channel " ) :
return " Voice channels are not supported on this platform. "
guild_id = self . _get_guild_id ( event )
if not guild_id :
return " This command only works in a Discord server. "
voice_channel = await adapter . get_user_voice_channel (
guild_id , event . source . user_id
)
if not voice_channel :
return " You need to be in a voice channel first. "
2026-03-11 23:57:42 +03:00
# Wire callbacks BEFORE join so voice input arriving immediately
2026-03-11 23:36:47 +03:00
# after connection is not lost.
if hasattr ( adapter , " _voice_input_callback " ) :
adapter . _voice_input_callback = self . _handle_voice_channel_input
2026-03-11 23:57:42 +03:00
if hasattr ( adapter , " _on_voice_disconnect " ) :
adapter . _on_voice_disconnect = self . _handle_voice_timeout_cleanup
2026-03-11 23:36:47 +03:00
2026-03-11 02:13:43 +03:00
try :
success = await adapter . join_voice_channel ( voice_channel )
except Exception as e :
logger . warning ( " Failed to join voice channel: %s " , e )
2026-03-11 23:36:47 +03:00
adapter . _voice_input_callback = None
2026-03-15 11:58:48 +03:00
err_lower = str ( e ) . lower ( )
if " pynacl " in err_lower or " nacl " in err_lower or " davey " in err_lower :
2026-03-15 05:24:34 -07:00
return (
" Voice dependencies are missing (PyNaCl / davey). "
" Install or reinstall Hermes with the messaging extra, e.g. "
" `pip install hermes-agent[messaging]`. "
)
2026-03-11 02:13:43 +03:00
return f " Failed to join voice channel: { e } "
if success :
adapter . _voice_text_channels [ guild_id ] = int ( event . source . chat_id )
self . _voice_mode [ event . source . chat_id ] = " all "
self . _save_voice_modes ( )
2026-03-14 06:14:22 -07:00
self . _set_adapter_auto_tts_disabled ( adapter , event . source . chat_id , disabled = False )
2026-03-11 02:13:43 +03:00
return (
f " Joined voice channel ** { voice_channel . name } **. \n "
2026-03-11 04:34:58 +03:00
f " I ' ll speak my replies and listen to you. Use /voice leave to disconnect. "
2026-03-11 02:13:43 +03:00
)
2026-03-11 23:36:47 +03:00
# Join failed — clear callback
adapter . _voice_input_callback = None
2026-03-11 02:13:43 +03:00
return " Failed to join voice channel. Check bot permissions (Connect + Speak). "
async def _handle_voice_channel_leave ( self , event : MessageEvent ) - > str :
""" Leave the Discord voice channel. """
adapter = self . adapters . get ( event . source . platform )
guild_id = self . _get_guild_id ( event )
if not guild_id or not hasattr ( adapter , " leave_voice_channel " ) :
return " Not in a voice channel. "
if not hasattr ( adapter , " is_in_voice_channel " ) or not adapter . is_in_voice_channel ( guild_id ) :
return " Not in a voice channel. "
2026-03-11 23:36:47 +03:00
try :
await adapter . leave_voice_channel ( guild_id )
except Exception as e :
logger . warning ( " Error leaving voice channel: %s " , e )
# Always clean up state even if leave raised an exception
2026-03-14 06:14:22 -07:00
self . _voice_mode [ event . source . chat_id ] = " off "
2026-03-11 02:13:43 +03:00
self . _save_voice_modes ( )
2026-03-14 06:14:22 -07:00
self . _set_adapter_auto_tts_disabled ( adapter , event . source . chat_id , disabled = True )
2026-03-11 23:36:47 +03:00
if hasattr ( adapter , " _voice_input_callback " ) :
adapter . _voice_input_callback = None
2026-03-11 02:13:43 +03:00
return " Left voice channel. "
2026-03-11 23:57:42 +03:00
def _handle_voice_timeout_cleanup ( self , chat_id : str ) - > None :
""" Called by the adapter when a voice channel times out.
Cleans up runner - side voice_mode state that the adapter cannot reach .
"""
2026-03-14 06:14:22 -07:00
self . _voice_mode [ chat_id ] = " off "
2026-03-11 23:57:42 +03:00
self . _save_voice_modes ( )
2026-03-14 06:14:22 -07:00
adapter = self . adapters . get ( Platform . DISCORD )
self . _set_adapter_auto_tts_disabled ( adapter , chat_id , disabled = True )
2026-03-11 23:57:42 +03:00
2026-03-11 04:34:58 +03:00
async def _handle_voice_channel_input (
self , guild_id : int , user_id : int , transcript : str
) :
""" Handle transcribed voice from a user in a voice channel.
Creates a synthetic MessageEvent and processes it through the
adapter ' s full message pipeline (session, typing, agent, TTS reply).
"""
adapter = self . adapters . get ( Platform . DISCORD )
if not adapter :
return
text_ch_id = adapter . _voice_text_channels . get ( guild_id )
if not text_ch_id :
return
2026-03-14 10:31:49 +03:00
# Check authorization before processing voice input
source = SessionSource (
platform = Platform . DISCORD ,
chat_id = str ( text_ch_id ) ,
user_id = str ( user_id ) ,
user_name = str ( user_id ) ,
chat_type = " channel " ,
)
if not self . _is_user_authorized ( source ) :
logger . debug ( " Unauthorized voice input from user %d , ignoring " , user_id )
return
# Show transcript in text channel (after auth, with mention sanitization)
2026-03-11 04:34:58 +03:00
try :
channel = adapter . _client . get_channel ( text_ch_id )
if channel :
2026-03-14 10:31:49 +03:00
safe_text = transcript [ : 2000 ] . replace ( " @everyone " , " @ \u200b everyone " ) . replace ( " @here " , " @ \u200b here " )
await channel . send ( f " **[Voice]** <@ { user_id } >: { safe_text } " )
2026-03-11 04:34:58 +03:00
except Exception :
pass
# Build a synthetic MessageEvent and feed through the normal pipeline
# Use SimpleNamespace as raw_message so _get_guild_id() can extract
# guild_id and _send_voice_reply() plays audio in the voice channel.
from types import SimpleNamespace
event = MessageEvent (
source = source ,
text = transcript ,
message_type = MessageType . VOICE ,
raw_message = SimpleNamespace ( guild_id = guild_id , guild = None ) ,
)
await adapter . handle_message ( event )
2026-03-11 23:18:49 +03:00
def _should_send_voice_reply (
self ,
event : MessageEvent ,
response : str ,
agent_messages : list ,
2026-03-21 08:08:37 -07:00
already_sent : bool = False ,
2026-03-11 23:18:49 +03:00
) - > bool :
""" Decide whether the runner should send a TTS voice reply.
Returns False when :
- voice_mode is off for this chat
- response is empty or an error
- agent already called text_to_speech tool ( dedup )
- voice input and base adapter auto - TTS already handled it ( skip_double )
2026-03-21 08:08:37 -07:00
UNLESS streaming already consumed the response ( already_sent = True ) ,
in which case the base adapter won ' t have text for auto-TTS so the
runner must handle it .
2026-03-11 23:18:49 +03:00
"""
if not response or response . startswith ( " Error: " ) :
return False
chat_id = event . source . chat_id
voice_mode = self . _voice_mode . get ( chat_id , " off " )
is_voice_input = ( event . message_type == MessageType . VOICE )
should = (
( voice_mode == " all " )
or ( voice_mode == " voice_only " and is_voice_input )
)
if not should :
return False
# Dedup: agent already called TTS tool
has_agent_tts = any (
msg . get ( " role " ) == " assistant "
and any (
tc . get ( " function " , { } ) . get ( " name " ) == " text_to_speech "
for tc in ( msg . get ( " tool_calls " ) or [ ] )
)
for msg in agent_messages
)
if has_agent_tts :
return False
2026-03-14 23:57:40 +03:00
# Dedup: base adapter auto-TTS already handles voice input
# (play_tts plays in VC when connected, so runner can skip).
2026-03-21 08:08:37 -07:00
# When streaming already delivered the text (already_sent=True),
# the base adapter will receive None and can't run auto-TTS,
# so the runner must take over.
if is_voice_input and not already_sent :
2026-03-11 23:18:49 +03:00
return False
return True
2026-03-10 22:55:36 +03:00
async def _send_voice_reply ( self , event : MessageEvent , text : str ) - > None :
""" Generate TTS audio and send as a voice message before the text reply. """
2026-03-11 23:57:42 +03:00
import uuid as _uuid
audio_path = None
actual_path = None
2026-03-10 22:55:36 +03:00
try :
from tools . tts_tool import text_to_speech_tool , _strip_markdown_for_tts
tts_text = _strip_markdown_for_tts ( text [ : 4000 ] )
if not tts_text :
return
2026-03-11 00:24:29 +03:00
# Use .mp3 extension so edge-tts conversion to opus works correctly.
# The TTS tool may convert to .ogg — use file_path from result.
audio_path = os . path . join (
2026-03-10 22:55:36 +03:00
tempfile . gettempdir ( ) , " hermes_voice " ,
2026-03-11 23:57:42 +03:00
f " tts_reply_ { _uuid . uuid4 ( ) . hex [ : 12 ] } .mp3 " ,
2026-03-10 22:55:36 +03:00
)
2026-03-11 00:24:29 +03:00
os . makedirs ( os . path . dirname ( audio_path ) , exist_ok = True )
2026-03-10 22:55:36 +03:00
result_json = await asyncio . to_thread (
2026-03-11 00:24:29 +03:00
text_to_speech_tool , text = tts_text , output_path = audio_path
2026-03-10 22:55:36 +03:00
)
result = json . loads ( result_json )
2026-03-11 00:24:29 +03:00
# Use the actual file path from result (may differ after opus conversion)
actual_path = result . get ( " file_path " , audio_path )
if not result . get ( " success " ) or not os . path . isfile ( actual_path ) :
2026-03-10 22:55:36 +03:00
logger . warning ( " Auto voice reply TTS failed: %s " , result . get ( " error " ) )
return
adapter = self . adapters . get ( event . source . platform )
2026-03-11 02:13:43 +03:00
# If connected to a voice channel, play there instead of sending a file
guild_id = self . _get_guild_id ( event )
if ( guild_id
and hasattr ( adapter , " play_in_voice_channel " )
and hasattr ( adapter , " is_in_voice_channel " )
and adapter . is_in_voice_channel ( guild_id ) ) :
await adapter . play_in_voice_channel ( guild_id , actual_path )
elif adapter and hasattr ( adapter , " send_voice " ) :
2026-03-10 23:37:02 +03:00
send_kwargs : Dict [ str , Any ] = {
" chat_id " : event . source . chat_id ,
2026-03-11 00:24:29 +03:00
" audio_path " : actual_path ,
2026-03-10 23:37:02 +03:00
" reply_to " : event . message_id ,
}
if event . source . thread_id :
send_kwargs [ " metadata " ] = { " thread_id " : event . source . thread_id }
await adapter . send_voice ( * * send_kwargs )
2026-03-11 23:57:42 +03:00
except Exception as e :
logger . warning ( " Auto voice reply failed: %s " , e , exc_info = True )
finally :
for p in { audio_path , actual_path } - { None } :
2026-03-11 00:24:29 +03:00
try :
os . unlink ( p )
except OSError :
pass
2026-03-10 22:55:36 +03:00
2026-03-21 16:01:25 -07:00
async def _deliver_media_from_response (
self ,
response : str ,
event : MessageEvent ,
adapter ,
) - > None :
""" Extract MEDIA: tags and local file paths from a response and deliver them.
Called after streaming has already sent the text to the user , so the
text itself is already delivered — this only handles file attachments
that the normal _process_message_background path would have caught .
"""
from pathlib import Path
try :
media_files , _ = adapter . extract_media ( response )
_ , cleaned = adapter . extract_images ( response )
local_files , _ = adapter . extract_local_files ( cleaned )
_thread_meta = { " thread_id " : event . source . thread_id } if event . source . thread_id else None
_AUDIO_EXTS = { ' .ogg ' , ' .opus ' , ' .mp3 ' , ' .wav ' , ' .m4a ' }
_VIDEO_EXTS = { ' .mp4 ' , ' .mov ' , ' .avi ' , ' .mkv ' , ' .webm ' , ' .3gp ' }
_IMAGE_EXTS = { ' .jpg ' , ' .jpeg ' , ' .png ' , ' .webp ' , ' .gif ' }
for media_path , is_voice in media_files :
try :
ext = Path ( media_path ) . suffix . lower ( )
if ext in _AUDIO_EXTS :
await adapter . send_voice (
chat_id = event . source . chat_id ,
audio_path = media_path ,
metadata = _thread_meta ,
)
elif ext in _VIDEO_EXTS :
await adapter . send_video (
chat_id = event . source . chat_id ,
video_path = media_path ,
metadata = _thread_meta ,
)
elif ext in _IMAGE_EXTS :
await adapter . send_image_file (
chat_id = event . source . chat_id ,
image_path = media_path ,
metadata = _thread_meta ,
)
else :
await adapter . send_document (
chat_id = event . source . chat_id ,
file_path = media_path ,
metadata = _thread_meta ,
)
except Exception as e :
logger . warning ( " [ %s ] Post-stream media delivery failed: %s " , adapter . name , e )
for file_path in local_files :
try :
ext = Path ( file_path ) . suffix . lower ( )
if ext in _IMAGE_EXTS :
await adapter . send_image_file (
chat_id = event . source . chat_id ,
image_path = file_path ,
metadata = _thread_meta ,
)
else :
await adapter . send_document (
chat_id = event . source . chat_id ,
file_path = file_path ,
metadata = _thread_meta ,
)
except Exception as e :
logger . warning ( " [ %s ] Post-stream file delivery failed: %s " , adapter . name , e )
except Exception as e :
logger . warning ( " Post-stream media extraction failed: %s " , e )
2026-03-14 00:17:04 -07:00
async def _handle_rollback_command ( self , event : MessageEvent ) - > str :
""" Handle /rollback command — list or restore filesystem checkpoints. """
from tools . checkpoint_manager import CheckpointManager , format_checkpoint_list
# Read checkpoint config from config.yaml
cp_cfg = { }
try :
import yaml as _y
_cfg_path = _hermes_home / " config.yaml "
if _cfg_path . exists ( ) :
with open ( _cfg_path , encoding = " utf-8 " ) as _f :
_data = _y . safe_load ( _f ) or { }
cp_cfg = _data . get ( " checkpoints " , { } )
if isinstance ( cp_cfg , bool ) :
cp_cfg = { " enabled " : cp_cfg }
except Exception :
pass
if not cp_cfg . get ( " enabled " , False ) :
return (
" Checkpoints are not enabled. \n "
" Enable in config.yaml: \n ``` \n checkpoints: \n enabled: true \n ``` "
)
mgr = CheckpointManager (
enabled = True ,
max_snapshots = cp_cfg . get ( " max_snapshots " , 50 ) ,
)
cwd = os . getenv ( " MESSAGING_CWD " , str ( Path . home ( ) ) )
arg = event . get_command_args ( ) . strip ( )
if not arg :
checkpoints = mgr . list_checkpoints ( cwd )
return format_checkpoint_list ( checkpoints , cwd )
# Restore by number or hash
checkpoints = mgr . list_checkpoints ( cwd )
if not checkpoints :
return f " No checkpoints found for { cwd } "
target_hash = None
try :
idx = int ( arg ) - 1
if 0 < = idx < len ( checkpoints ) :
target_hash = checkpoints [ idx ] [ " hash " ]
else :
return f " Invalid checkpoint number. Use 1- { len ( checkpoints ) } . "
except ValueError :
target_hash = arg
result = mgr . restore ( cwd , target_hash )
if result [ " success " ] :
return (
f " ✅ Restored to checkpoint { result [ ' restored_to ' ] } : { result [ ' reason ' ] } \n "
f " A pre-rollback snapshot was saved automatically. "
)
return f " ❌ { result [ ' error ' ] } "
async def _handle_background_command ( self , event : MessageEvent ) - > str :
""" Handle /background <prompt> — run a prompt in a separate background session.
Spawns a new AIAgent in a background thread with its own session .
When it completes , sends the result back to the same chat without
modifying the active session ' s conversation history.
"""
prompt = event . get_command_args ( ) . strip ( )
if not prompt :
return (
" Usage: /background <prompt> \n "
" Example: /background Summarize the top HN stories today \n \n "
" Runs the prompt in a separate session. "
" You can keep chatting — the result will appear here when done. "
)
source = event . source
task_id = f " bg_ { datetime . now ( ) . strftime ( ' % H % M % S ' ) } _ { os . urandom ( 3 ) . hex ( ) } "
# Fire-and-forget the background task
2026-03-26 14:33:48 -07:00
_task = asyncio . create_task (
2026-03-14 00:17:04 -07:00
self . _run_background_task ( prompt , source , task_id )
)
2026-03-26 14:33:48 -07:00
self . _background_tasks . add ( _task )
_task . add_done_callback ( self . _background_tasks . discard )
2026-03-14 00:17:04 -07:00
preview = prompt [ : 60 ] + ( " ... " if len ( prompt ) > 60 else " " )
return f ' 🔄 Background task started: " { preview } " \n Task ID: { task_id } \n You can keep chatting — results will appear when done. '
async def _run_background_task (
self , prompt : str , source : " SessionSource " , task_id : str
) - > None :
""" Execute a background agent task and deliver the result to the chat. """
from run_agent import AIAgent
adapter = self . adapters . get ( source . platform )
if not adapter :
logger . warning ( " No adapter for platform %s in background task %s " , source . platform , task_id )
return
_thread_metadata = { " thread_id " : source . thread_id } if source . thread_id else None
try :
runtime_kwargs = _resolve_runtime_agent_kwargs ( )
if not runtime_kwargs . get ( " api_key " ) :
await adapter . send (
source . chat_id ,
f " ❌ Background task { task_id } failed: no provider credentials configured. " ,
metadata = _thread_metadata ,
)
return
2026-03-26 13:39:41 -07:00
user_config = _load_gateway_config ( )
model = _resolve_gateway_model ( user_config )
platform_key = _platform_config_key ( source . platform )
2026-03-14 00:17:04 -07:00
2026-03-26 13:39:41 -07:00
from hermes_cli . tools_config import _get_platform_tools
enabled_toolsets = sorted ( _get_platform_tools ( user_config , platform_key ) )
2026-03-14 00:17:04 -07:00
pr = self . _provider_routing
max_iterations = int ( os . getenv ( " HERMES_MAX_ITERATIONS " , " 90 " ) )
2026-03-11 22:12:11 +08:00
reasoning_config = self . _load_reasoning_config ( )
self . _reasoning_config = reasoning_config
fix: hermes update causes dual gateways on macOS (launchd) (#1567)
* feat: add optional smart model routing
Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.
* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s
* fix(gateway): avoid recursive ExecStop in user systemd unit
* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit
The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.
---------
Co-authored-by: Ninja <ninja@local>
* feat(skills): add blender-mcp optional skill for 3D modeling
Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.
Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.
* feat(acp): support slash commands in ACP adapter (#1532)
Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.
Unrecognized /commands fall through to the LLM as normal messages.
/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.
Fixes #1402
* fix(logging): improve error logging in session search tool (#1533)
* fix(gateway): restart on retryable startup failures (#1517)
* feat(email): add skip_attachments option via config.yaml
* feat(email): add skip_attachments option via config.yaml
Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.
Configure in config.yaml:
platforms:
email:
skip_attachments: true
Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.
* docs: document skip_attachments option for email adapter
* fix(telegram): retry on transient TLS failures during connect and send
Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.
Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.
Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.
Based on PR #1527 by cmd8. Closes #1526.
* feat: permissive block_anchor thresholds and unicode normalization (#1539)
Salvaged from PR #1528 by an420eth. Closes #517.
Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
non-breaking spaces → ASCII) so LLM-produced unicode artifacts
don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
multiple candidates — if first/last lines match exactly, the
block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
preserve correct character positions
Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
* feat(cli): add file path autocomplete in the input prompt (#1545)
When typing a path-like token (./ ../ ~/ / or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.
Triggered by tokens like:
edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ...
check ~/doc → shows ~/docs/, ~/documents/, ...
read /etc/hos → shows /etc/hosts, /etc/hostname, ...
open tools/reg → shows tools/registry.py
Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.
Inspired by OpenCode PR #145 (file path completion menu).
Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
tokens, _path_completions() yields filesystem Completions with
size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
path extraction, prefix filtering, directory markers, home
expansion, case-insensitivity, integration with slash commands
* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled
Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:
- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)
Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.
Inspired by OpenClaw PR #47959.
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)
Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.
Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.
* feat: smart approvals + /stop command (inspired by OpenAI Codex)
* feat: smart approvals — LLM-based risk assessment for dangerous commands
Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.
Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).
Config (config.yaml):
approvals:
mode: manual # manual (default), smart, off
Modes:
- manual — current behavior, always prompt the user
- smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
or ESCALATE (fall through to manual prompt)
- off — skip all approval prompts (equivalent to --yolo)
When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.
The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.
* feat: make smart approval model configurable via config.yaml
Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).
Config:
auxiliary:
approval:
provider: auto
model: '' # fast/cheap model recommended
base_url: ''
api_key: ''
Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.
* feat: add /stop command to kill all background processes
Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.
Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.
Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.
* feat: first-class plugin architecture + hide status bar cost by default (#1544)
The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:
display:
show_cost: true
in config.yaml, or: hermes config set display.show_cost true
The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.
Status bar without cost:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m
Status bar with show_cost: true:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m
* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)
* feat: improve memory prioritization — user preferences over procedural knowledge
Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.
Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'
Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
preferences and corrections over task-specific details
* feat: more aggressive skill creation and update prompting
Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.
Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers
* feat: first-class plugin architecture (#1555)
Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.
Core system (hermes_cli/plugins.py):
- Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
pip entry_points (hermes_agent.plugins group)
- PluginContext with register_tool() and register_hook()
- 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
on_session_start/end
- Namespace package handling for relative imports in plugins
- Graceful error isolation — broken plugins never crash the agent
Integration (model_tools.py):
- Plugin discovery runs after built-in + MCP tools
- Plugin tools bypass toolset filter via get_plugin_tool_names()
- Pre/post tool call hooks fire in handle_function_call()
CLI:
- /plugins command shows loaded plugins, tool counts, status
- Added to COMMANDS dict for autocomplete
Docs:
- Getting started guide (build-a-hermes-plugin.md) — full tutorial
building a calculator plugin step by step
- Reference page (features/plugins.md) — quick overview + tables
- Covers: file structure, schemas, handlers, hooks, data files,
bundled skills, env var gating, pip distribution, common mistakes
Tests: 16 tests covering discovery, loading, hooks, tool visibility.
* fix: hermes update causes dual gateways on macOS (launchd)
Three bugs worked together to create the dual-gateway problem:
1. cmd_update only checked systemd for gateway restart, completely
ignoring launchd on macOS. After killing the PID it would print
'Restart it with: hermes gateway run' even when launchd was about
to auto-respawn the process.
2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
after SIGTERM (non-zero exit), so the user's manual restart
created a second instance.
3. The launchd plist lacked --replace (systemd had it), so the
respawned gateway didn't kill stale instances on startup.
Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint
* fix: add launchd plist auto-refresh + explicit restart in cmd_update
Two integration issues with the initial fix:
1. Existing macOS users with old plist (no --replace) would never
get the fix until manual uninstall/reinstall. Added
refresh_launchd_plist_if_needed() — mirrors the existing
refresh_systemd_unit_if_needed(). Called from launchd_start(),
launchd_restart(), and cmd_update.
2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
explicit launchctl stop/start. This caused races: launchd would
respawn the old process before the PID file was cleaned up.
Now does explicit stop+start (matching how systemd gets an
explicit systemctl restart), with plist refresh first so the
new --replace flag is picked up.
---------
Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
turn_route = self . _resolve_turn_agent_config ( prompt , model , runtime_kwargs )
2026-03-14 00:17:04 -07:00
def run_sync ( ) :
agent = AIAgent (
fix: hermes update causes dual gateways on macOS (launchd) (#1567)
* feat: add optional smart model routing
Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.
* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s
* fix(gateway): avoid recursive ExecStop in user systemd unit
* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit
The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.
---------
Co-authored-by: Ninja <ninja@local>
* feat(skills): add blender-mcp optional skill for 3D modeling
Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.
Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.
* feat(acp): support slash commands in ACP adapter (#1532)
Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.
Unrecognized /commands fall through to the LLM as normal messages.
/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.
Fixes #1402
* fix(logging): improve error logging in session search tool (#1533)
* fix(gateway): restart on retryable startup failures (#1517)
* feat(email): add skip_attachments option via config.yaml
* feat(email): add skip_attachments option via config.yaml
Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.
Configure in config.yaml:
platforms:
email:
skip_attachments: true
Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.
* docs: document skip_attachments option for email adapter
* fix(telegram): retry on transient TLS failures during connect and send
Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.
Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.
Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.
Based on PR #1527 by cmd8. Closes #1526.
* feat: permissive block_anchor thresholds and unicode normalization (#1539)
Salvaged from PR #1528 by an420eth. Closes #517.
Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
non-breaking spaces → ASCII) so LLM-produced unicode artifacts
don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
multiple candidates — if first/last lines match exactly, the
block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
preserve correct character positions
Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
* feat(cli): add file path autocomplete in the input prompt (#1545)
When typing a path-like token (./ ../ ~/ / or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.
Triggered by tokens like:
edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ...
check ~/doc → shows ~/docs/, ~/documents/, ...
read /etc/hos → shows /etc/hosts, /etc/hostname, ...
open tools/reg → shows tools/registry.py
Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.
Inspired by OpenCode PR #145 (file path completion menu).
Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
tokens, _path_completions() yields filesystem Completions with
size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
path extraction, prefix filtering, directory markers, home
expansion, case-insensitivity, integration with slash commands
* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled
Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:
- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)
Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.
Inspired by OpenClaw PR #47959.
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)
Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.
Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.
* feat: smart approvals + /stop command (inspired by OpenAI Codex)
* feat: smart approvals — LLM-based risk assessment for dangerous commands
Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.
Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).
Config (config.yaml):
approvals:
mode: manual # manual (default), smart, off
Modes:
- manual — current behavior, always prompt the user
- smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
or ESCALATE (fall through to manual prompt)
- off — skip all approval prompts (equivalent to --yolo)
When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.
The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.
* feat: make smart approval model configurable via config.yaml
Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).
Config:
auxiliary:
approval:
provider: auto
model: '' # fast/cheap model recommended
base_url: ''
api_key: ''
Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.
* feat: add /stop command to kill all background processes
Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.
Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.
Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.
* feat: first-class plugin architecture + hide status bar cost by default (#1544)
The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:
display:
show_cost: true
in config.yaml, or: hermes config set display.show_cost true
The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.
Status bar without cost:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m
Status bar with show_cost: true:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m
* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)
* feat: improve memory prioritization — user preferences over procedural knowledge
Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.
Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'
Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
preferences and corrections over task-specific details
* feat: more aggressive skill creation and update prompting
Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.
Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers
* feat: first-class plugin architecture (#1555)
Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.
Core system (hermes_cli/plugins.py):
- Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
pip entry_points (hermes_agent.plugins group)
- PluginContext with register_tool() and register_hook()
- 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
on_session_start/end
- Namespace package handling for relative imports in plugins
- Graceful error isolation — broken plugins never crash the agent
Integration (model_tools.py):
- Plugin discovery runs after built-in + MCP tools
- Plugin tools bypass toolset filter via get_plugin_tool_names()
- Pre/post tool call hooks fire in handle_function_call()
CLI:
- /plugins command shows loaded plugins, tool counts, status
- Added to COMMANDS dict for autocomplete
Docs:
- Getting started guide (build-a-hermes-plugin.md) — full tutorial
building a calculator plugin step by step
- Reference page (features/plugins.md) — quick overview + tables
- Covers: file structure, schemas, handlers, hooks, data files,
bundled skills, env var gating, pip distribution, common mistakes
Tests: 16 tests covering discovery, loading, hooks, tool visibility.
* fix: hermes update causes dual gateways on macOS (launchd)
Three bugs worked together to create the dual-gateway problem:
1. cmd_update only checked systemd for gateway restart, completely
ignoring launchd on macOS. After killing the PID it would print
'Restart it with: hermes gateway run' even when launchd was about
to auto-respawn the process.
2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
after SIGTERM (non-zero exit), so the user's manual restart
created a second instance.
3. The launchd plist lacked --replace (systemd had it), so the
respawned gateway didn't kill stale instances on startup.
Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint
* fix: add launchd plist auto-refresh + explicit restart in cmd_update
Two integration issues with the initial fix:
1. Existing macOS users with old plist (no --replace) would never
get the fix until manual uninstall/reinstall. Added
refresh_launchd_plist_if_needed() — mirrors the existing
refresh_systemd_unit_if_needed(). Called from launchd_start(),
launchd_restart(), and cmd_update.
2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
explicit launchctl stop/start. This caused races: launchd would
respawn the old process before the PID file was cleaned up.
Now does explicit stop+start (matching how systemd gets an
explicit systemctl restart), with plist refresh first so the
new --replace flag is picked up.
---------
Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
model = turn_route [ " model " ] ,
* * turn_route [ " runtime " ] ,
2026-03-14 00:17:04 -07:00
max_iterations = max_iterations ,
quiet_mode = True ,
verbose_logging = False ,
enabled_toolsets = enabled_toolsets ,
2026-03-11 22:12:11 +08:00
reasoning_config = reasoning_config ,
2026-03-14 00:17:04 -07:00
providers_allowed = pr . get ( " only " ) ,
providers_ignored = pr . get ( " ignore " ) ,
providers_order = pr . get ( " order " ) ,
provider_sort = pr . get ( " sort " ) ,
provider_require_parameters = pr . get ( " require_parameters " , False ) ,
provider_data_collection = pr . get ( " data_collection " ) ,
session_id = task_id ,
platform = platform_key ,
session_db = self . _session_db ,
fallback_model = self . _fallback_model ,
)
return agent . run_conversation (
user_message = prompt ,
task_id = task_id ,
)
loop = asyncio . get_event_loop ( )
result = await loop . run_in_executor ( None , run_sync )
response = result . get ( " final_response " , " " ) if result else " "
if not response and result and result . get ( " error " ) :
response = f " Error: { result [ ' error ' ] } "
# Extract media files from the response
if response :
media_files , response = adapter . extract_media ( response )
images , text_content = adapter . extract_images ( response )
preview = prompt [ : 60 ] + ( " ... " if len ( prompt ) > 60 else " " )
header = f ' ✅ Background task complete \n Prompt: " { preview } " \n \n '
if text_content :
await adapter . send (
chat_id = source . chat_id ,
content = header + text_content ,
metadata = _thread_metadata ,
)
elif not images and not media_files :
await adapter . send (
chat_id = source . chat_id ,
content = header + " (No response generated) " ,
metadata = _thread_metadata ,
)
# Send extracted images
for image_url , alt_text in ( images or [ ] ) :
try :
await adapter . send_image (
chat_id = source . chat_id ,
image_url = image_url ,
caption = alt_text ,
)
except Exception :
pass
# Send media files
for media_path in ( media_files or [ ] ) :
try :
await adapter . send_file (
chat_id = source . chat_id ,
file_path = media_path ,
)
except Exception :
pass
else :
preview = prompt [ : 60 ] + ( " ... " if len ( prompt ) > 60 else " " )
await adapter . send (
chat_id = source . chat_id ,
content = f ' ✅ Background task complete \n Prompt: " { preview } " \n \n (No response generated) ' ,
metadata = _thread_metadata ,
)
except Exception as e :
logger . exception ( " Background task %s failed " , task_id )
try :
await adapter . send (
chat_id = source . chat_id ,
content = f " ❌ Background task { task_id } failed: { e } " ,
metadata = _thread_metadata ,
)
except Exception :
pass
async def _handle_reasoning_command ( self , event : MessageEvent ) - > str :
""" Handle /reasoning command — manage reasoning effort and display toggle.
Usage :
/ reasoning Show current effort level and display state
/ reasoning < level > Set reasoning effort ( none , low , medium , high , xhigh )
/ reasoning show | on Show model reasoning in responses
/ reasoning hide | off Hide model reasoning from responses
"""
import yaml
args = event . get_command_args ( ) . strip ( ) . lower ( )
config_path = _hermes_home / " config.yaml "
2026-03-11 22:12:11 +08:00
self . _reasoning_config = self . _load_reasoning_config ( )
self . _show_reasoning = self . _load_show_reasoning ( )
2026-03-14 00:17:04 -07:00
def _save_config_key ( key_path : str , value ) :
""" Save a dot-separated key to config.yaml. """
try :
user_config = { }
if config_path . exists ( ) :
with open ( config_path , encoding = " utf-8 " ) as f :
user_config = yaml . safe_load ( f ) or { }
keys = key_path . split ( " . " )
current = user_config
for k in keys [ : - 1 ] :
if k not in current or not isinstance ( current [ k ] , dict ) :
current [ k ] = { }
current = current [ k ]
current [ keys [ - 1 ] ] = value
with open ( config_path , " w " , encoding = " utf-8 " ) as f :
yaml . dump ( user_config , f , default_flow_style = False , sort_keys = False )
return True
except Exception as e :
logger . error ( " Failed to save config key %s : %s " , key_path , e )
return False
if not args :
# Show current state
rc = self . _reasoning_config
if rc is None :
level = " medium (default) "
elif rc . get ( " enabled " ) is False :
level = " none (disabled) "
else :
level = rc . get ( " effort " , " medium " )
display_state = " on ✓ " if self . _show_reasoning else " off "
return (
" 🧠 **Reasoning Settings** \n \n "
f " **Effort:** ` { level } ` \n "
f " **Display:** { display_state } \n \n "
" _Usage:_ `/reasoning <none|low|medium|high|xhigh|show|hide>` "
)
# Display toggle
if args in ( " show " , " on " ) :
self . _show_reasoning = True
_save_config_key ( " display.show_reasoning " , True )
return " 🧠 ✓ Reasoning display: **ON** \n Model thinking will be shown before each response. "
if args in ( " hide " , " off " ) :
self . _show_reasoning = False
_save_config_key ( " display.show_reasoning " , False )
return " 🧠 ✓ Reasoning display: **OFF** "
# Effort level change
effort = args . strip ( )
if effort == " none " :
parsed = { " enabled " : False }
elif effort in ( " xhigh " , " high " , " medium " , " low " , " minimal " ) :
parsed = { " enabled " : True , " effort " : effort }
else :
return (
f " ⚠️ Unknown argument: ` { effort } ` \n \n "
" **Valid levels:** none, low, minimal, medium, high, xhigh \n "
" **Display:** show, hide "
)
self . _reasoning_config = parsed
if _save_config_key ( " agent.reasoning_effort " , effort ) :
return f " 🧠 ✓ Reasoning effort set to ` { effort } ` (saved to config) \n _(takes effect on next message)_ "
else :
return f " 🧠 ✓ Reasoning effort set to ` { effort } ` (this session only) "
2026-03-26 14:41:04 -07:00
async def _handle_verbose_command ( self , event : MessageEvent ) - > str :
""" Handle /verbose command — cycle tool progress display mode.
Gated by ` ` display . tool_progress_command ` ` in config . yaml ( default off ) .
When enabled , cycles the tool progress mode through off → new → all →
verbose → off , same as the CLI .
"""
import yaml
config_path = _hermes_home / " config.yaml "
# --- check config gate ------------------------------------------------
try :
user_config = { }
if config_path . exists ( ) :
with open ( config_path , encoding = " utf-8 " ) as f :
user_config = yaml . safe_load ( f ) or { }
gate_enabled = user_config . get ( " display " , { } ) . get ( " tool_progress_command " , False )
except Exception :
gate_enabled = False
if not gate_enabled :
return (
" The `/verbose` command is not enabled for messaging platforms. \n \n "
" Enable it in `config.yaml`: \n ```yaml \n "
" display: \n tool_progress_command: true \n ``` "
)
# --- cycle mode -------------------------------------------------------
cycle = [ " off " , " new " , " all " , " verbose " ]
descriptions = {
" off " : " ⚙️ Tool progress: **OFF** — no tool activity shown. " ,
" new " : " ⚙️ Tool progress: **NEW** — shown when tool changes. " ,
" all " : " ⚙️ Tool progress: **ALL** — every tool call shown. " ,
" verbose " : " ⚙️ Tool progress: **VERBOSE** — full args and results. " ,
}
raw_progress = user_config . get ( " display " , { } ) . get ( " tool_progress " , " all " )
# YAML 1.1 parses bare "off" as boolean False — normalise back
if raw_progress is False :
current = " off "
elif raw_progress is True :
current = " all "
else :
current = str ( raw_progress ) . lower ( )
if current not in cycle :
current = " all "
idx = ( cycle . index ( current ) + 1 ) % len ( cycle )
new_mode = cycle [ idx ]
# Save to config.yaml
try :
if " display " not in user_config or not isinstance ( user_config . get ( " display " ) , dict ) :
user_config [ " display " ] = { }
user_config [ " display " ] [ " tool_progress " ] = new_mode
with open ( config_path , " w " , encoding = " utf-8 " ) as f :
yaml . dump ( user_config , f , default_flow_style = False , sort_keys = False )
return f " { descriptions [ new_mode ] } \n _(saved to config — takes effect on next message)_ "
except Exception as e :
logger . warning ( " Failed to save tool_progress mode: %s " , e )
return f " { descriptions [ new_mode ] } \n _(could not save to config: { e } )_ "
2026-03-14 00:17:04 -07:00
async def _handle_compress_command ( self , event : MessageEvent ) - > str :
""" Handle /compress command -- manually compress conversation context. """
source = event . source
session_entry = self . session_store . get_or_create_session ( source )
history = self . session_store . load_transcript ( session_entry . session_id )
if not history or len ( history ) < 4 :
return " Not enough conversation to compress (need at least 4 messages). "
try :
from run_agent import AIAgent
from agent . model_metadata import estimate_messages_tokens_rough
runtime_kwargs = _resolve_runtime_agent_kwargs ( )
if not runtime_kwargs . get ( " api_key " ) :
return " No provider configured -- cannot compress. "
# Resolve model from config (same reason as memory flush above).
model = _resolve_gateway_model ( )
msgs = [
{ " role " : m . get ( " role " ) , " content " : m . get ( " content " ) }
for m in history
if m . get ( " role " ) in ( " user " , " assistant " ) and m . get ( " content " )
]
original_count = len ( msgs )
approx_tokens = estimate_messages_tokens_rough ( msgs )
tmp_agent = AIAgent (
* * runtime_kwargs ,
model = model ,
max_iterations = 4 ,
quiet_mode = True ,
enabled_toolsets = [ " memory " ] ,
session_id = session_entry . session_id ,
)
2026-03-26 17:40:31 -07:00
tmp_agent . _print_fn = lambda * a , * * kw : None
2026-03-14 00:17:04 -07:00
loop = asyncio . get_event_loop ( )
compressed , _ = await loop . run_in_executor (
None ,
2026-03-28 12:23:43 -07:00
lambda : tmp_agent . _compress_context ( msgs , " " , approx_tokens = approx_tokens )
2026-03-14 00:17:04 -07:00
)
2026-03-28 12:23:43 -07:00
# _compress_context already calls end_session() on the old session
# (preserving its full transcript in SQLite) and creates a new
# session_id for the continuation. Write the compressed messages
# into the NEW session so the original history stays searchable.
new_session_id = tmp_agent . session_id
if new_session_id != session_entry . session_id :
session_entry . session_id = new_session_id
self . session_store . _save ( )
self . session_store . rewrite_transcript ( new_session_id , compressed )
2026-03-14 00:17:04 -07:00
# Reset stored token count — transcript changed, old value is stale
self . session_store . update_session (
2026-03-28 12:23:43 -07:00
session_entry . session_key , last_prompt_tokens = 0
2026-03-14 00:17:04 -07:00
)
new_count = len ( compressed )
new_tokens = estimate_messages_tokens_rough ( compressed )
return (
f " 🗜️ Compressed: { original_count } → { new_count } messages \n "
f " ~ { approx_tokens : , } → ~ { new_tokens : , } tokens "
)
except Exception as e :
logger . warning ( " Manual compress failed: %s " , e )
return f " Compression failed: { e } "
async def _handle_title_command ( self , event : MessageEvent ) - > str :
""" Handle /title command — set or show the current session ' s title. """
source = event . source
session_entry = self . session_store . get_or_create_session ( source )
session_id = session_entry . session_id
if not self . _session_db :
return " Session database not available. "
2026-03-21 19:04:53 -04:00
# Ensure session exists in SQLite DB (it may only exist in session_store
# if this is the first command in a new session)
existing_title = self . _session_db . get_session_title ( session_id )
if existing_title is None :
# Session doesn't exist in DB yet — create it
try :
self . _session_db . create_session (
session_id = session_id ,
source = source . platform . value if source . platform else " unknown " ,
user_id = source . user_id ,
)
except Exception :
pass # Session might already exist, ignore errors
2026-03-14 00:17:04 -07:00
title_arg = event . get_command_args ( ) . strip ( )
if title_arg :
# Sanitize the title before setting
try :
sanitized = self . _session_db . sanitize_title ( title_arg )
except ValueError as e :
return f " ⚠️ { e } "
if not sanitized :
return " ⚠️ Title is empty after cleanup. Please use printable characters. "
# Set the title
try :
if self . _session_db . set_session_title ( session_id , sanitized ) :
return f " ✏️ Session title set: ** { sanitized } ** "
else :
return " Session not found in database. "
except ValueError as e :
return f " ⚠️ { e } "
else :
2026-03-17 04:14:40 -07:00
# Show the current title and session ID
2026-03-14 00:17:04 -07:00
title = self . _session_db . get_session_title ( session_id )
if title :
2026-03-17 04:14:40 -07:00
return f " 📌 Session: ` { session_id } ` \n Title: ** { title } ** "
2026-03-14 00:17:04 -07:00
else :
2026-03-17 04:14:40 -07:00
return f " 📌 Session: ` { session_id } ` \n No title set. Usage: `/title My Session Name` "
2026-03-14 00:17:04 -07:00
async def _handle_resume_command ( self , event : MessageEvent ) - > str :
""" Handle /resume command — switch to a previously-named session. """
if not self . _session_db :
return " Session database not available. "
source = event . source
2026-03-16 00:22:23 -07:00
session_key = self . _session_key_for_source ( source )
2026-03-14 00:17:04 -07:00
name = event . get_command_args ( ) . strip ( )
if not name :
# List recent titled sessions for this user/platform
try :
user_source = source . platform . value if source . platform else None
sessions = self . _session_db . list_sessions_rich (
source = user_source , limit = 10
)
titled = [ s for s in sessions if s . get ( " title " ) ]
if not titled :
return (
" No named sessions found. \n "
" Use `/title My Session` to name your current session, "
" then `/resume My Session` to return to it later. "
)
lines = [ " 📋 **Named Sessions** \n " ]
for s in titled [ : 10 ] :
title = s [ " title " ]
preview = s . get ( " preview " , " " ) [ : 40 ]
preview_part = f " — _ { preview } _ " if preview else " "
lines . append ( f " • ** { title } ** { preview_part } " )
lines . append ( " \n Usage: `/resume <session name>` " )
return " \n " . join ( lines )
except Exception as e :
logger . debug ( " Failed to list titled sessions: %s " , e )
return f " Could not list sessions: { e } "
# Resolve the name to a session ID
target_id = self . _session_db . resolve_session_by_title ( name )
if not target_id :
return (
f " No session found matching ' ** { name } ** ' . \n "
" Use `/resume` with no arguments to see available sessions. "
)
# Check if already on that session
current_entry = self . session_store . get_or_create_session ( source )
if current_entry . session_id == target_id :
return f " 📌 Already on session ** { name } **. "
# Flush memories for current session before switching
try :
2026-03-26 14:33:48 -07:00
_flush_task = asyncio . create_task (
2026-03-16 00:23:47 -07:00
self . _async_flush_memories ( current_entry . session_id , session_key )
)
2026-03-26 14:33:48 -07:00
self . _background_tasks . add ( _flush_task )
_flush_task . add_done_callback ( self . _background_tasks . discard )
2026-03-14 00:17:04 -07:00
except Exception as e :
logger . debug ( " Memory flush on resume failed: %s " , e )
self . _shutdown_gateway_honcho ( session_key )
# Clear any running agent for this session key
if session_key in self . _running_agents :
del self . _running_agents [ session_key ]
# Switch the session entry to point at the old session
new_entry = self . session_store . switch_session ( session_key , target_id )
if not new_entry :
return " Failed to switch session. "
# Get the title for confirmation
title = self . _session_db . get_session_title ( target_id ) or name
# Count messages for context
history = self . session_store . load_transcript ( target_id )
msg_count = len ( [ m for m in history if m . get ( " role " ) == " user " ] ) if history else 0
msg_part = f " ( { msg_count } message { ' s ' if msg_count != 1 else ' ' } ) " if msg_count else " "
return f " ↻ Resumed session ** { title } ** { msg_part } . Conversation restored. "
async def _handle_usage_command ( self , event : MessageEvent ) - > str :
""" Handle /usage command -- show token usage for the session ' s last agent run. """
source = event . source
2026-03-16 00:22:23 -07:00
session_key = self . _session_key_for_source ( source )
2026-03-14 00:17:04 -07:00
agent = self . _running_agents . get ( session_key )
if agent and hasattr ( agent , " session_total_tokens " ) and agent . session_api_calls > 0 :
lines = [
" 📊 **Session Token Usage** " ,
f " Prompt (input): { agent . session_prompt_tokens : , } " ,
f " Completion (output): { agent . session_completion_tokens : , } " ,
f " Total: { agent . session_total_tokens : , } " ,
f " API calls: { agent . session_api_calls } " ,
]
ctx = agent . context_compressor
if ctx . last_prompt_tokens :
pct = ctx . last_prompt_tokens / ctx . context_length * 100 if ctx . context_length else 0
lines . append ( f " Context: { ctx . last_prompt_tokens : , } / { ctx . context_length : , } ( { pct : .0f } %) " )
if ctx . compression_count :
lines . append ( f " Compressions: { ctx . compression_count } " )
return " \n " . join ( lines )
# No running agent -- check session history for a rough count
session_entry = self . session_store . get_or_create_session ( source )
history = self . session_store . load_transcript ( session_entry . session_id )
if history :
from agent . model_metadata import estimate_messages_tokens_rough
msgs = [ m for m in history if m . get ( " role " ) in ( " user " , " assistant " ) and m . get ( " content " ) ]
approx = estimate_messages_tokens_rough ( msgs )
return (
f " 📊 **Session Info** \n "
f " Messages: { len ( msgs ) } \n "
f " Estimated context: ~ { approx : , } tokens \n "
f " _(Detailed usage available during active conversations)_ "
)
return " No usage data available for this session. "
async def _handle_insights_command ( self , event : MessageEvent ) - > str :
""" Handle /insights command -- show usage insights and analytics. """
import asyncio as _asyncio
args = event . get_command_args ( ) . strip ( )
days = 30
source = None
# Parse simple args: /insights 7 or /insights --days 7
if args :
parts = args . split ( )
i = 0
while i < len ( parts ) :
if parts [ i ] == " --days " and i + 1 < len ( parts ) :
try :
days = int ( parts [ i + 1 ] )
except ValueError :
return f " Invalid --days value: { parts [ i + 1 ] } "
i + = 2
elif parts [ i ] == " --source " and i + 1 < len ( parts ) :
source = parts [ i + 1 ]
i + = 2
elif parts [ i ] . isdigit ( ) :
days = int ( parts [ i ] )
i + = 1
else :
i + = 1
try :
from hermes_state import SessionDB
from agent . insights import InsightsEngine
loop = _asyncio . get_event_loop ( )
def _run_insights ( ) :
db = SessionDB ( )
engine = InsightsEngine ( db )
report = engine . generate ( days = days , source = source )
result = engine . format_gateway ( report )
db . close ( )
return result
return await loop . run_in_executor ( None , _run_insights )
except Exception as e :
logger . error ( " Insights command error: %s " , e , exc_info = True )
return f " Error generating insights: { e } "
async def _handle_reload_mcp_command ( self , event : MessageEvent ) - > str :
""" Handle /reload-mcp command -- disconnect and reconnect all MCP servers. """
loop = asyncio . get_event_loop ( )
try :
from tools . mcp_tool import shutdown_mcp_servers , discover_mcp_tools , _load_mcp_config , _servers , _lock
# Capture old server names before shutdown
with _lock :
old_servers = set ( _servers . keys ( ) )
# Read new config before shutting down, so we know what will be added/removed
new_config = _load_mcp_config ( )
new_server_names = set ( new_config . keys ( ) )
# Shutdown existing connections
await loop . run_in_executor ( None , shutdown_mcp_servers )
# Reconnect by discovering tools (reads config.yaml fresh)
new_tools = await loop . run_in_executor ( None , discover_mcp_tools )
# Compute what changed
with _lock :
connected_servers = set ( _servers . keys ( ) )
added = connected_servers - old_servers
removed = old_servers - connected_servers
reconnected = connected_servers & old_servers
lines = [ " 🔄 **MCP Servers Reloaded** \n " ]
if reconnected :
lines . append ( f " ♻️ Reconnected: { ' , ' . join ( sorted ( reconnected ) ) } " )
if added :
lines . append ( f " ➕ Added: { ' , ' . join ( sorted ( added ) ) } " )
if removed :
lines . append ( f " ➖ Removed: { ' , ' . join ( sorted ( removed ) ) } " )
if not connected_servers :
lines . append ( " No MCP servers connected. " )
else :
lines . append ( f " \n 🔧 { len ( new_tools ) } tool(s) available from { len ( connected_servers ) } server(s) " )
# Inject a message at the END of the session history so the
# model knows tools changed on its next turn. Appended after
# all existing messages to preserve prompt-cache for the prefix.
change_parts = [ ]
if added :
change_parts . append ( f " Added servers: { ' , ' . join ( sorted ( added ) ) } " )
if removed :
change_parts . append ( f " Removed servers: { ' , ' . join ( sorted ( removed ) ) } " )
if reconnected :
change_parts . append ( f " Reconnected servers: { ' , ' . join ( sorted ( reconnected ) ) } " )
tool_summary = f " { len ( new_tools ) } MCP tool(s) now available " if new_tools else " No MCP tools available "
change_detail = " . " . join ( change_parts ) + " . " if change_parts else " "
reload_msg = {
" role " : " user " ,
" content " : f " [SYSTEM: MCP servers have been reloaded. { change_detail } { tool_summary } . The tool list for this conversation has been updated accordingly.] " ,
}
try :
session_entry = self . session_store . get_or_create_session ( event . source )
self . session_store . append_to_transcript (
session_entry . session_id , reload_msg
)
except Exception :
pass # Best-effort; don't fail the reload over a transcript write
return " \n " . join ( lines )
except Exception as e :
logger . warning ( " MCP reload failed: %s " , e )
return f " ❌ MCP reload failed: { e } "
fix(gateway): replace bare text approval with /approve and /deny commands (#2002)
The gateway approval system previously intercepted bare 'yes'/'no' text
from the user's next message to approve/deny dangerous commands. This was
fragile and dangerous — if the agent asked a clarify question and the user
said 'yes' to answer it, the gateway would execute the pending dangerous
command instead. (Fixes #1888)
Changes:
- Remove bare text matching ('yes', 'y', 'approve', 'ok', etc.) from
_handle_message approval check
- Add /approve and /deny as gateway-only slash commands in the command
registry
- /approve supports scoping: /approve (one-time), /approve session,
/approve always (permanent)
- Add 5-minute timeout for stale approvals
- Gateway appends structured instructions to the agent response when a
dangerous command is pending, telling the user exactly how to respond
- 9 tests covering approve, deny, timeout, scoping, and verification
that bare 'yes' no longer triggers execution
Credit to @solo386 and @FlyByNight69420 for identifying and reporting
this security issue in PR #1971 and issue #1888.
Co-authored-by: Test <test@test.com>
2026-03-18 16:58:20 -07:00
# ------------------------------------------------------------------
# /approve & /deny — explicit dangerous-command approval
# ------------------------------------------------------------------
_APPROVAL_TIMEOUT_SECONDS = 300 # 5 minutes
async def _handle_approve_command ( self , event : MessageEvent ) - > str :
""" Handle /approve command — execute a pending dangerous command.
Usage :
/ approve — approve and execute the pending command
/ approve session — approve and remember for this session
/ approve always — approve this pattern permanently
"""
source = event . source
session_key = self . _session_key_for_source ( source )
if session_key not in self . _pending_approvals :
return " No pending command to approve. "
import time as _time
approval = self . _pending_approvals [ session_key ]
# Check for timeout
ts = approval . get ( " timestamp " , 0 )
if _time . time ( ) - ts > self . _APPROVAL_TIMEOUT_SECONDS :
self . _pending_approvals . pop ( session_key , None )
return " ⚠️ Approval expired (timed out after 5 minutes). Ask the agent to try again. "
self . _pending_approvals . pop ( session_key )
cmd = approval [ " command " ]
pattern_keys = approval . get ( " pattern_keys " , [ ] )
if not pattern_keys :
pk = approval . get ( " pattern_key " , " " )
pattern_keys = [ pk ] if pk else [ ]
# Determine approval scope from args
args = event . get_command_args ( ) . strip ( ) . lower ( )
from tools . approval import approve_session , approve_permanent
if args in ( " always " , " permanent " , " permanently " ) :
for pk in pattern_keys :
approve_permanent ( pk )
scope_msg = " (pattern approved permanently) "
elif args in ( " session " , " ses " ) :
for pk in pattern_keys :
approve_session ( session_key , pk )
scope_msg = " (pattern approved for this session) "
else :
# One-time approval — just approve for session so the immediate
# replay works, but don't advertise it as session-wide
for pk in pattern_keys :
approve_session ( session_key , pk )
scope_msg = " "
logger . info ( " User approved dangerous command via /approve: %s ... %s " , cmd [ : 60 ] , scope_msg )
from tools . terminal_tool import terminal_tool
result = terminal_tool ( command = cmd , force = True )
return f " ✅ Command approved and executed { scope_msg } . \n \n ``` \n { result [ : 3500 ] } \n ``` "
async def _handle_deny_command ( self , event : MessageEvent ) - > str :
""" Handle /deny command — reject a pending dangerous command. """
source = event . source
session_key = self . _session_key_for_source ( source )
if session_key not in self . _pending_approvals :
return " No pending command to deny. "
self . _pending_approvals . pop ( session_key )
logger . info ( " User denied dangerous command via /deny " )
return " ❌ Command denied. "
2026-03-14 00:17:04 -07:00
async def _handle_update_command ( self , event : MessageEvent ) - > str :
""" Handle /update command — update Hermes Agent to the latest version.
Spawns ` ` hermes update ` ` in a separate systemd scope so it survives the
2026-03-11 20:43:31 +01:00
gateway restart that ` ` hermes update ` ` may trigger at the end . Marker
files are written so either the current gateway process or the next one
can notify the user when the update finishes .
2026-03-14 00:17:04 -07:00
"""
import json
import shutil
import subprocess
from datetime import datetime
project_root = Path ( __file__ ) . parent . parent . resolve ( )
git_dir = project_root / ' .git '
if not git_dir . exists ( ) :
return " ✗ Not a git repository — cannot update. "
2026-03-12 18:02:21 +03:00
hermes_cmd = _resolve_hermes_bin ( )
if not hermes_cmd :
return (
" ✗ Could not locate the `hermes` command. "
" Hermes is running, but the update command could not find the "
" executable on PATH or via the current Python interpreter. "
" Try running `hermes update` manually in your terminal. "
)
2026-03-14 00:17:04 -07:00
pending_path = _hermes_home / " .update_pending.json "
output_path = _hermes_home / " .update_output.txt "
2026-03-11 20:43:31 +01:00
exit_code_path = _hermes_home / " .update_exit_code "
2026-03-14 00:17:04 -07:00
pending = {
" platform " : event . source . platform . value ,
" chat_id " : event . source . chat_id ,
" user_id " : event . source . user_id ,
" timestamp " : datetime . now ( ) . isoformat ( ) ,
}
pending_path . write_text ( json . dumps ( pending ) )
2026-03-11 20:43:31 +01:00
exit_code_path . unlink ( missing_ok = True )
2026-03-14 00:17:04 -07:00
# Spawn `hermes update` in a separate cgroup so it survives gateway
2026-03-11 20:43:31 +01:00
# restart. systemd-run --user --scope creates a transient scope unit.
2026-03-12 18:02:21 +03:00
hermes_cmd_str = " " . join ( shlex . quote ( part ) for part in hermes_cmd )
2026-03-11 20:43:31 +01:00
update_cmd = (
2026-03-12 18:02:21 +03:00
f " { hermes_cmd_str } update > { shlex . quote ( str ( output_path ) ) } 2>&1; "
2026-03-11 20:43:31 +01:00
f " status=$?; printf ' %s ' \" $status \" > { shlex . quote ( str ( exit_code_path ) ) } "
)
2026-03-14 00:17:04 -07:00
try :
systemd_run = shutil . which ( " systemd-run " )
if systemd_run :
subprocess . Popen (
[ systemd_run , " --user " , " --scope " ,
" --unit=hermes-update " , " -- " ,
" bash " , " -c " , update_cmd ] ,
stdout = subprocess . DEVNULL ,
stderr = subprocess . DEVNULL ,
start_new_session = True ,
)
else :
# Fallback: best-effort detach with start_new_session
subprocess . Popen (
[ " bash " , " -c " , f " nohup { update_cmd } & " ] ,
stdout = subprocess . DEVNULL ,
stderr = subprocess . DEVNULL ,
start_new_session = True ,
)
except Exception as e :
pending_path . unlink ( missing_ok = True )
2026-03-11 20:43:31 +01:00
exit_code_path . unlink ( missing_ok = True )
2026-03-14 00:17:04 -07:00
return f " ✗ Failed to start update: { e } "
2026-03-11 20:43:31 +01:00
self . _schedule_update_notification_watch ( )
2026-03-14 00:17:04 -07:00
return " ⚕ Starting Hermes update… I ' ll notify you when it ' s done. "
2026-03-11 20:43:31 +01:00
def _schedule_update_notification_watch ( self ) - > None :
""" Ensure a background task is watching for update completion. """
existing_task = getattr ( self , " _update_notification_task " , None )
if existing_task and not existing_task . done ( ) :
return
try :
self . _update_notification_task = asyncio . create_task (
self . _watch_for_update_completion ( )
)
except RuntimeError :
logger . debug ( " Skipping update notification watcher: no running event loop " )
async def _watch_for_update_completion (
self ,
poll_interval : float = 2.0 ,
timeout : float = 1800.0 ,
) - > None :
""" Wait for ``hermes update`` to finish, then send its notification. """
pending_path = _hermes_home / " .update_pending.json "
claimed_path = _hermes_home / " .update_pending.claimed.json "
exit_code_path = _hermes_home / " .update_exit_code "
loop = asyncio . get_running_loop ( )
deadline = loop . time ( ) + timeout
while ( pending_path . exists ( ) or claimed_path . exists ( ) ) and loop . time ( ) < deadline :
if exit_code_path . exists ( ) :
await self . _send_update_notification ( )
return
await asyncio . sleep ( poll_interval )
if ( pending_path . exists ( ) or claimed_path . exists ( ) ) and not exit_code_path . exists ( ) :
logger . warning ( " Update watcher timed out waiting for completion marker " )
exit_code_path . write_text ( " 124 " )
await self . _send_update_notification ( )
async def _send_update_notification ( self ) - > bool :
""" If an update finished, notify the user.
Returns False when the update is still running so a caller can retry
later . Returns True after a definitive send / skip decision .
"""
2026-03-14 00:17:04 -07:00
import json
import re as _re
pending_path = _hermes_home / " .update_pending.json "
2026-03-11 20:43:31 +01:00
claimed_path = _hermes_home / " .update_pending.claimed.json "
2026-03-14 00:17:04 -07:00
output_path = _hermes_home / " .update_output.txt "
2026-03-11 20:43:31 +01:00
exit_code_path = _hermes_home / " .update_exit_code "
2026-03-14 00:17:04 -07:00
2026-03-11 20:43:31 +01:00
if not pending_path . exists ( ) and not claimed_path . exists ( ) :
return False
2026-03-14 00:17:04 -07:00
2026-03-11 20:43:31 +01:00
cleanup = True
active_pending_path = claimed_path
2026-03-14 00:17:04 -07:00
try :
2026-03-11 20:43:31 +01:00
if pending_path . exists ( ) :
try :
pending_path . replace ( claimed_path )
except FileNotFoundError :
if not claimed_path . exists ( ) :
return True
elif not claimed_path . exists ( ) :
return True
pending = json . loads ( claimed_path . read_text ( ) )
2026-03-14 00:17:04 -07:00
platform_str = pending . get ( " platform " )
chat_id = pending . get ( " chat_id " )
2026-03-11 20:43:31 +01:00
if not exit_code_path . exists ( ) :
logger . info ( " Update notification deferred: update still running " )
cleanup = False
active_pending_path = pending_path
claimed_path . replace ( pending_path )
return False
exit_code_raw = exit_code_path . read_text ( ) . strip ( ) or " 1 "
exit_code = int ( exit_code_raw )
2026-03-14 00:17:04 -07:00
# Read the captured update output
output = " "
if output_path . exists ( ) :
output = output_path . read_text ( )
# Resolve adapter
platform = Platform ( platform_str )
adapter = self . adapters . get ( platform )
if adapter and chat_id :
# Strip ANSI escape codes for clean display
output = _re . sub ( r ' \ x1b \ [[0-9;]*m ' , ' ' , output ) . strip ( )
if output :
if len ( output ) > 3500 :
output = " … " + output [ - 3500 : ]
2026-03-11 20:43:31 +01:00
if exit_code == 0 :
msg = f " ✅ Hermes update finished. \n \n ``` \n { output } \n ``` "
else :
msg = f " ❌ Hermes update failed. \n \n ``` \n { output } \n ``` "
2026-03-14 00:17:04 -07:00
else :
2026-03-11 20:43:31 +01:00
if exit_code == 0 :
msg = " ✅ Hermes update finished successfully. "
else :
msg = " ❌ Hermes update failed. Check the gateway logs or run `hermes update` manually for details. "
2026-03-14 00:17:04 -07:00
await adapter . send ( chat_id , msg )
2026-03-11 20:43:31 +01:00
logger . info (
" Sent post-update notification to %s : %s (exit= %s ) " ,
platform_str ,
chat_id ,
exit_code ,
)
2026-03-14 00:17:04 -07:00
except Exception as e :
logger . warning ( " Post-update notification failed: %s " , e )
finally :
2026-03-11 20:43:31 +01:00
if cleanup :
active_pending_path . unlink ( missing_ok = True )
claimed_path . unlink ( missing_ok = True )
output_path . unlink ( missing_ok = True )
exit_code_path . unlink ( missing_ok = True )
return True
2026-03-14 00:17:04 -07:00
def _set_session_env ( self , context : SessionContext ) - > None :
""" Set environment variables for the current session. """
os . environ [ " HERMES_SESSION_PLATFORM " ] = context . source . platform . value
os . environ [ " HERMES_SESSION_CHAT_ID " ] = context . source . chat_id
if context . source . chat_name :
os . environ [ " HERMES_SESSION_CHAT_NAME " ] = context . source . chat_name
2026-03-14 06:06:44 +03:00
if context . source . thread_id :
os . environ [ " HERMES_SESSION_THREAD_ID " ] = str ( context . source . thread_id )
2026-03-14 00:17:04 -07:00
def _clear_session_env ( self ) - > None :
""" Clear session environment variables. """
2026-03-14 06:06:44 +03:00
for var in [ " HERMES_SESSION_PLATFORM " , " HERMES_SESSION_CHAT_ID " , " HERMES_SESSION_CHAT_NAME " , " HERMES_SESSION_THREAD_ID " ] :
2026-03-14 00:17:04 -07:00
if var in os . environ :
del os . environ [ var ]
async def _enrich_message_with_vision (
self ,
user_text : str ,
image_paths : List [ str ] ,
) - > str :
"""
Auto - analyze user - attached images with the vision tool and prepend
the descriptions to the message text .
Each image is analyzed with a general - purpose prompt . The resulting
description * and * the local cache path are injected so the model can :
1. Immediately understand what the user sent ( no extra tool call ) .
2. Re - examine the image with vision_analyze if it needs more detail .
Args :
2026-03-16 05:02:58 -07:00
user_text : The user ' s original caption / message text.
image_paths : List of local file paths to cached images .
2026-03-14 00:17:04 -07:00
Returns :
The enriched message string with vision descriptions prepended .
"""
from tools . vision_tools import vision_analyze_tool
import json as _json
analysis_prompt = (
" Describe everything visible in this image in thorough detail. "
" Include any text, code, data, objects, people, layout, colors, "
" and any other notable visual information. "
)
enriched_parts = [ ]
for path in image_paths :
try :
logger . debug ( " Auto-analyzing user image: %s " , path )
result_json = await vision_analyze_tool (
image_url = path ,
user_prompt = analysis_prompt ,
)
result = _json . loads ( result_json )
if result . get ( " success " ) :
description = result . get ( " analysis " , " " )
enriched_parts . append (
f " [The user sent an image~ Here ' s what I can see: \n { description } ] \n "
f " [If you need a closer look, use vision_analyze with "
f " image_url: { path } ~] "
)
else :
enriched_parts . append (
" [The user sent an image but I couldn ' t quite see it "
" this time (>_<) You can try looking at it yourself "
f " with vision_analyze using image_url: { path } ] "
)
except Exception as e :
logger . error ( " Vision auto-analysis error: %s " , e )
enriched_parts . append (
f " [The user sent an image but something went wrong when I "
f " tried to look at it~ You can try examining it yourself "
f " with vision_analyze using image_url: { path } ] "
)
# Combine: vision descriptions first, then the user's original text
if enriched_parts :
prefix = " \n \n " . join ( enriched_parts )
if user_text :
return f " { prefix } \n \n { user_text } "
return prefix
return user_text
async def _enrich_message_with_transcription (
self ,
user_text : str ,
audio_paths : List [ str ] ,
) - > str :
"""
2026-03-14 22:09:53 -07:00
Auto - transcribe user voice / audio messages using the configured STT provider
2026-03-14 00:17:04 -07:00
and prepend the transcript to the message text .
Args :
user_text : The user ' s original caption / message text.
audio_paths : List of local file paths to cached audio files .
Returns :
The enriched message string with transcriptions prepended .
"""
2026-03-14 22:09:53 -07:00
if not getattr ( self . config , " stt_enabled " , True ) :
2026-03-18 03:17:23 -07:00
disabled_note = " [The user sent voice message(s), but transcription is disabled in config. "
if self . _has_setup_skill ( ) :
disabled_note + = (
" You have a skill called hermes-agent-setup that can help "
" users configure Hermes features including voice, tools, and more. "
)
disabled_note + = " ] "
2026-03-14 22:09:53 -07:00
if user_text :
return f " { disabled_note } \n \n { user_text } "
return disabled_note
2026-03-12 00:26:40 +03:00
from tools . transcription_tools import transcribe_audio , get_stt_model_from_config
2026-03-14 00:17:04 -07:00
import asyncio
2026-03-12 00:26:40 +03:00
stt_model = get_stt_model_from_config ( )
2026-03-12 00:15:38 +03:00
2026-03-14 00:17:04 -07:00
enriched_parts = [ ]
for path in audio_paths :
try :
logger . debug ( " Transcribing user voice: %s " , path )
2026-03-12 00:15:38 +03:00
result = await asyncio . to_thread ( transcribe_audio , path , model = stt_model )
2026-03-14 00:17:04 -07:00
if result [ " success " ] :
transcript = result [ " transcript " ]
enriched_parts . append (
f ' [The user sent a voice message~ '
f ' Here \' s what they said: " { transcript } " ] '
)
else :
error = result . get ( " error " , " unknown error " )
2026-03-15 21:51:40 -07:00
if (
" No STT provider " in error
or error . startswith ( " Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set " )
) :
2026-03-18 03:17:23 -07:00
_no_stt_note = (
2026-03-14 00:17:04 -07:00
" [The user sent a voice message but I can ' t listen "
2026-03-18 03:05:17 -07:00
" to it right now — no STT provider is configured. "
" A direct message has already been sent to the user "
2026-03-18 03:17:23 -07:00
" with setup instructions. "
2026-03-14 00:17:04 -07:00
)
2026-03-18 03:17:23 -07:00
if self . _has_setup_skill ( ) :
_no_stt_note + = (
" You have a skill called hermes-agent-setup "
" that can help users configure Hermes features "
" including voice, tools, and more. "
)
_no_stt_note + = " ] "
enriched_parts . append ( _no_stt_note )
2026-03-14 00:17:04 -07:00
else :
enriched_parts . append (
" [The user sent a voice message but I had trouble "
f " transcribing it~ ( { error } )] "
)
except Exception as e :
logger . error ( " Transcription error: %s " , e )
enriched_parts . append (
" [The user sent a voice message but something went wrong "
" when I tried to listen to it~ Let them know!] "
)
if enriched_parts :
prefix = " \n \n " . join ( enriched_parts )
if user_text :
return f " { prefix } \n \n { user_text } "
return prefix
return user_text
async def _run_process_watcher ( self , watcher : dict ) - > None :
"""
Periodically check a background process and push updates to the user .
Runs as an asyncio task . Stays silent when nothing changed .
Auto - removes when the process exits or is killed .
Notification mode ( from ` ` display . background_process_notifications ` ` ) :
- ` ` all ` ` — running - output updates + final message
- ` ` result ` ` — final completion message only
- ` ` error ` ` — final message only when exit code != 0
- ` ` off ` ` — no messages at all
"""
from tools . process_registry import process_registry
session_id = watcher [ " session_id " ]
interval = watcher [ " check_interval " ]
session_key = watcher . get ( " session_key " , " " )
platform_name = watcher . get ( " platform " , " " )
chat_id = watcher . get ( " chat_id " , " " )
2026-03-15 23:01:57 -07:00
thread_id = watcher . get ( " thread_id " , " " )
2026-03-14 00:17:04 -07:00
notify_mode = self . _load_background_notifications_mode ( )
logger . debug ( " Process watcher started: %s (every %s s, notify= %s ) " ,
session_id , interval , notify_mode )
if notify_mode == " off " :
# Still wait for the process to exit so we can log it, but don't
# push any messages to the user.
while True :
await asyncio . sleep ( interval )
session = process_registry . get ( session_id )
if session is None or session . exited :
break
logger . debug ( " Process watcher ended (silent): %s " , session_id )
return
last_output_len = 0
while True :
await asyncio . sleep ( interval )
session = process_registry . get ( session_id )
if session is None :
break
current_output_len = len ( session . output_buffer )
has_new_output = current_output_len > last_output_len
last_output_len = current_output_len
if session . exited :
# Decide whether to notify based on mode
should_notify = (
notify_mode in ( " all " , " result " )
or ( notify_mode == " error " and session . exit_code not in ( 0 , None ) )
)
if should_notify :
new_output = session . output_buffer [ - 1000 : ] if session . output_buffer else " "
message_text = (
f " [Background process { session_id } finished with exit code { session . exit_code } ~ "
f " Here ' s the final output: \n { new_output } ] "
)
adapter = None
for p , a in self . adapters . items ( ) :
if p . value == platform_name :
adapter = a
break
if adapter and chat_id :
try :
2026-03-15 23:01:57 -07:00
send_meta = { " thread_id " : thread_id } if thread_id else None
await adapter . send ( chat_id , message_text , metadata = send_meta )
2026-03-14 00:17:04 -07:00
except Exception as e :
logger . error ( " Watcher delivery error: %s " , e )
break
elif has_new_output and notify_mode == " all " :
# New output available -- deliver status update (only in "all" mode)
new_output = session . output_buffer [ - 500 : ] if session . output_buffer else " "
message_text = (
f " [Background process { session_id } is still running~ "
f " New output: \n { new_output } ] "
)
adapter = None
for p , a in self . adapters . items ( ) :
if p . value == platform_name :
adapter = a
break
if adapter and chat_id :
try :
2026-03-15 23:01:57 -07:00
send_meta = { " thread_id " : thread_id } if thread_id else None
await adapter . send ( chat_id , message_text , metadata = send_meta )
2026-03-14 00:17:04 -07:00
except Exception as e :
logger . error ( " Watcher delivery error: %s " , e )
logger . debug ( " Process watcher ended: %s " , session_id )
fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.
- Apply truncate_message() chunking in _send_to_platform() before
dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement
Cherry-picked from PR #1557 by llbn.
* fix(approval): show full command in dangerous command approval (#1553)
Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:
- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests
Cherry-picked from PR #1566 by crazywriter1.
* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)
The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.
Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)
When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.
Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.
Cherry-picked from PR #1593 by ygd58.
* fix(security): block sandbox backend creds from subprocess env (#1264)
Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.
Cherry-picked from PR #1571 by ygd58.
* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.
Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.
The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
2026-03-17 02:23:07 -07:00
_MAX_INTERRUPT_DEPTH = 3 # Cap recursive interrupt handling (#816)
feat(gateway): cache AIAgent per session for prompt caching
The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.
Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents
Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.
This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.
2026-03-21 13:07:08 -07:00
@staticmethod
def _agent_config_signature (
model : str ,
runtime : dict ,
enabled_toolsets : list ,
ephemeral_prompt : str ,
) - > str :
""" Compute a stable string key from agent config values.
When this signature changes between messages , the cached AIAgent is
discarded and rebuilt . When it stays the same , the cached agent is
reused — preserving the frozen system prompt and tool schemas for
prompt cache hits .
"""
import hashlib , json as _j
2026-03-26 13:19:43 -07:00
# Fingerprint the FULL credential string instead of using a short
# prefix. OAuth/JWT-style tokens frequently share a common prefix
# (e.g. "eyJhbGci"), which can cause false cache hits across auth
# switches if only the first few characters are considered.
_api_key = str ( runtime . get ( " api_key " , " " ) or " " )
_api_key_fingerprint = hashlib . sha256 ( _api_key . encode ( ) ) . hexdigest ( ) if _api_key else " "
feat(gateway): cache AIAgent per session for prompt caching
The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.
Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents
Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.
This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.
2026-03-21 13:07:08 -07:00
blob = _j . dumps (
[
model ,
2026-03-26 13:19:43 -07:00
_api_key_fingerprint ,
feat(gateway): cache AIAgent per session for prompt caching
The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.
Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents
Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.
This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.
2026-03-21 13:07:08 -07:00
runtime . get ( " base_url " , " " ) ,
runtime . get ( " provider " , " " ) ,
runtime . get ( " api_mode " , " " ) ,
sorted ( enabled_toolsets ) if enabled_toolsets else [ ] ,
# reasoning_config excluded — it's set per-message on the
# cached agent and doesn't affect system prompt or tools.
ephemeral_prompt or " " ,
] ,
sort_keys = True ,
default = str ,
)
return hashlib . sha256 ( blob . encode ( ) ) . hexdigest ( ) [ : 16 ]
def _evict_cached_agent ( self , session_key : str ) - > None :
""" Remove a cached agent for a session (called on /new, /model, etc). """
_lock = getattr ( self , " _agent_cache_lock " , None )
if _lock :
with _lock :
self . _agent_cache . pop ( session_key , None )
2026-03-14 00:17:04 -07:00
async def _run_agent (
self ,
message : str ,
context_prompt : str ,
history : List [ Dict [ str , Any ] ] ,
source : SessionSource ,
session_id : str ,
fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.
- Apply truncate_message() chunking in _send_to_platform() before
dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement
Cherry-picked from PR #1557 by llbn.
* fix(approval): show full command in dangerous command approval (#1553)
Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:
- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests
Cherry-picked from PR #1566 by crazywriter1.
* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)
The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.
Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)
When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.
Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.
Cherry-picked from PR #1593 by ygd58.
* fix(security): block sandbox backend creds from subprocess env (#1264)
Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.
Cherry-picked from PR #1571 by ygd58.
* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.
Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.
The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
2026-03-17 02:23:07 -07:00
session_key : str = None ,
_interrupt_depth : int = 0 ,
2026-03-25 15:51:15 -07:00
event_message_id : Optional [ str ] = None ,
2026-03-14 00:17:04 -07:00
) - > Dict [ str , Any ] :
"""
Run the agent with the given message and context .
Returns the full result dict from run_conversation , including :
- " final_response " : str ( the text to send back )
- " messages " : list ( full conversation including tool calls )
- " api_calls " : int
- " completed " : bool
This is run in a thread pool to not block the event loop .
Supports interruption via new messages .
"""
from run_agent import AIAgent
import queue
2026-03-26 13:39:41 -07:00
user_config = _load_gateway_config ( )
platform_key = _platform_config_key ( source . platform )
from hermes_cli . tools_config import _get_platform_tools
enabled_toolsets = sorted ( _get_platform_tools ( user_config , platform_key ) )
2026-03-11 20:16:57 +03:00
2026-03-14 00:17:04 -07:00
# Tool progress mode from config.yaml: "all", "new", "verbose", "off"
2026-03-26 17:58:50 -07:00
# Falls back to env vars for backward compatibility.
# YAML 1.1 parses bare `off` as boolean False — normalise before
# the `or` chain so it doesn't silently fall through to "all".
_raw_tp = user_config . get ( " display " , { } ) . get ( " tool_progress " )
if _raw_tp is False :
_raw_tp = " off "
2026-03-14 00:17:04 -07:00
progress_mode = (
2026-03-26 17:58:50 -07:00
_raw_tp
2026-03-14 00:17:04 -07:00
or os . getenv ( " HERMES_TOOL_PROGRESS_MODE " )
or " all "
)
tool_progress_enabled = progress_mode != " off "
# Queue for progress messages (thread-safe)
progress_queue = queue . Queue ( ) if tool_progress_enabled else None
last_tool = [ None ] # Mutable container for tracking in closure
last_progress_msg = [ None ] # Track last message for dedup
repeat_count = [ 0 ] # How many times the same message repeated
def progress_callback ( tool_name : str , preview : str = None , args : dict = None ) :
""" Callback invoked by agent when a tool is called. """
if not progress_queue :
return
# "new" mode: only report when tool changes
if progress_mode == " new " and tool_name == last_tool [ 0 ] :
return
last_tool [ 0 ] = tool_name
# Build progress message with primary argument preview
2026-03-15 20:21:21 -07:00
from agent . display import get_tool_emoji
emoji = get_tool_emoji ( tool_name , default = " ⚙️ " )
2026-03-14 00:17:04 -07:00
# Verbose mode: show detailed arguments
if progress_mode == " verbose " and args :
import json as _json
args_str = _json . dumps ( args , ensure_ascii = False , default = str )
if len ( args_str ) > 200 :
args_str = args_str [ : 197 ] + " ... "
msg = f " { emoji } { tool_name } ( { list ( args . keys ( ) ) } ) \n { args_str } "
progress_queue . put ( msg )
return
if preview :
# Truncate preview to keep messages clean
if len ( preview ) > 80 :
preview = preview [ : 77 ] + " ... "
msg = f " { emoji } { tool_name } : \" { preview } \" "
else :
msg = f " { emoji } { tool_name } ... "
# Dedup: collapse consecutive identical progress messages.
# Common with execute_code where models iterate with the same
# code (same boilerplate imports → identical previews).
if msg == last_progress_msg [ 0 ] :
repeat_count [ 0 ] + = 1
# Update the last line in progress_lines with a counter
# via a special "dedup" queue message.
progress_queue . put ( ( " __dedup__ " , msg , repeat_count [ 0 ] ) )
return
last_progress_msg [ 0 ] = msg
repeat_count [ 0 ] = 0
progress_queue . put ( msg )
# Background task to send progress messages
2026-03-27 22:37:53 -07:00
# Accumulates tool lines into a single message that gets edited.
#
# Threading metadata is platform-specific:
# - Slack DM threading needs event_message_id fallback (reply thread)
# - Telegram uses message_thread_id only for forum topics; passing a
# normal DM/group message id as thread_id causes send failures
# - Other platforms should use explicit source.thread_id only
if source . platform == Platform . SLACK :
_progress_thread_id = source . thread_id or event_message_id
else :
_progress_thread_id = source . thread_id
2026-03-25 15:51:15 -07:00
_progress_metadata = { " thread_id " : _progress_thread_id } if _progress_thread_id else None
2026-03-14 00:17:04 -07:00
async def send_progress_messages ( ) :
if not progress_queue :
return
adapter = self . adapters . get ( source . platform )
if not adapter :
return
progress_lines = [ ] # Accumulated tool lines
progress_msg_id = None # ID of the progress message to edit
can_edit = True # False once an edit fails (platform doesn't support it)
while True :
try :
raw = progress_queue . get_nowait ( )
# Handle dedup messages: update last line with repeat counter
if isinstance ( raw , tuple ) and len ( raw ) == 3 and raw [ 0 ] == " __dedup__ " :
_ , base_msg , count = raw
if progress_lines :
progress_lines [ - 1 ] = f " { base_msg } (× { count + 1 } ) "
msg = progress_lines [ - 1 ] if progress_lines else base_msg
else :
msg = raw
progress_lines . append ( msg )
if can_edit and progress_msg_id is not None :
# Try to edit the existing progress message
full_text = " \n " . join ( progress_lines )
result = await adapter . edit_message (
chat_id = source . chat_id ,
message_id = progress_msg_id ,
content = full_text ,
)
if not result . success :
# Platform doesn't support editing — stop trying,
# send just this new line as a separate message
can_edit = False
await adapter . send ( chat_id = source . chat_id , content = msg , metadata = _progress_metadata )
else :
if can_edit :
# First tool: send all accumulated text as new message
full_text = " \n " . join ( progress_lines )
result = await adapter . send ( chat_id = source . chat_id , content = full_text , metadata = _progress_metadata )
else :
# Editing unsupported: send just this line
result = await adapter . send ( chat_id = source . chat_id , content = msg , metadata = _progress_metadata )
if result . success and result . message_id :
progress_msg_id = result . message_id
# Restore typing indicator
await asyncio . sleep ( 0.3 )
await adapter . send_typing ( source . chat_id , metadata = _progress_metadata )
except queue . Empty :
await asyncio . sleep ( 0.3 )
except asyncio . CancelledError :
# Drain remaining queued messages
while not progress_queue . empty ( ) :
try :
raw = progress_queue . get_nowait ( )
if isinstance ( raw , tuple ) and len ( raw ) == 3 and raw [ 0 ] == " __dedup__ " :
_ , base_msg , count = raw
if progress_lines :
progress_lines [ - 1 ] = f " { base_msg } (× { count + 1 } ) "
else :
progress_lines . append ( raw )
except Exception :
break
# Final edit with all remaining tools (only if editing works)
if can_edit and progress_lines and progress_msg_id :
full_text = " \n " . join ( progress_lines )
try :
await adapter . edit_message (
chat_id = source . chat_id ,
message_id = progress_msg_id ,
content = full_text ,
)
except Exception :
pass
return
except Exception as e :
logger . error ( " Progress message error: %s " , e )
await asyncio . sleep ( 1 )
# We need to share the agent instance for interrupt support
agent_holder = [ None ] # Mutable container for the agent instance
result_holder = [ None ] # Mutable container for the result
tools_holder = [ None ] # Mutable container for the tool definitions
feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent
Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:
- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally
Config (config.yaml):
streaming:
enabled: true
transport: edit # progressive editMessageText
edit_interval: 0.3 # seconds between edits
buffer_threshold: 40 # chars before forcing flush
cursor: ' ▉'
Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).
2026-03-16 05:52:42 -07:00
stream_consumer_holder = [ None ] # Mutable container for stream consumer
2026-03-14 00:17:04 -07:00
# Bridge sync step_callback → async hooks.emit for agent:step events
_loop_for_step = asyncio . get_event_loop ( )
_hooks_ref = self . hooks
def _step_callback_sync ( iteration : int , tool_names : list ) - > None :
try :
asyncio . run_coroutine_threadsafe (
_hooks_ref . emit ( " agent:step " , {
" platform " : source . platform . value if source . platform else " " ,
" user_id " : source . user_id ,
" session_id " : session_id ,
" iteration " : iteration ,
" tool_names " : tool_names ,
} ) ,
_loop_for_step ,
)
except Exception as _e :
logger . debug ( " agent:step hook error: %s " , _e )
2026-03-20 08:37:36 -07:00
# Bridge sync status_callback → async adapter.send for context pressure
_status_adapter = self . adapters . get ( source . platform )
_status_chat_id = source . chat_id
2026-03-25 15:51:15 -07:00
_status_thread_metadata = { " thread_id " : _progress_thread_id } if _progress_thread_id else None
2026-03-20 08:37:36 -07:00
def _status_callback_sync ( event_type : str , message : str ) - > None :
if not _status_adapter :
return
try :
asyncio . run_coroutine_threadsafe (
_status_adapter . send (
_status_chat_id ,
message ,
metadata = _status_thread_metadata ,
) ,
_loop_for_step ,
)
except Exception as _e :
logger . debug ( " status_callback error ( %s ): %s " , event_type , _e )
2026-03-14 00:17:04 -07:00
def run_sync ( ) :
# Pass session_key to process registry via env var so background
# processes can be mapped back to this gateway session
os . environ [ " HERMES_SESSION_KEY " ] = session_key or " "
# Read from env var or use default (same as CLI)
max_iterations = int ( os . getenv ( " HERMES_MAX_ITERATIONS " , " 90 " ) )
# Map platform enum to the platform hint key the agent understands.
# Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
platform_key = " cli " if source . platform == Platform . LOCAL else source . platform . value
# Combine platform context with user-configured ephemeral system prompt
combined_ephemeral = context_prompt or " "
if self . _ephemeral_system_prompt :
combined_ephemeral = ( combined_ephemeral + " \n \n " + self . _ephemeral_system_prompt ) . strip ( )
# Re-read .env and config for fresh credentials (gateway is long-lived,
# keys may change without restart).
try :
load_dotenv ( _env_path , override = True , encoding = " utf-8 " )
except UnicodeDecodeError :
load_dotenv ( _env_path , override = True , encoding = " latin-1 " )
except Exception :
pass
2026-03-26 13:39:41 -07:00
model = _resolve_gateway_model ( user_config )
2026-03-14 00:17:04 -07:00
try :
runtime_kwargs = _resolve_runtime_agent_kwargs ( )
except Exception as exc :
return {
" final_response " : f " ⚠️ Provider authentication failed: { exc } " ,
" messages " : [ ] ,
" api_calls " : 0 ,
" tools " : [ ] ,
}
pr = self . _provider_routing
honcho_manager , honcho_config = self . _get_or_create_gateway_honcho ( session_key )
2026-03-11 22:12:11 +08:00
reasoning_config = self . _load_reasoning_config ( )
self . _reasoning_config = reasoning_config
feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent
Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:
- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally
Config (config.yaml):
streaming:
enabled: true
transport: edit # progressive editMessageText
edit_interval: 0.3 # seconds between edits
buffer_threshold: 40 # chars before forcing flush
cursor: ' ▉'
Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).
2026-03-16 05:52:42 -07:00
# Set up streaming consumer if enabled
_stream_consumer = None
_stream_delta_cb = None
_scfg = getattr ( getattr ( self , ' config ' , None ) , ' streaming ' , None )
if _scfg is None :
from gateway . config import StreamingConfig
_scfg = StreamingConfig ( )
if _scfg . enabled and _scfg . transport != " off " :
try :
from gateway . stream_consumer import GatewayStreamConsumer , StreamConsumerConfig
_adapter = self . adapters . get ( source . platform )
if _adapter :
_consumer_cfg = StreamConsumerConfig (
edit_interval = _scfg . edit_interval ,
buffer_threshold = _scfg . buffer_threshold ,
cursor = _scfg . cursor ,
)
_stream_consumer = GatewayStreamConsumer (
adapter = _adapter ,
chat_id = source . chat_id ,
config = _consumer_cfg ,
2026-03-25 15:51:15 -07:00
metadata = { " thread_id " : _progress_thread_id } if _progress_thread_id else None ,
feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent
Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:
- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally
Config (config.yaml):
streaming:
enabled: true
transport: edit # progressive editMessageText
edit_interval: 0.3 # seconds between edits
buffer_threshold: 40 # chars before forcing flush
cursor: ' ▉'
Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).
2026-03-16 05:52:42 -07:00
)
_stream_delta_cb = _stream_consumer . on_delta
stream_consumer_holder [ 0 ] = _stream_consumer
except Exception as _sc_err :
logger . debug ( " Could not set up stream consumer: %s " , _sc_err )
fix: hermes update causes dual gateways on macOS (launchd) (#1567)
* feat: add optional smart model routing
Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.
* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s
* fix(gateway): avoid recursive ExecStop in user systemd unit
* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit
The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.
---------
Co-authored-by: Ninja <ninja@local>
* feat(skills): add blender-mcp optional skill for 3D modeling
Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.
Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.
* feat(acp): support slash commands in ACP adapter (#1532)
Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.
Unrecognized /commands fall through to the LLM as normal messages.
/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.
Fixes #1402
* fix(logging): improve error logging in session search tool (#1533)
* fix(gateway): restart on retryable startup failures (#1517)
* feat(email): add skip_attachments option via config.yaml
* feat(email): add skip_attachments option via config.yaml
Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.
Configure in config.yaml:
platforms:
email:
skip_attachments: true
Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.
* docs: document skip_attachments option for email adapter
* fix(telegram): retry on transient TLS failures during connect and send
Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.
Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.
Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.
Based on PR #1527 by cmd8. Closes #1526.
* feat: permissive block_anchor thresholds and unicode normalization (#1539)
Salvaged from PR #1528 by an420eth. Closes #517.
Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
non-breaking spaces → ASCII) so LLM-produced unicode artifacts
don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
multiple candidates — if first/last lines match exactly, the
block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
preserve correct character positions
Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
* feat(cli): add file path autocomplete in the input prompt (#1545)
When typing a path-like token (./ ../ ~/ / or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.
Triggered by tokens like:
edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ...
check ~/doc → shows ~/docs/, ~/documents/, ...
read /etc/hos → shows /etc/hosts, /etc/hostname, ...
open tools/reg → shows tools/registry.py
Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.
Inspired by OpenCode PR #145 (file path completion menu).
Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
tokens, _path_completions() yields filesystem Completions with
size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
path extraction, prefix filtering, directory markers, home
expansion, case-insensitivity, integration with slash commands
* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled
Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:
- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)
Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.
Inspired by OpenClaw PR #47959.
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)
Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.
Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.
* feat: smart approvals + /stop command (inspired by OpenAI Codex)
* feat: smart approvals — LLM-based risk assessment for dangerous commands
Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.
Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).
Config (config.yaml):
approvals:
mode: manual # manual (default), smart, off
Modes:
- manual — current behavior, always prompt the user
- smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
or ESCALATE (fall through to manual prompt)
- off — skip all approval prompts (equivalent to --yolo)
When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.
The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.
* feat: make smart approval model configurable via config.yaml
Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).
Config:
auxiliary:
approval:
provider: auto
model: '' # fast/cheap model recommended
base_url: ''
api_key: ''
Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.
* feat: add /stop command to kill all background processes
Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.
Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.
Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.
* feat: first-class plugin architecture + hide status bar cost by default (#1544)
The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:
display:
show_cost: true
in config.yaml, or: hermes config set display.show_cost true
The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.
Status bar without cost:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m
Status bar with show_cost: true:
⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m
* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)
* feat: improve memory prioritization — user preferences over procedural knowledge
Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.
Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'
Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
preferences and corrections over task-specific details
* feat: more aggressive skill creation and update prompting
Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.
Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers
* feat: first-class plugin architecture (#1555)
Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.
Core system (hermes_cli/plugins.py):
- Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
pip entry_points (hermes_agent.plugins group)
- PluginContext with register_tool() and register_hook()
- 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
on_session_start/end
- Namespace package handling for relative imports in plugins
- Graceful error isolation — broken plugins never crash the agent
Integration (model_tools.py):
- Plugin discovery runs after built-in + MCP tools
- Plugin tools bypass toolset filter via get_plugin_tool_names()
- Pre/post tool call hooks fire in handle_function_call()
CLI:
- /plugins command shows loaded plugins, tool counts, status
- Added to COMMANDS dict for autocomplete
Docs:
- Getting started guide (build-a-hermes-plugin.md) — full tutorial
building a calculator plugin step by step
- Reference page (features/plugins.md) — quick overview + tables
- Covers: file structure, schemas, handlers, hooks, data files,
bundled skills, env var gating, pip distribution, common mistakes
Tests: 16 tests covering discovery, loading, hooks, tool visibility.
* fix: hermes update causes dual gateways on macOS (launchd)
Three bugs worked together to create the dual-gateway problem:
1. cmd_update only checked systemd for gateway restart, completely
ignoring launchd on macOS. After killing the PID it would print
'Restart it with: hermes gateway run' even when launchd was about
to auto-respawn the process.
2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
after SIGTERM (non-zero exit), so the user's manual restart
created a second instance.
3. The launchd plist lacked --replace (systemd had it), so the
respawned gateway didn't kill stale instances on startup.
Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint
* fix: add launchd plist auto-refresh + explicit restart in cmd_update
Two integration issues with the initial fix:
1. Existing macOS users with old plist (no --replace) would never
get the fix until manual uninstall/reinstall. Added
refresh_launchd_plist_if_needed() — mirrors the existing
refresh_systemd_unit_if_needed(). Called from launchd_start(),
launchd_restart(), and cmd_update.
2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
explicit launchctl stop/start. This caused races: launchd would
respawn the old process before the PID file was cleaned up.
Now does explicit stop+start (matching how systemd gets an
explicit systemctl restart), with plist refresh first so the
new --replace flag is picked up.
---------
Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
turn_route = self . _resolve_turn_agent_config ( message , model , runtime_kwargs )
feat(gateway): cache AIAgent per session for prompt caching
The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.
Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents
Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.
This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.
2026-03-21 13:07:08 -07:00
# Check agent cache — reuse the AIAgent from the previous message
# in this session to preserve the frozen system prompt and tool
# schemas for prompt cache hits.
_sig = self . _agent_config_signature (
turn_route [ " model " ] ,
turn_route [ " runtime " ] ,
enabled_toolsets ,
combined_ephemeral ,
2026-03-14 00:17:04 -07:00
)
feat(gateway): cache AIAgent per session for prompt caching
The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.
Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents
Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.
This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.
2026-03-21 13:07:08 -07:00
agent = None
_cache_lock = getattr ( self , " _agent_cache_lock " , None )
_cache = getattr ( self , " _agent_cache " , None )
if _cache_lock and _cache is not None :
with _cache_lock :
cached = _cache . get ( session_key )
if cached and cached [ 1 ] == _sig :
agent = cached [ 0 ]
logger . debug ( " Reusing cached agent for session %s " , session_key )
if agent is None :
# Config changed or first message — create fresh agent
agent = AIAgent (
model = turn_route [ " model " ] ,
* * turn_route [ " runtime " ] ,
max_iterations = max_iterations ,
quiet_mode = True ,
verbose_logging = False ,
enabled_toolsets = enabled_toolsets ,
ephemeral_system_prompt = combined_ephemeral or None ,
prefill_messages = self . _prefill_messages or None ,
reasoning_config = reasoning_config ,
providers_allowed = pr . get ( " only " ) ,
providers_ignored = pr . get ( " ignore " ) ,
providers_order = pr . get ( " order " ) ,
provider_sort = pr . get ( " sort " ) ,
provider_require_parameters = pr . get ( " require_parameters " , False ) ,
provider_data_collection = pr . get ( " data_collection " ) ,
session_id = session_id ,
platform = platform_key ,
honcho_session_key = session_key ,
honcho_manager = honcho_manager ,
honcho_config = honcho_config ,
session_db = self . _session_db ,
fallback_model = self . _fallback_model ,
)
if _cache_lock and _cache is not None :
with _cache_lock :
_cache [ session_key ] = ( agent , _sig )
logger . debug ( " Created new agent for session %s (sig= %s ) " , session_key , _sig )
# Per-message state — callbacks and reasoning config change every
# turn and must not be baked into the cached agent constructor.
agent . tool_progress_callback = progress_callback if tool_progress_enabled else None
agent . step_callback = _step_callback_sync if _hooks_ref . loaded_hooks else None
agent . stream_delta_callback = _stream_delta_cb
agent . status_callback = _status_callback_sync
agent . reasoning_config = reasoning_config
2026-03-26 17:38:24 -07:00
# Background review delivery — send "💾 Memory updated" etc. to user
def _bg_review_send ( message : str ) - > None :
if not _status_adapter :
return
try :
asyncio . run_coroutine_threadsafe (
_status_adapter . send (
_status_chat_id ,
message ,
metadata = _status_thread_metadata ,
) ,
_loop_for_step ,
)
except Exception as _e :
logger . debug ( " background_review_callback error: %s " , _e )
agent . background_review_callback = _bg_review_send
2026-03-14 00:17:04 -07:00
# Store agent reference for interrupt support
agent_holder [ 0 ] = agent
# Capture the full tool definitions for transcript logging
tools_holder [ 0 ] = agent . tools if hasattr ( agent , ' tools ' ) else None
# Convert history to agent format.
# Two cases:
# 1. Normal path (from transcript): simple {role, content, timestamp} dicts
# - Strip timestamps, keep role+content
# 2. Interrupt path (from agent result["messages"]): full agent messages
# that may include tool_calls, tool_call_id, reasoning, etc.
# - These must be passed through intact so the API sees valid
# assistant→tool sequences (dropping tool_calls causes 500 errors)
agent_history = [ ]
for msg in history :
role = msg . get ( " role " )
if not role :
continue
# Skip metadata entries (tool definitions, session info)
# -- these are for transcript logging, not for the LLM
if role in ( " session_meta " , ) :
continue
# Skip system messages -- the agent rebuilds its own system prompt
if role == " system " :
continue
# Rich agent messages (tool_calls, tool results) must be passed
# through intact so the API sees valid assistant→tool sequences
has_tool_calls = " tool_calls " in msg
has_tool_call_id = " tool_call_id " in msg
is_tool_message = role == " tool "
if has_tool_calls or has_tool_call_id or is_tool_message :
clean_msg = { k : v for k , v in msg . items ( ) if k != " timestamp " }
agent_history . append ( clean_msg )
else :
# Simple text message - just need role and content
content = msg . get ( " content " )
if content :
# Tag cross-platform mirror messages so the agent knows their origin
if msg . get ( " mirror " ) :
mirror_src = msg . get ( " mirror_source " , " another session " )
content = f " [Delivered from { mirror_src } ] { content } "
2026-03-25 09:47:28 -07:00
entry = { " role " : role , " content " : content }
# Preserve reasoning fields on assistant messages so
# multi-turn reasoning context survives session reload.
# The agent's _build_api_kwargs converts these to the
# provider-specific format (reasoning_content, etc.).
if role == " assistant " :
for _rkey in ( " reasoning " , " reasoning_details " ,
" codex_reasoning_items " ) :
_rval = msg . get ( _rkey )
if _rval :
entry [ _rkey ] = _rval
agent_history . append ( entry )
2026-03-14 00:17:04 -07:00
# Collect MEDIA paths already in history so we can exclude them
# from the current turn's extraction. This is compression-safe:
# even if the message list shrinks, we know which paths are old.
_history_media_paths : set = set ( )
for _hm in agent_history :
if _hm . get ( " role " ) in ( " tool " , " function " ) :
_hc = _hm . get ( " content " , " " )
if " MEDIA: " in _hc :
for _match in re . finditer ( r ' MEDIA:( \ S+) ' , _hc ) :
_p = _match . group ( 1 ) . strip ( ) . rstrip ( ' " ,} ' )
if _p :
_history_media_paths . add ( _p )
result = agent . run_conversation ( message , conversation_history = agent_history , task_id = session_id )
result_holder [ 0 ] = result
feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent
Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:
- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally
Config (config.yaml):
streaming:
enabled: true
transport: edit # progressive editMessageText
edit_interval: 0.3 # seconds between edits
buffer_threshold: 40 # chars before forcing flush
cursor: ' ▉'
Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).
2026-03-16 05:52:42 -07:00
# Signal the stream consumer that the agent is done
if _stream_consumer is not None :
_stream_consumer . finish ( )
2026-03-14 00:17:04 -07:00
# Return final response, or a message if something went wrong
final_response = result . get ( " final_response " )
2026-03-15 19:18:58 -07:00
# Extract actual token counts from the agent instance used for this run
2026-03-14 00:17:04 -07:00
_last_prompt_toks = 0
2026-03-15 19:18:58 -07:00
_input_toks = 0
_output_toks = 0
2026-03-14 00:17:04 -07:00
_agent = agent_holder [ 0 ]
if _agent and hasattr ( _agent , " context_compressor " ) :
_last_prompt_toks = getattr ( _agent . context_compressor , " last_prompt_tokens " , 0 )
2026-03-15 19:18:58 -07:00
_input_toks = getattr ( _agent , " session_prompt_tokens " , 0 )
_output_toks = getattr ( _agent , " session_completion_tokens " , 0 )
2026-03-14 06:47:39 -07:00
_resolved_model = getattr ( _agent , " model " , None ) if _agent else None
2026-03-14 00:17:04 -07:00
if not final_response :
error_msg = f " ⚠️ { result [ ' error ' ] } " if result . get ( " error " ) else " (No response generated) "
return {
" final_response " : error_msg ,
" messages " : result . get ( " messages " , [ ] ) ,
" api_calls " : result . get ( " api_calls " , 0 ) ,
" tools " : tools_holder [ 0 ] or [ ] ,
" history_offset " : len ( agent_history ) ,
" last_prompt_tokens " : _last_prompt_toks ,
2026-03-15 19:18:58 -07:00
" input_tokens " : _input_toks ,
" output_tokens " : _output_toks ,
2026-03-14 06:47:39 -07:00
" model " : _resolved_model ,
2026-03-14 00:17:04 -07:00
}
# Scan tool results for MEDIA:<path> tags that need to be delivered
# as native audio/file attachments. The TTS tool embeds MEDIA: tags
# in its JSON response, but the model's final text reply usually
# doesn't include them. We collect unique tags from tool results and
# append any that aren't already present in the final response, so the
# adapter's extract_media() can find and deliver the files exactly once.
#
# Uses path-based deduplication against _history_media_paths (collected
# before run_conversation) instead of index slicing. This is safe even
# when context compression shrinks the message list. (Fixes #160)
if " MEDIA: " not in final_response :
media_tags = [ ]
has_voice_directive = False
for msg in result . get ( " messages " , [ ] ) :
if msg . get ( " role " ) in ( " tool " , " function " ) :
content = msg . get ( " content " , " " )
if " MEDIA: " in content :
for match in re . finditer ( r ' MEDIA:( \ S+) ' , content ) :
path = match . group ( 1 ) . strip ( ) . rstrip ( ' " ,} ' )
if path and path not in _history_media_paths :
media_tags . append ( f " MEDIA: { path } " )
if " [[audio_as_voice]] " in content :
has_voice_directive = True
if media_tags :
seen = set ( )
unique_tags = [ ]
for tag in media_tags :
if tag not in seen :
seen . add ( tag )
unique_tags . append ( tag )
if has_voice_directive :
unique_tags . insert ( 0 , " [[audio_as_voice]] " )
final_response = final_response + " \n " + " \n " . join ( unique_tags )
# Sync session_id: the agent may have created a new session during
# mid-run context compression (_compress_context splits sessions).
# If so, update the session store entry so the NEXT message loads
# the compressed transcript, not the stale pre-compression one.
agent = agent_holder [ 0 ]
if agent and session_key and hasattr ( agent , ' session_id ' ) and agent . session_id != session_id :
logger . info (
" Session split detected: %s → %s (compression) " ,
session_id , agent . session_id ,
)
entry = self . session_store . _entries . get ( session_key )
if entry :
entry . session_id = agent . session_id
self . session_store . _save ( )
effective_session_id = getattr ( agent , ' session_id ' , session_id ) if agent else session_id
2026-03-17 04:14:40 -07:00
# Auto-generate session title after first exchange (non-blocking)
if final_response and self . _session_db :
try :
from agent . title_generator import maybe_auto_title
all_msgs = result_holder [ 0 ] . get ( " messages " , [ ] ) if result_holder [ 0 ] else [ ]
maybe_auto_title (
self . _session_db ,
effective_session_id ,
message ,
final_response ,
all_msgs ,
)
except Exception :
pass
2026-03-14 00:17:04 -07:00
return {
" final_response " : final_response ,
" last_reasoning " : result . get ( " last_reasoning " ) ,
" messages " : result_holder [ 0 ] . get ( " messages " , [ ] ) if result_holder [ 0 ] else [ ] ,
" api_calls " : result_holder [ 0 ] . get ( " api_calls " , 0 ) if result_holder [ 0 ] else 0 ,
" tools " : tools_holder [ 0 ] or [ ] ,
" history_offset " : len ( agent_history ) ,
" last_prompt_tokens " : _last_prompt_toks ,
2026-03-15 19:18:58 -07:00
" input_tokens " : _input_toks ,
" output_tokens " : _output_toks ,
2026-03-14 06:47:39 -07:00
" model " : _resolved_model ,
2026-03-14 00:17:04 -07:00
" session_id " : effective_session_id ,
}
# Start progress message sender if enabled
progress_task = None
if tool_progress_enabled :
progress_task = asyncio . create_task ( send_progress_messages ( ) )
feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent
Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:
- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally
Config (config.yaml):
streaming:
enabled: true
transport: edit # progressive editMessageText
edit_interval: 0.3 # seconds between edits
buffer_threshold: 40 # chars before forcing flush
cursor: ' ▉'
Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).
2026-03-16 05:52:42 -07:00
2026-03-16 06:35:46 -07:00
# Start stream consumer task — polls for consumer creation since it
# happens inside run_sync (thread pool) after the agent is constructed.
feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent
Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:
- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally
Config (config.yaml):
streaming:
enabled: true
transport: edit # progressive editMessageText
edit_interval: 0.3 # seconds between edits
buffer_threshold: 40 # chars before forcing flush
cursor: ' ▉'
Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).
2026-03-16 05:52:42 -07:00
stream_task = None
2026-03-16 06:35:46 -07:00
async def _start_stream_consumer ( ) :
""" Wait for the stream consumer to be created, then run it. """
for _ in range ( 200 ) : # Up to 10s wait
if stream_consumer_holder [ 0 ] is not None :
await stream_consumer_holder [ 0 ] . run ( )
return
await asyncio . sleep ( 0.05 )
stream_task = asyncio . create_task ( _start_stream_consumer ( ) )
2026-03-14 00:17:04 -07:00
# Track this agent as running for this session (for interrupt support)
# We do this in a callback after the agent is created
async def track_agent ( ) :
# Wait for agent to be created
while agent_holder [ 0 ] is None :
await asyncio . sleep ( 0.05 )
if session_key :
self . _running_agents [ session_key ] = agent_holder [ 0 ]
tracking_task = asyncio . create_task ( track_agent ( ) )
# Monitor for interrupts from the adapter (new messages arriving)
async def monitor_for_interrupt ( ) :
adapter = self . adapters . get ( source . platform )
if not adapter or not session_key :
return
while True :
await asyncio . sleep ( 0.2 ) # Check every 200ms
# Check if adapter has a pending interrupt for this session.
# Must use session_key (build_session_key output) — NOT
# source.chat_id — because the adapter stores interrupt events
# under the full session key.
if hasattr ( adapter , ' has_pending_interrupt ' ) and adapter . has_pending_interrupt ( session_key ) :
agent = agent_holder [ 0 ]
if agent :
pending_event = adapter . get_pending_message ( session_key )
pending_text = pending_event . text if pending_event else None
logger . debug ( " Interrupt detected from adapter, signaling agent... " )
agent . interrupt ( pending_text )
break
interrupt_monitor = asyncio . create_task ( monitor_for_interrupt ( ) )
try :
# Run in thread pool to not block
loop = asyncio . get_event_loop ( )
response = await loop . run_in_executor ( None , run_sync )
fix(gateway): /model shows active fallback model instead of config default (#1615)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.
- Apply truncate_message() chunking in _send_to_platform() before
dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement
Cherry-picked from PR #1557 by llbn.
* fix(approval): show full command in dangerous command approval (#1553)
Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:
- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests
Cherry-picked from PR #1566 by crazywriter1.
* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)
The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.
Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)
When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.
Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.
Cherry-picked from PR #1593 by ygd58.
* fix(security): block sandbox backend creds from subprocess env (#1264)
Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.
Cherry-picked from PR #1571 by ygd58.
* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.
Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.
The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).
* fix(gateway): /model shows active fallback model instead of config default (#1615)
When the agent falls back to a different model (e.g. due to rate
limiting), /model still showed the config default. Now tracks the
effective model/provider after each agent run and displays it.
Cleared when the primary model succeeds again or the user explicitly
switches via /model.
Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for
test compatibility.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
2026-03-17 02:26:51 -07:00
# Track fallback model state: if the agent switched to a
# fallback model during this run, persist it so /model shows
# the actually-active model instead of the config default.
_agent = agent_holder [ 0 ]
if _agent is not None and hasattr ( _agent , ' model ' ) :
_cfg_model = _resolve_gateway_model ( )
if _agent . model != _cfg_model :
self . _effective_model = _agent . model
self . _effective_provider = getattr ( _agent , ' provider ' , None )
feat(gateway): cache AIAgent per session for prompt caching
The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.
Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents
Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.
This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.
2026-03-21 13:07:08 -07:00
# Fallback activated — evict cached agent so the next
# message starts fresh and retries the primary model.
self . _evict_cached_agent ( session_key )
fix(gateway): /model shows active fallback model instead of config default (#1615)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.
- Apply truncate_message() chunking in _send_to_platform() before
dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement
Cherry-picked from PR #1557 by llbn.
* fix(approval): show full command in dangerous command approval (#1553)
Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:
- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests
Cherry-picked from PR #1566 by crazywriter1.
* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)
The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.
Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)
When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.
Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.
Cherry-picked from PR #1593 by ygd58.
* fix(security): block sandbox backend creds from subprocess env (#1264)
Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.
Cherry-picked from PR #1571 by ygd58.
* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.
Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.
The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).
* fix(gateway): /model shows active fallback model instead of config default (#1615)
When the agent falls back to a different model (e.g. due to rate
limiting), /model still showed the config default. Now tracks the
effective model/provider after each agent run and displays it.
Cleared when the primary model succeeds again or the user explicitly
switches via /model.
Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for
test compatibility.
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
2026-03-17 02:26:51 -07:00
else :
# Primary model worked — clear any stale fallback state
self . _effective_model = None
self . _effective_provider = None
2026-03-22 04:56:13 -07:00
# Check if we were interrupted OR have a queued message (/queue).
2026-03-14 00:17:04 -07:00
result = result_holder [ 0 ]
adapter = self . adapters . get ( source . platform )
2026-03-22 04:56:13 -07:00
# Get pending message from adapter.
2026-03-14 00:17:04 -07:00
# Use session_key (not source.chat_id) to match adapter's storage keys.
pending = None
2026-03-22 04:56:13 -07:00
if result and adapter and session_key :
if result . get ( " interrupted " ) :
# Interrupted — consume the interrupt message
pending_event = adapter . get_pending_message ( session_key )
if pending_event :
pending = pending_event . text
elif result . get ( " interrupt_message " ) :
pending = result . get ( " interrupt_message " )
else :
# Normal completion — check for /queue'd messages that were
# stored without triggering an interrupt.
pending_event = adapter . get_pending_message ( session_key )
if pending_event :
pending = pending_event . text
logger . debug ( " Processing queued message after agent completion: ' %s ... ' " , pending [ : 40 ] )
2026-03-14 00:17:04 -07:00
if pending :
2026-03-22 04:56:13 -07:00
logger . debug ( " Processing pending message: ' %s ... ' " , pending [ : 40 ] )
2026-03-14 00:17:04 -07:00
# Clear the adapter's interrupt event so the next _run_agent call
# doesn't immediately re-trigger the interrupt before the new agent
# even makes its first API call (this was causing an infinite loop).
if adapter and hasattr ( adapter , ' _active_sessions ' ) and session_key and session_key in adapter . _active_sessions :
adapter . _active_sessions [ session_key ] . clear ( )
fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.
- Apply truncate_message() chunking in _send_to_platform() before
dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement
Cherry-picked from PR #1557 by llbn.
* fix(approval): show full command in dangerous command approval (#1553)
Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:
- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests
Cherry-picked from PR #1566 by crazywriter1.
* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)
The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.
Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)
When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.
Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.
Cherry-picked from PR #1593 by ygd58.
* fix(security): block sandbox backend creds from subprocess env (#1264)
Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.
Cherry-picked from PR #1571 by ygd58.
* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.
Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.
The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
2026-03-17 02:23:07 -07:00
# Cap recursion depth to prevent resource exhaustion when the
# user sends multiple messages while the agent keeps failing. (#816)
if _interrupt_depth > = self . _MAX_INTERRUPT_DEPTH :
logger . warning (
" Interrupt recursion depth %d reached for session %s — "
" queueing message instead of recursing. " ,
_interrupt_depth , session_key ,
)
# Queue the pending message for normal processing on next turn
adapter = self . adapters . get ( source . platform )
if adapter and hasattr ( adapter , ' queue_message ' ) :
adapter . queue_message ( session_key , pending )
return result_holder [ 0 ] or { " final_response " : response , " messages " : history }
2026-03-22 04:56:13 -07:00
was_interrupted = result . get ( " interrupted " )
if not was_interrupted :
# Queued message after normal completion — deliver the first
# response before processing the queued follow-up.
# Skip if streaming already delivered it.
_sc = stream_consumer_holder [ 0 ]
_already_streamed = _sc and getattr ( _sc , " already_sent " , False )
first_response = result . get ( " final_response " , " " )
if first_response and not _already_streamed :
try :
await adapter . send ( source . chat_id , first_response ,
metadata = getattr ( event , " metadata " , None ) )
except Exception as e :
logger . warning ( " Failed to send first response before queued message: %s " , e )
# else: interrupted — discard the interrupted response ("Operation
# interrupted." is just noise; the user already knows they sent a
# new message).
# Process the pending message with updated history
2026-03-14 00:17:04 -07:00
updated_history = result . get ( " messages " , history )
return await self . _run_agent (
message = pending ,
context_prompt = context_prompt ,
history = updated_history ,
source = source ,
session_id = session_id ,
fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
* fix: prevent infinite 400 failure loop on context overflow (#1630)
When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message. This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error. Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.
Three-layer fix:
1. run_agent.py — Fallback heuristic: when a 400 error has a very short
generic message AND the session is large (>40% of context or >80
messages), treat it as a probable context overflow and trigger
compression instead of aborting.
2. run_agent.py + gateway/run.py — Don't persist failed messages:
when the agent returns failed=True before generating any response,
skip writing the user's message to the transcript/DB. This prevents
the session from growing on each failure.
3. gateway/run.py — Smarter error messages: detect context-overflow
failures and suggest /compact or /reset specifically, instead of a
generic 'try again' that will fail identically.
* fix(skills): detect prompt injection patterns and block cache file reads
Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):
1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
(index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
was the original injection vector — untrusted skill descriptions
in the catalog contained adversarial text that the model executed.
2. skill_view: warns when skills are loaded from outside the trusted
~/.hermes/skills/ directory, and detects common injection patterns
in skill content ("ignore previous instructions", "<system>", etc.).
Cherry-picked from PR #1562 by ygd58.
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.
- Apply truncate_message() chunking in _send_to_platform() before
dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement
Cherry-picked from PR #1557 by llbn.
* fix(approval): show full command in dangerous command approval (#1553)
Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:
- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests
Cherry-picked from PR #1566 by crazywriter1.
* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)
The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.
Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)
When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.
Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.
Cherry-picked from PR #1593 by ygd58.
* fix(security): block sandbox backend creds from subprocess env (#1264)
Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.
Cherry-picked from PR #1571 by ygd58.
* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.
Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.
The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).
---------
Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
2026-03-17 02:23:07 -07:00
session_key = session_key ,
_interrupt_depth = _interrupt_depth + 1 ,
2026-03-14 00:17:04 -07:00
)
finally :
# Stop progress sender and interrupt monitor
if progress_task :
progress_task . cancel ( )
interrupt_monitor . cancel ( )
feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent
Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:
- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally
Config (config.yaml):
streaming:
enabled: true
transport: edit # progressive editMessageText
edit_interval: 0.3 # seconds between edits
buffer_threshold: 40 # chars before forcing flush
cursor: ' ▉'
Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).
2026-03-16 05:52:42 -07:00
# Wait for stream consumer to finish its final edit
if stream_task :
try :
await asyncio . wait_for ( stream_task , timeout = 5.0 )
except ( asyncio . TimeoutError , asyncio . CancelledError ) :
stream_task . cancel ( )
try :
await stream_task
except asyncio . CancelledError :
pass
2026-03-14 00:17:04 -07:00
# Clean up tracking
tracking_task . cancel ( )
if session_key and session_key in self . _running_agents :
del self . _running_agents [ session_key ]
# Wait for cancelled tasks
for task in [ progress_task , interrupt_monitor , tracking_task ] :
if task :
try :
await task
except asyncio . CancelledError :
pass
feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent
Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:
- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally
Config (config.yaml):
streaming:
enabled: true
transport: edit # progressive editMessageText
edit_interval: 0.3 # seconds between edits
buffer_threshold: 40 # chars before forcing flush
cursor: ' ▉'
Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).
2026-03-16 05:52:42 -07:00
# If streaming already delivered the response, mark it so the
# caller's send() is skipped (avoiding duplicate messages).
_sc = stream_consumer_holder [ 0 ]
if _sc and _sc . already_sent and isinstance ( response , dict ) :
response [ " already_sent " ] = True
2026-03-14 00:17:04 -07:00
return response
def _start_cron_ticker ( stop_event : threading . Event , adapters = None , interval : int = 60 ) :
"""
Background thread that ticks the cron scheduler at a regular interval .
Runs inside the gateway process so cronjobs fire automatically without
needing a separate ` hermes cron daemon ` or system cron entry .
Also refreshes the channel directory every 5 minutes and prunes the
image / audio / document cache once per hour .
"""
from cron . scheduler import tick as cron_tick
from gateway . platforms . base import cleanup_image_cache , cleanup_document_cache
IMAGE_CACHE_EVERY = 60 # ticks — once per hour at default 60s interval
CHANNEL_DIR_EVERY = 5 # ticks — every 5 minutes
logger . info ( " Cron ticker started (interval= %d s) " , interval )
tick_count = 0
while not stop_event . is_set ( ) :
try :
cron_tick ( verbose = False )
except Exception as e :
logger . debug ( " Cron tick error: %s " , e )
tick_count + = 1
if tick_count % CHANNEL_DIR_EVERY == 0 and adapters :
try :
from gateway . channel_directory import build_channel_directory
build_channel_directory ( adapters )
except Exception as e :
logger . debug ( " Channel directory refresh error: %s " , e )
if tick_count % IMAGE_CACHE_EVERY == 0 :
try :
removed = cleanup_image_cache ( max_age_hours = 24 )
if removed :
logger . info ( " Image cache cleanup: removed %d stale file(s) " , removed )
except Exception as e :
logger . debug ( " Image cache cleanup error: %s " , e )
try :
removed = cleanup_document_cache ( max_age_hours = 24 )
if removed :
logger . info ( " Document cache cleanup: removed %d stale file(s) " , removed )
except Exception as e :
logger . debug ( " Document cache cleanup error: %s " , e )
stop_event . wait ( timeout = interval )
logger . info ( " Cron ticker stopped " )
async def start_gateway ( config : Optional [ GatewayConfig ] = None , replace : bool = False ) - > bool :
"""
Start the gateway and run until interrupted .
This is the main entry point for running the gateway .
Returns True if the gateway ran successfully , False if it failed to start .
A False return causes a non - zero exit code so systemd can auto - restart .
Args :
config : Optional gateway configuration override .
replace : If True , kill any existing gateway instance before starting .
Useful for systemd services to avoid restart - loop deadlocks
when the previous process hasn ' t fully exited yet.
"""
# ── Duplicate-instance guard ──────────────────────────────────────
# Prevent two gateways from running under the same HERMES_HOME.
# The PID file is scoped to HERMES_HOME, so future multi-profile
# setups (each profile using a distinct HERMES_HOME) will naturally
# allow concurrent instances without tripping this guard.
import time as _time
from gateway . status import get_running_pid , remove_pid_file
existing_pid = get_running_pid ( )
if existing_pid is not None and existing_pid != os . getpid ( ) :
if replace :
logger . info (
" Replacing existing gateway instance (PID %d ) with --replace. " ,
existing_pid ,
)
try :
os . kill ( existing_pid , signal . SIGTERM )
except ProcessLookupError :
pass # Already gone
except PermissionError :
logger . error (
" Permission denied killing PID %d . Cannot replace. " ,
existing_pid ,
)
return False
# Wait up to 10 seconds for the old process to exit
for _ in range ( 20 ) :
try :
os . kill ( existing_pid , 0 )
_time . sleep ( 0.5 )
except ( ProcessLookupError , PermissionError ) :
break # Process is gone
else :
# Still alive after 10s — force kill
logger . warning (
" Old gateway (PID %d ) did not exit after SIGTERM, sending SIGKILL. " ,
existing_pid ,
)
try :
os . kill ( existing_pid , signal . SIGKILL )
_time . sleep ( 0.5 )
except ( ProcessLookupError , PermissionError ) :
pass
remove_pid_file ( )
2026-03-21 18:13:53 -07:00
# Also release all scoped locks left by the old process.
# Stopped (Ctrl+Z) processes don't release locks on exit,
# leaving stale lock files that block the new gateway from starting.
try :
from gateway . status import release_all_scoped_locks
_released = release_all_scoped_locks ( )
if _released :
logger . info ( " Released %d stale scoped lock(s) from old gateway. " , _released )
except Exception :
pass
2026-03-14 00:17:04 -07:00
else :
refactor: consolidate get_hermes_home() and parse_reasoning_effort() (#3062)
Centralizes two widely-duplicated patterns into hermes_constants.py:
1. get_hermes_home() — Path resolution for ~/.hermes (HERMES_HOME env var)
- Was copy-pasted inline across 30+ files as:
Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
- Now defined once in hermes_constants.py (zero-dependency module)
- hermes_cli/config.py re-exports it for backward compatibility
- Removed local wrapper functions in honcho_integration/client.py,
tools/website_policy.py, tools/tirith_security.py, hermes_cli/uninstall.py
2. parse_reasoning_effort() — Reasoning effort string validation
- Was copy-pasted in cli.py, gateway/run.py, cron/scheduler.py
- Same validation logic: check against (xhigh, high, medium, low, minimal, none)
- Now defined once in hermes_constants.py, called from all 3 locations
- Warning log for unknown values kept at call sites (context-specific)
31 files changed, net +31 lines (125 insertions, 94 deletions)
Full test suite: 6179 passed, 0 failed
2026-03-25 15:54:28 -07:00
hermes_home = str ( get_hermes_home ( ) )
2026-03-14 00:17:04 -07:00
logger . error (
" Another gateway instance is already running (PID %d , HERMES_HOME= %s ). "
" Use ' hermes gateway restart ' to replace it, or ' hermes gateway stop ' first. " ,
existing_pid , hermes_home ,
)
print (
f " \n ❌ Gateway already running (PID { existing_pid } ). \n "
f " Use ' hermes gateway restart ' to replace it, \n "
f " or ' hermes gateway stop ' to kill it first. \n "
f " Or use ' hermes gateway run --replace ' to auto-replace. \n "
)
return False
# Sync bundled skills on gateway start (fast -- skips unchanged)
try :
from tools . skills_sync import sync_skills
sync_skills ( quiet = True )
except Exception :
pass
# Configure rotating file log so gateway output is persisted for debugging
log_dir = _hermes_home / ' logs '
log_dir . mkdir ( parents = True , exist_ok = True )
file_handler = RotatingFileHandler (
log_dir / ' gateway.log ' ,
maxBytes = 5 * 1024 * 1024 ,
backupCount = 3 ,
)
from agent . redact import RedactingFormatter
file_handler . setFormatter ( RedactingFormatter ( ' %(asctime)s %(levelname)s %(name)s : %(message)s ' ) )
logging . getLogger ( ) . addHandler ( file_handler )
logging . getLogger ( ) . setLevel ( logging . INFO )
# Separate errors-only log for easy debugging
error_handler = RotatingFileHandler (
log_dir / ' errors.log ' ,
maxBytes = 2 * 1024 * 1024 ,
backupCount = 2 ,
)
error_handler . setLevel ( logging . WARNING )
error_handler . setFormatter ( RedactingFormatter ( ' %(asctime)s %(levelname)s %(name)s : %(message)s ' ) )
logging . getLogger ( ) . addHandler ( error_handler )
runner = GatewayRunner ( config )
# Set up signal handlers
def signal_handler ( ) :
asyncio . create_task ( runner . stop ( ) )
loop = asyncio . get_event_loop ( )
for sig in ( signal . SIGINT , signal . SIGTERM ) :
try :
loop . add_signal_handler ( sig , signal_handler )
except NotImplementedError :
pass
# Start the gateway
success = await runner . start ( )
if not success :
return False
2026-03-14 18:03:50 -07:00
if runner . should_exit_cleanly :
if runner . exit_reason :
logger . error ( " Gateway exiting cleanly: %s " , runner . exit_reason )
return True
2026-03-14 00:17:04 -07:00
# Write PID file so CLI can detect gateway is running
import atexit
from gateway . status import write_pid_file , remove_pid_file
write_pid_file ( )
atexit . register ( remove_pid_file )
# Start background cron ticker so scheduled jobs fire automatically
cron_stop = threading . Event ( )
cron_thread = threading . Thread (
target = _start_cron_ticker ,
args = ( cron_stop , ) ,
kwargs = { " adapters " : runner . adapters } ,
daemon = True ,
name = " cron-ticker " ,
)
cron_thread . start ( )
# Wait for shutdown
await runner . wait_for_shutdown ( )
2026-03-21 09:38:52 -07:00
if runner . should_exit_with_failure :
if runner . exit_reason :
logger . error ( " Gateway exiting with failure: %s " , runner . exit_reason )
return False
2026-03-14 00:17:04 -07:00
# Stop cron ticker cleanly
cron_stop . set ( )
cron_thread . join ( timeout = 5 )
# Close MCP server connections
try :
from tools . mcp_tool import shutdown_mcp_servers
shutdown_mcp_servers ( )
except Exception :
pass
return True
def main ( ) :
""" CLI entry point for the gateway. """
import argparse
parser = argparse . ArgumentParser ( description = " Hermes Gateway - Multi-platform messaging " )
parser . add_argument ( " --config " , " -c " , help = " Path to gateway config file " )
parser . add_argument ( " --verbose " , " -v " , action = " store_true " , help = " Verbose output " )
args = parser . parse_args ( )
config = None
if args . config :
import json
with open ( args . config , encoding = " utf-8 " ) as f :
data = json . load ( f )
config = GatewayConfig . from_dict ( data )
# Run the gateway - exit with code 1 if no platforms connected,
# so systemd Restart=on-failure will retry on transient errors (e.g. DNS)
success = asyncio . run ( start_gateway ( config ) )
if not success :
sys . exit ( 1 )
if __name__ == " __main__ " :
main ( )