gateway/run.py

"""
Gateway runner - entry point for messaging platform integrations.

This module provides:
- start_gateway(): Start all configured platform adapters
- GatewayRunner: Main class managing the gateway lifecycle

Usage:
    # Start the gateway
    python -m gateway.run
    
    # Or from CLI
    python cli.py --gateway
"""

import asyncio
import json
import logging
import os
import re
import shlex
import sys
import signal
import tempfile
import threading
import time
from logging.handlers import RotatingFileHandler
from pathlib import Path
from datetime import datetime
from typing import Dict, Optional, Any, List

# ---------------------------------------------------------------------------
# SSL certificate auto-detection for NixOS and other non-standard systems.
# Must run BEFORE any HTTP library (discord, aiohttp, etc.) is imported.
# ---------------------------------------------------------------------------
def _ensure_ssl_certs() -> None:
    """Set SSL_CERT_FILE if the system doesn't expose CA certs to Python."""
    if "SSL_CERT_FILE" in os.environ:
        return  # user already configured it

    import ssl

    # 1. Python's compiled-in defaults
    paths = ssl.get_default_verify_paths()
    for candidate in (paths.cafile, paths.openssl_cafile):
        if candidate and os.path.exists(candidate):
            os.environ["SSL_CERT_FILE"] = candidate
            return

    # 2. certifi (ships its own Mozilla bundle)
    try:
        import certifi
        os.environ["SSL_CERT_FILE"] = certifi.where()
        return
    except ImportError:
        pass

    # 3. Common distro / macOS locations
    for candidate in (
        "/etc/ssl/certs/ca-certificates.crt",               # Debian/Ubuntu/Gentoo
        "/etc/pki/tls/certs/ca-bundle.crt",                 # RHEL/CentOS 7
        "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", # RHEL/CentOS 8+
        "/etc/ssl/ca-bundle.pem",                            # SUSE/OpenSUSE
        "/etc/ssl/cert.pem",                                 # Alpine / macOS
        "/etc/pki/tls/cert.pem",                             # Fedora
        "/usr/local/etc/openssl@1.1/cert.pem",               # macOS Homebrew Intel
        "/opt/homebrew/etc/openssl@1.1/cert.pem",            # macOS Homebrew ARM
    ):
        if os.path.exists(candidate):
            os.environ["SSL_CERT_FILE"] = candidate
            return

_ensure_ssl_certs()

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))

# Resolve Hermes home directory (respects HERMES_HOME override)
from hermes_constants import get_hermes_home
_hermes_home = get_hermes_home()

# Load environment variables from ~/.hermes/.env first.
# User-managed env files should override stale shell exports on restart.
from dotenv import load_dotenv  # backward-compat for tests that monkeypatch this symbol
from hermes_cli.env_loader import load_hermes_dotenv
_env_path = _hermes_home / '.env'
load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')

# Bridge config.yaml values into the environment so os.getenv() picks them up.
# config.yaml is authoritative for terminal settings — overrides .env.
_config_path = _hermes_home / 'config.yaml'
if _config_path.exists():
    try:
        import yaml as _yaml
        with open(_config_path, encoding="utf-8") as _f:
            _cfg = _yaml.safe_load(_f) or {}
        # Expand ${ENV_VAR} references before bridging to env vars.
        from hermes_cli.config import _expand_env_vars
        _cfg = _expand_env_vars(_cfg)
        # Top-level simple values (fallback only — don't override .env)
        for _key, _val in _cfg.items():
            if isinstance(_val, (str, int, float, bool)) and _key not in os.environ:
                os.environ[_key] = str(_val)
        # Terminal config is nested — bridge to TERMINAL_* env vars.
        # config.yaml overrides .env for these since it's the documented config path.
        _terminal_cfg = _cfg.get("terminal", {})
        if _terminal_cfg and isinstance(_terminal_cfg, dict):
            _terminal_env_map = {
                "backend": "TERMINAL_ENV",
                "cwd": "TERMINAL_CWD",
                "timeout": "TERMINAL_TIMEOUT",
                "lifetime_seconds": "TERMINAL_LIFETIME_SECONDS",
                "docker_image": "TERMINAL_DOCKER_IMAGE",
                "docker_forward_env": "TERMINAL_DOCKER_FORWARD_ENV",
                "singularity_image": "TERMINAL_SINGULARITY_IMAGE",
                "modal_image": "TERMINAL_MODAL_IMAGE",
                "daytona_image": "TERMINAL_DAYTONA_IMAGE",
                "ssh_host": "TERMINAL_SSH_HOST",
                "ssh_user": "TERMINAL_SSH_USER",
                "ssh_port": "TERMINAL_SSH_PORT",
                "ssh_key": "TERMINAL_SSH_KEY",
                "container_cpu": "TERMINAL_CONTAINER_CPU",
                "container_memory": "TERMINAL_CONTAINER_MEMORY",
                "container_disk": "TERMINAL_CONTAINER_DISK",
                "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
                "docker_volumes": "TERMINAL_DOCKER_VOLUMES",
                "sandbox_dir": "TERMINAL_SANDBOX_DIR",
                "persistent_shell": "TERMINAL_PERSISTENT_SHELL",
            }
            for _cfg_key, _env_var in _terminal_env_map.items():
                if _cfg_key in _terminal_cfg:
                    _val = _terminal_cfg[_cfg_key]
                    if isinstance(_val, list):
                        os.environ[_env_var] = json.dumps(_val)
                    else:
                        os.environ[_env_var] = str(_val)
        # Compression config is read directly from config.yaml by run_agent.py
        # and auxiliary_client.py — no env var bridging needed.
        # Auxiliary model/direct-endpoint overrides (vision, web_extract).
        # Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
        _auxiliary_cfg = _cfg.get("auxiliary", {})
        if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
            _aux_task_env = {
                "vision": {
                    "provider": "AUXILIARY_VISION_PROVIDER",
                    "model": "AUXILIARY_VISION_MODEL",
                    "base_url": "AUXILIARY_VISION_BASE_URL",
                    "api_key": "AUXILIARY_VISION_API_KEY",
                },
                "web_extract": {
                    "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
                    "model": "AUXILIARY_WEB_EXTRACT_MODEL",
                    "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
                    "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
                },
                "approval": {
                    "provider": "AUXILIARY_APPROVAL_PROVIDER",
                    "model": "AUXILIARY_APPROVAL_MODEL",
                    "base_url": "AUXILIARY_APPROVAL_BASE_URL",
                    "api_key": "AUXILIARY_APPROVAL_API_KEY",
                },
            }
            for _task_key, _env_map in _aux_task_env.items():
                _task_cfg = _auxiliary_cfg.get(_task_key, {})
                if not isinstance(_task_cfg, dict):
                    continue
                _prov = str(_task_cfg.get("provider", "")).strip()
                _model = str(_task_cfg.get("model", "")).strip()
                _base_url = str(_task_cfg.get("base_url", "")).strip()
                _api_key = str(_task_cfg.get("api_key", "")).strip()
                if _prov and _prov != "auto":
                    os.environ[_env_map["provider"]] = _prov
                if _model:
                    os.environ[_env_map["model"]] = _model
                if _base_url:
                    os.environ[_env_map["base_url"]] = _base_url
                if _api_key:
                    os.environ[_env_map["api_key"]] = _api_key
        _agent_cfg = _cfg.get("agent", {})
        if _agent_cfg and isinstance(_agent_cfg, dict):
            if "max_turns" in _agent_cfg:
                os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
        # Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
        # HERMES_TIMEZONE from .env takes precedence (already in os.environ).
        _tz_cfg = _cfg.get("timezone", "")
        if _tz_cfg and isinstance(_tz_cfg, str) and "HERMES_TIMEZONE" not in os.environ:
            os.environ["HERMES_TIMEZONE"] = _tz_cfg.strip()
        # Security settings
        _security_cfg = _cfg.get("security", {})
        if isinstance(_security_cfg, dict):
            _redact = _security_cfg.get("redact_secrets")
            if _redact is not None:
                os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
    except Exception:
        pass  # Non-fatal; gateway can still run with .env values

# Gateway runs in quiet mode - suppress debug output and use cwd directly (no temp dirs)
os.environ["HERMES_QUIET"] = "1"

# Enable interactive exec approval for dangerous commands on messaging platforms
os.environ["HERMES_EXEC_ASK"] = "1"

# Set terminal working directory for messaging platforms.
# If the user set an explicit path in config.yaml (not "." or "auto"),
# respect it. Otherwise use MESSAGING_CWD or default to home directory.
_configured_cwd = os.environ.get("TERMINAL_CWD", "")
if not _configured_cwd or _configured_cwd in (".", "auto", "cwd"):
    messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home())
    os.environ["TERMINAL_CWD"] = messaging_cwd

from gateway.config import (
    Platform,
    GatewayConfig,
    load_gateway_config,
)
from gateway.session import (
    SessionStore,
    SessionSource,
    SessionContext,
    build_session_context,
    build_session_context_prompt,
    build_session_key,
)
from gateway.delivery import DeliveryRouter
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType

logger = logging.getLogger(__name__)

# Sentinel placed into _running_agents immediately when a session starts
# processing, *before* any await.  Prevents a second message for the same
# session from bypassing the "already running" guard during the async gap
# between the guard check and actual agent creation.
_AGENT_PENDING_SENTINEL = object()


def _resolve_runtime_agent_kwargs() -> dict:
    """Resolve provider credentials for gateway-created AIAgent instances."""
    from hermes_cli.runtime_provider import (
        resolve_runtime_provider,
        format_runtime_provider_error,
    )

    try:
        runtime = resolve_runtime_provider(
            requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
        )
    except Exception as exc:
        raise RuntimeError(format_runtime_provider_error(exc)) from exc

    return {
        "api_key": runtime.get("api_key"),
        "base_url": runtime.get("base_url"),
        "provider": runtime.get("provider"),
        "api_mode": runtime.get("api_mode"),
        "command": runtime.get("command"),
        "args": list(runtime.get("args") or []),
    }


def _platform_config_key(platform: "Platform") -> str:
    """Map a Platform enum to its config.yaml key (LOCAL→"cli", rest→enum value)."""
    return "cli" if platform == Platform.LOCAL else platform.value


def _load_gateway_config() -> dict:
    """Load and parse ~/.hermes/config.yaml, returning {} on any error."""
    try:
        config_path = _hermes_home / 'config.yaml'
        if config_path.exists():
            import yaml
            with open(config_path, 'r', encoding='utf-8') as f:
                return yaml.safe_load(f) or {}
    except Exception:
        logger.debug("Could not load gateway config from %s", _hermes_home / 'config.yaml')
    return {}


def _resolve_gateway_model(config: dict | None = None) -> str:
    """Read model from env/config — mirrors the resolution in _run_agent_sync.

    Without this, temporary AIAgent instances (memory flush, /compress) fall
    back to the hardcoded default ("anthropic/claude-opus-4.6") which fails
    when the active provider is openai-codex.
    """
    model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
    cfg = config if config is not None else _load_gateway_config()
    model_cfg = cfg.get("model", {})
    if isinstance(model_cfg, str):
        model = model_cfg
    elif isinstance(model_cfg, dict):
        model = model_cfg.get("default", model)
    return model


def _resolve_hermes_bin() -> Optional[list[str]]:
    """Resolve the Hermes update command as argv parts.

    Tries in order:
    1. ``shutil.which("hermes")`` — standard PATH lookup
    2. ``sys.executable -m hermes_cli.main`` — fallback when Hermes is running
       from a venv/module invocation and the ``hermes`` shim is not on PATH

    Returns argv parts ready for quoting/joining, or ``None`` if neither works.
    """
    import shutil

    hermes_bin = shutil.which("hermes")
    if hermes_bin:
        return [hermes_bin]

    try:
        import importlib.util

        if importlib.util.find_spec("hermes_cli") is not None:
            return [sys.executable, "-m", "hermes_cli.main"]
    except Exception:
        pass

    return None


class GatewayRunner:
    """
    Main gateway controller.
    
    Manages the lifecycle of all platform adapters and routes
    messages to/from the agent.
    """
    
    def __init__(self, config: Optional[GatewayConfig] = None):
        self.config = config or load_gateway_config()
        self.adapters: Dict[Platform, BasePlatformAdapter] = {}

        # Load ephemeral config from config.yaml / env vars.
        # Both are injected at API-call time only and never persisted.
        self._prefill_messages = self._load_prefill_messages()
        self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
        self._reasoning_config = self._load_reasoning_config()
        self._show_reasoning = self._load_show_reasoning()
        self._provider_routing = self._load_provider_routing()
        self._fallback_model = self._load_fallback_model()
        self._smart_model_routing = self._load_smart_model_routing()

        # Wire process registry into session store for reset protection
        from tools.process_registry import process_registry
        self.session_store = SessionStore(
            self.config.sessions_dir, self.config,
            has_active_processes_fn=lambda key: process_registry.has_active_for_session(key),
        )
        self.delivery_router = DeliveryRouter(self.config)
        self._running = False
        self._shutdown_event = asyncio.Event()
        self._exit_cleanly = False
        self._exit_with_failure = False
        self._exit_reason: Optional[str] = None
        
        # Track running agents per session for interrupt support
        # Key: session_key, Value: AIAgent instance
        self._running_agents: Dict[str, Any] = {}
        self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt

        # Cache AIAgent instances per session to preserve prompt caching.
        # Without this, a new AIAgent is created per message, rebuilding the
        # system prompt (including memory) every turn — breaking prefix cache
        # and costing ~10x more on providers with prompt caching (Anthropic).
        # Key: session_key, Value: (AIAgent, config_signature_str)
        import threading as _threading
        self._agent_cache: Dict[str, tuple] = {}
        self._agent_cache_lock = _threading.Lock()

        # Track active fallback model/provider when primary is rate-limited.
        # Set after an agent run where fallback was activated; cleared when
        # the primary model succeeds again or the user switches via /model.
        self._effective_model: Optional[str] = None
        self._effective_provider: Optional[str] = None

        # Track pending exec approvals per session
        # Key: session_key, Value: {"command": str, "pattern_key": str, ...}
        self._pending_approvals: Dict[str, Dict[str, Any]] = {}

        # Track platforms that failed to connect for background reconnection.
        # Key: Platform enum, Value: {"config": platform_config, "attempts": int, "next_retry": float}
        self._failed_platforms: Dict[Platform, Dict[str, Any]] = {}

        # Persistent Honcho managers keyed by gateway session key.
        # This preserves write_frequency="session" semantics across short-lived
        # per-message AIAgent instances.
        self._honcho_managers: Dict[str, Any] = {}
        self._honcho_configs: Dict[str, Any] = {}

        # Ensure tirith security scanner is available (downloads if needed)
        try:
            from tools.tirith_security import ensure_installed
            ensure_installed(log_failures=False)
        except Exception:
            pass  # Non-fatal — fail-open at scan time if unavailable
        
        # Initialize session database for session_search tool support
        self._session_db = None
        try:
            from hermes_state import SessionDB
            self._session_db = SessionDB()
        except Exception as e:
            logger.debug("SQLite session store not available: %s", e)
        
        # DM pairing store for code-based user authorization
        from gateway.pairing import PairingStore
        self.pairing_store = PairingStore()
        
        # Event hook system
        from gateway.hooks import HookRegistry
        self.hooks = HookRegistry()

        # Per-chat voice reply mode: "off" | "voice_only" | "all"
        self._voice_mode: Dict[str, str] = self._load_voice_modes()

        # Track background tasks to prevent garbage collection mid-execution
        self._background_tasks: set = set()

    def _get_or_create_gateway_honcho(self, session_key: str):
        """Return a persistent Honcho manager/config pair for this gateway session."""
        if not hasattr(self, "_honcho_managers"):
            self._honcho_managers = {}
        if not hasattr(self, "_honcho_configs"):
            self._honcho_configs = {}

        if session_key in self._honcho_managers:
            return self._honcho_managers[session_key], self._honcho_configs.get(session_key)

        try:
            from honcho_integration.client import HonchoClientConfig, get_honcho_client
            from honcho_integration.session import HonchoSessionManager

            hcfg = HonchoClientConfig.from_global_config()
            if not hcfg.enabled or not hcfg.api_key:
                return None, hcfg

            client = get_honcho_client(hcfg)
            manager = HonchoSessionManager(
                honcho=client,
                config=hcfg,
                context_tokens=hcfg.context_tokens,
            )
            self._honcho_managers[session_key] = manager
            self._honcho_configs[session_key] = hcfg
            return manager, hcfg
        except Exception as e:
            logger.debug("Gateway Honcho init failed for %s: %s", session_key, e)
            return None, None

    def _shutdown_gateway_honcho(self, session_key: str) -> None:
        """Flush and close the persistent Honcho manager for a gateway session."""
        managers = getattr(self, "_honcho_managers", None)
        configs = getattr(self, "_honcho_configs", None)
        if managers is None or configs is None:
            return

        manager = managers.pop(session_key, None)
        configs.pop(session_key, None)
        if not manager:
            return
        try:
            manager.shutdown()
        except Exception as e:
            logger.debug("Gateway Honcho shutdown failed for %s: %s", session_key, e)

    def _shutdown_all_gateway_honcho(self) -> None:
        """Flush and close all persistent Honcho managers."""
        managers = getattr(self, "_honcho_managers", None)
        if not managers:
            return
        for session_key in list(managers.keys()):
            self._shutdown_gateway_honcho(session_key)
    
    # -- Setup skill availability ----------------------------------------

    def _has_setup_skill(self) -> bool:
        """Check if the hermes-agent-setup skill is installed."""
        try:
            from tools.skill_manager_tool import _find_skill
            return _find_skill("hermes-agent-setup") is not None
        except Exception:
            return False

    # -- Voice mode persistence ------------------------------------------

    _VOICE_MODE_PATH = _hermes_home / "gateway_voice_mode.json"

    def _load_voice_modes(self) -> Dict[str, str]:
        try:
            data = json.loads(self._VOICE_MODE_PATH.read_text())
        except (FileNotFoundError, json.JSONDecodeError, OSError):
            return {}

        if not isinstance(data, dict):
            return {}

        valid_modes = {"off", "voice_only", "all"}
        return {
            str(chat_id): mode
            for chat_id, mode in data.items()
            if mode in valid_modes
        }

    def _save_voice_modes(self) -> None:
        try:
            self._VOICE_MODE_PATH.parent.mkdir(parents=True, exist_ok=True)
            self._VOICE_MODE_PATH.write_text(
                json.dumps(self._voice_mode, indent=2)
            )
        except OSError as e:
            logger.warning("Failed to save voice modes: %s", e)

    def _set_adapter_auto_tts_disabled(self, adapter, chat_id: str, disabled: bool) -> None:
        """Update an adapter's in-memory auto-TTS suppression set if present."""
        disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
        if not isinstance(disabled_chats, set):
            return
        if disabled:
            disabled_chats.add(chat_id)
        else:
            disabled_chats.discard(chat_id)

    def _sync_voice_mode_state_to_adapter(self, adapter) -> None:
        """Restore persisted /voice off state into a live platform adapter."""
        disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
        if not isinstance(disabled_chats, set):
            return
        disabled_chats.clear()
        disabled_chats.update(
            chat_id for chat_id, mode in self._voice_mode.items() if mode == "off"
        )

    # -----------------------------------------------------------------

    def _flush_memories_for_session(
        self,
        old_session_id: str,
        honcho_session_key: Optional[str] = None,
    ):
        """Prompt the agent to save memories/skills before context is lost.

        Synchronous worker — meant to be called via run_in_executor from
        an async context so it doesn't block the event loop.
        """
        # Skip cron sessions — they run headless with no meaningful user
        # conversation to extract memories from.
        if old_session_id and old_session_id.startswith("cron_"):
            logger.debug("Skipping memory flush for cron session: %s", old_session_id)
            return

        try:
            history = self.session_store.load_transcript(old_session_id)
            if not history or len(history) < 4:
                return

            from run_agent import AIAgent
            runtime_kwargs = _resolve_runtime_agent_kwargs()
            if not runtime_kwargs.get("api_key"):
                return

            # Resolve model from config — AIAgent's default is OpenRouter-
            # formatted ("anthropic/claude-opus-4.6") which fails when the
            # active provider is openai-codex.
            model = _resolve_gateway_model()

            tmp_agent = AIAgent(
                **runtime_kwargs,
                model=model,
                max_iterations=8,
                quiet_mode=True,
                enabled_toolsets=["memory", "skills"],
                session_id=old_session_id,
                honcho_session_key=honcho_session_key,
            )
            # Fully silence the flush agent — quiet_mode only suppresses init
            # messages; tool call output still leaks to the terminal through
            # _safe_print → _print_fn.  Set a no-op to prevent that.
            tmp_agent._print_fn = lambda *a, **kw: None

            # Build conversation history from transcript
            msgs = [
                {"role": m.get("role"), "content": m.get("content")}
                for m in history
                if m.get("role") in ("user", "assistant") and m.get("content")
            ]

            # Read live memory state from disk so the flush agent can see
            # what's already saved and avoid overwriting newer entries.
            _current_memory = ""
            try:
                from tools.memory_tool import MEMORY_DIR
                for fname, label in [
                    ("MEMORY.md", "MEMORY (your personal notes)"),
                    ("USER.md", "USER PROFILE (who the user is)"),
                ]:
                    fpath = MEMORY_DIR / fname
                    if fpath.exists():
                        content = fpath.read_text(encoding="utf-8").strip()
                        if content:
                            _current_memory += f"\n\n## Current {label}:\n{content}"
            except Exception:
                pass  # Non-fatal — flush still works, just without the guard

            # Give the agent a real turn to think about what to save
            flush_prompt = (
                "[System: This session is about to be automatically reset due to "
                "inactivity or a scheduled daily reset. The conversation context "
                "will be cleared after this turn.\n\n"
                "Review the conversation above and:\n"
                "1. Save any important facts, preferences, or decisions to memory "
                "(user profile or your notes) that would be useful in future sessions.\n"
                "2. If you discovered a reusable workflow or solved a non-trivial "
                "problem, consider saving it as a skill.\n"
                "3. If nothing is worth saving, that's fine — just skip.\n\n"
            )

            if _current_memory:
                flush_prompt += (
                    "IMPORTANT — here is the current live state of memory. Other "
                    "sessions, cron jobs, or the user may have updated it since this "
                    "conversation ended. Do NOT overwrite or remove entries unless "
                    "the conversation above reveals something that genuinely "
                    "supersedes them. Only add new information that is not already "
                    "captured below."
                    f"{_current_memory}\n\n"
                )

            flush_prompt += (
                "Do NOT respond to the user. Just use the memory and skill_manage "
                "tools if needed, then stop.]"
            )

            tmp_agent.run_conversation(
                user_message=flush_prompt,
                conversation_history=msgs,
                sync_honcho=False,
            )
            logger.info("Pre-reset memory flush completed for session %s", old_session_id)
            # Flush any queued Honcho writes before the session is dropped
            if getattr(tmp_agent, '_honcho', None):
                try:
                    tmp_agent._honcho.shutdown()
                except Exception:
                    pass
        except Exception as e:
            logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)

    async def _async_flush_memories(
        self,
        old_session_id: str,
        honcho_session_key: Optional[str] = None,
    ):
        """Run the sync memory flush in a thread pool so it won't block the event loop."""
        loop = asyncio.get_event_loop()
        await loop.run_in_executor(
            None,
            self._flush_memories_for_session,
            old_session_id,
            honcho_session_key,
        )

    @property
    def should_exit_cleanly(self) -> bool:
        return self._exit_cleanly

    @property
    def should_exit_with_failure(self) -> bool:
        return self._exit_with_failure

    @property
    def exit_reason(self) -> Optional[str]:
        return self._exit_reason

    def _session_key_for_source(self, source: SessionSource) -> str:
        """Resolve the current session key for a source, honoring gateway config when available."""
        if hasattr(self, "session_store") and self.session_store is not None:
            try:
                session_key = self.session_store._generate_session_key(source)
                if isinstance(session_key, str) and session_key:
                    return session_key
            except Exception:
                pass
        config = getattr(self, "config", None)
        return build_session_key(
            source,
            group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
        )

    def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
        from agent.smart_model_routing import resolve_turn_route

        primary = {
            "model": model,
            "api_key": runtime_kwargs.get("api_key"),
            "base_url": runtime_kwargs.get("base_url"),
            "provider": runtime_kwargs.get("provider"),
            "api_mode": runtime_kwargs.get("api_mode"),
            "command": runtime_kwargs.get("command"),
            "args": list(runtime_kwargs.get("args") or []),
        }
        return resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary)

    async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None:
        """React to an adapter failure after startup.

        If the error is retryable (e.g. network blip, DNS failure), queue the
        platform for background reconnection instead of giving up permanently.
        """
        logger.error(
            "Fatal %s adapter error (%s): %s",
            adapter.platform.value,
            adapter.fatal_error_code or "unknown",
            adapter.fatal_error_message or "unknown error",
        )

        existing = self.adapters.get(adapter.platform)
        if existing is adapter:
            try:
                await adapter.disconnect()
            finally:
                self.adapters.pop(adapter.platform, None)
                self.delivery_router.adapters = self.adapters

        # Queue retryable failures for background reconnection
        if adapter.fatal_error_retryable:
            platform_config = self.config.platforms.get(adapter.platform)
            if platform_config and adapter.platform not in self._failed_platforms:
                self._failed_platforms[adapter.platform] = {
                    "config": platform_config,
                    "attempts": 0,
                    "next_retry": time.monotonic() + 30,
                }
                logger.info(
                    "%s queued for background reconnection",
                    adapter.platform.value,
                )

        if not self.adapters and not self._failed_platforms:
            self._exit_reason = adapter.fatal_error_message or "All messaging adapters disconnected"
            if adapter.fatal_error_retryable:
                self._exit_with_failure = True
                logger.error("No connected messaging platforms remain. Shutting down gateway for service restart.")
            else:
                logger.error("No connected messaging platforms remain. Shutting down gateway cleanly.")
            await self.stop()
        elif not self.adapters and self._failed_platforms:
            # All platforms are down and queued for background reconnection.
            # If the error is retryable, exit with failure so systemd Restart=on-failure
            # can restart the process. Otherwise stay alive and keep retrying in background.
            if adapter.fatal_error_retryable:
                self._exit_reason = adapter.fatal_error_message or "All messaging platforms failed with retryable errors"
                self._exit_with_failure = True
                logger.error(
                    "All messaging platforms failed with retryable errors. "
                    "Shutting down gateway for service restart (systemd will retry)."
                )
                await self.stop()
            else:
                logger.warning(
                    "No connected messaging platforms remain, but %d platform(s) queued for reconnection",
                    len(self._failed_platforms),
                )

    def _request_clean_exit(self, reason: str) -> None:
        self._exit_cleanly = True
        self._exit_reason = reason
        self._shutdown_event.set()
    
    @staticmethod
    def _load_prefill_messages() -> List[Dict[str, Any]]:
        """Load ephemeral prefill messages from config or env var.
        
        Checks HERMES_PREFILL_MESSAGES_FILE env var first, then falls back to
        the prefill_messages_file key in ~/.hermes/config.yaml.
        Relative paths are resolved from ~/.hermes/.
        """
        import json as _json
        file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
        if not file_path:
            try:
                import yaml as _y
                cfg_path = _hermes_home / "config.yaml"
                if cfg_path.exists():
                    with open(cfg_path, encoding="utf-8") as _f:
                        cfg = _y.safe_load(_f) or {}
                    file_path = cfg.get("prefill_messages_file", "")
            except Exception:
                pass
        if not file_path:
            return []
        path = Path(file_path).expanduser()
        if not path.is_absolute():
            path = _hermes_home / path
        if not path.exists():
            logger.warning("Prefill messages file not found: %s", path)
            return []
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = _json.load(f)
            if not isinstance(data, list):
                logger.warning("Prefill messages file must contain a JSON array: %s", path)
                return []
            return data
        except Exception as e:
            logger.warning("Failed to load prefill messages from %s: %s", path, e)
            return []

    @staticmethod
    def _load_ephemeral_system_prompt() -> str:
        """Load ephemeral system prompt from config or env var.
        
        Checks HERMES_EPHEMERAL_SYSTEM_PROMPT env var first, then falls back to
        agent.system_prompt in ~/.hermes/config.yaml.
        """
        prompt = os.getenv("HERMES_EPHEMERAL_SYSTEM_PROMPT", "")
        if prompt:
            return prompt
        try:
            import yaml as _y
            cfg_path = _hermes_home / "config.yaml"
            if cfg_path.exists():
                with open(cfg_path, encoding="utf-8") as _f:
                    cfg = _y.safe_load(_f) or {}
                return (cfg.get("agent", {}).get("system_prompt", "") or "").strip()
        except Exception:
            pass
        return ""

    @staticmethod
    def _load_reasoning_config() -> dict | None:
        """Load reasoning effort from config with env fallback.

        Checks agent.reasoning_effort in config.yaml first, then
        HERMES_REASONING_EFFORT as a fallback. Valid: "xhigh", "high",
        "medium", "low", "minimal", "none". Returns None to use default
        (medium).
        """
        from hermes_constants import parse_reasoning_effort
        effort = ""
        try:
            import yaml as _y
            cfg_path = _hermes_home / "config.yaml"
            if cfg_path.exists():
                with open(cfg_path, encoding="utf-8") as _f:
                    cfg = _y.safe_load(_f) or {}
                effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
        except Exception:
            pass
        if not effort:
            effort = os.getenv("HERMES_REASONING_EFFORT", "")
        result = parse_reasoning_effort(effort)
        if effort and effort.strip() and result is None:
            logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
        return result

    @staticmethod
    def _load_show_reasoning() -> bool:
        """Load show_reasoning toggle from config.yaml display section."""
        try:
            import yaml as _y
            cfg_path = _hermes_home / "config.yaml"
            if cfg_path.exists():
                with open(cfg_path, encoding="utf-8") as _f:
                    cfg = _y.safe_load(_f) or {}
                return bool(cfg.get("display", {}).get("show_reasoning", False))
        except Exception:
            pass
        return False

    @staticmethod
    def _load_background_notifications_mode() -> str:
        """Load background process notification mode from config or env var.

        Modes:
          - ``all``    — push running-output updates *and* the final message (default)
          - ``result`` — only the final completion message (regardless of exit code)
          - ``error``  — only the final message when exit code is non-zero
          - ``off``    — no watcher messages at all
        """
        mode = os.getenv("HERMES_BACKGROUND_NOTIFICATIONS", "")
        if not mode:
            try:
                import yaml as _y
                cfg_path = _hermes_home / "config.yaml"
                if cfg_path.exists():
                    with open(cfg_path, encoding="utf-8") as _f:
                        cfg = _y.safe_load(_f) or {}
                    raw = cfg.get("display", {}).get("background_process_notifications")
                    if raw is False:
                        mode = "off"
                    elif raw not in (None, ""):
                        mode = str(raw)
            except Exception:
                pass
        mode = (mode or "all").strip().lower()
        valid = {"all", "result", "error", "off"}
        if mode not in valid:
            logger.warning(
                "Unknown background_process_notifications '%s', defaulting to 'all'",
                mode,
            )
            return "all"
        return mode

    @staticmethod
    def _load_provider_routing() -> dict:
        """Load OpenRouter provider routing preferences from config.yaml."""
        try:
            import yaml as _y
            cfg_path = _hermes_home / "config.yaml"
            if cfg_path.exists():
                with open(cfg_path, encoding="utf-8") as _f:
                    cfg = _y.safe_load(_f) or {}
                return cfg.get("provider_routing", {}) or {}
        except Exception:
            pass
        return {}

    @staticmethod
    def _load_fallback_model() -> dict | None:
        """Load fallback model config from config.yaml.

        Returns a dict with 'provider' and 'model' keys, or None if
        not configured / both fields empty.
        """
        try:
            import yaml as _y
            cfg_path = _hermes_home / "config.yaml"
            if cfg_path.exists():
                with open(cfg_path, encoding="utf-8") as _f:
                    cfg = _y.safe_load(_f) or {}
                fb = cfg.get("fallback_model", {}) or {}
                if fb.get("provider") and fb.get("model"):
                    return fb
        except Exception:
            pass
        return None

    @staticmethod
    def _load_smart_model_routing() -> dict:
        """Load optional smart cheap-vs-strong model routing config."""
        try:
            import yaml as _y
            cfg_path = _hermes_home / "config.yaml"
            if cfg_path.exists():
                with open(cfg_path, encoding="utf-8") as _f:
                    cfg = _y.safe_load(_f) or {}
                return cfg.get("smart_model_routing", {}) or {}
        except Exception:
            pass
        return {}

    async def start(self) -> bool:
        """
        Start the gateway and all configured platform adapters.
        
        Returns True if at least one adapter connected successfully.
        """
        logger.info("Starting Hermes Gateway...")
        logger.info("Session storage: %s", self.config.sessions_dir)
        try:
            from gateway.status import write_runtime_status
            write_runtime_status(gateway_state="starting", exit_reason=None)
        except Exception:
            pass
        
        # Warn if no user allowlists are configured and open access is not opted in
        _any_allowlist = any(
            os.getenv(v)
            for v in ("TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS",
                       "WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS",
                       "SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
                       "EMAIL_ALLOWED_USERS",
                       "SMS_ALLOWED_USERS", "MATTERMOST_ALLOWED_USERS",
                       "MATRIX_ALLOWED_USERS", "DINGTALK_ALLOWED_USERS",
                       "GATEWAY_ALLOWED_USERS")
        )
        _allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes") or any(
            os.getenv(v, "").lower() in ("true", "1", "yes")
            for v in ("TELEGRAM_ALLOW_ALL_USERS", "DISCORD_ALLOW_ALL_USERS",
                       "WHATSAPP_ALLOW_ALL_USERS", "SLACK_ALLOW_ALL_USERS",
                       "SIGNAL_ALLOW_ALL_USERS", "EMAIL_ALLOW_ALL_USERS",
                       "SMS_ALLOW_ALL_USERS", "MATTERMOST_ALLOW_ALL_USERS",
                       "MATRIX_ALLOW_ALL_USERS", "DINGTALK_ALLOW_ALL_USERS")
        )
        if not _any_allowlist and not _allow_all:
            logger.warning(
                "No user allowlists configured. All unauthorized users will be denied. "
                "Set GATEWAY_ALLOW_ALL_USERS=true in ~/.hermes/.env to allow open access, "
                "or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)."
            )
        
        # Discover and load event hooks
        self.hooks.discover_and_load()
        
        # Recover background processes from checkpoint (crash recovery)
        try:
            from tools.process_registry import process_registry
            recovered = process_registry.recover_from_checkpoint()
            if recovered:
                logger.info("Recovered %s background process(es) from previous run", recovered)
        except Exception as e:
            logger.warning("Process checkpoint recovery: %s", e)
        
        connected_count = 0
        enabled_platform_count = 0
        startup_nonretryable_errors: list[str] = []
        startup_retryable_errors: list[str] = []
        
        # Initialize and connect each configured platform
        for platform, platform_config in self.config.platforms.items():
            if not platform_config.enabled:
                continue
            enabled_platform_count += 1
            
            adapter = self._create_adapter(platform, platform_config)
            if not adapter:
                logger.warning("No adapter available for %s", platform.value)
                continue
            
            # Set up message + fatal error handlers
            adapter.set_message_handler(self._handle_message)
            adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
            
            # Try to connect
            logger.info("Connecting to %s...", platform.value)
            try:
                success = await adapter.connect()
                if success:
                    self.adapters[platform] = adapter
                    self._sync_voice_mode_state_to_adapter(adapter)
                    connected_count += 1
                    logger.info("✓ %s connected", platform.value)
                else:
                    logger.warning("✗ %s failed to connect", platform.value)
                    if adapter.has_fatal_error:
                        target = (
                            startup_retryable_errors
                            if adapter.fatal_error_retryable
                            else startup_nonretryable_errors
                        )
                        target.append(
                            f"{platform.value}: {adapter.fatal_error_message}"
                        )
                        # Queue for reconnection if the error is retryable
                        if adapter.fatal_error_retryable:
                            self._failed_platforms[platform] = {
                                "config": platform_config,
                                "attempts": 1,
                                "next_retry": time.monotonic() + 30,
                            }
                    else:
                        startup_retryable_errors.append(
                            f"{platform.value}: failed to connect"
                        )
                        # No fatal error info means likely a transient issue — queue for retry
                        self._failed_platforms[platform] = {
                            "config": platform_config,
                            "attempts": 1,
                            "next_retry": time.monotonic() + 30,
                        }
            except Exception as e:
                logger.error("✗ %s error: %s", platform.value, e)
                startup_retryable_errors.append(f"{platform.value}: {e}")
                # Unexpected exceptions are typically transient — queue for retry
                self._failed_platforms[platform] = {
                    "config": platform_config,
                    "attempts": 1,
                    "next_retry": time.monotonic() + 30,
                }
        
        if connected_count == 0:
            if startup_nonretryable_errors:
                reason = "; ".join(startup_nonretryable_errors)
                logger.error("Gateway hit a non-retryable startup conflict: %s", reason)
                try:
                    from gateway.status import write_runtime_status
                    write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
                except Exception:
                    pass
                self._request_clean_exit(reason)
                return True
            if enabled_platform_count > 0:
                reason = "; ".join(startup_retryable_errors) or "all configured messaging platforms failed to connect"
                logger.error("Gateway failed to connect any configured messaging platform: %s", reason)
                try:
                    from gateway.status import write_runtime_status
                    write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
                except Exception:
                    pass
                return False
            logger.warning("No messaging platforms enabled.")
            logger.info("Gateway will continue running for cron job execution.")
        
        # Update delivery router with adapters
        self.delivery_router.adapters = self.adapters
        
        self._running = True
        try:
            from gateway.status import write_runtime_status
            write_runtime_status(gateway_state="running", exit_reason=None)
        except Exception:
            pass
        
        # Emit gateway:startup hook
        hook_count = len(self.hooks.loaded_hooks)
        if hook_count:
            logger.info("%s hook(s) loaded", hook_count)
        await self.hooks.emit("gateway:startup", {
            "platforms": [p.value for p in self.adapters.keys()],
        })
        
        if connected_count > 0:
            logger.info("Gateway running with %s platform(s)", connected_count)
        
        # Build initial channel directory for send_message name resolution
        try:
            from gateway.channel_directory import build_channel_directory
            directory = build_channel_directory(self.adapters)
            ch_count = sum(len(chs) for chs in directory.get("platforms", {}).values())
            logger.info("Channel directory built: %d target(s)", ch_count)
        except Exception as e:
            logger.warning("Channel directory build failed: %s", e)
        
        # Check if we're restarting after a /update command. If the update is
        # still running, keep watching so we notify once it actually finishes.
        notified = await self._send_update_notification()
        if not notified and any(
            path.exists()
            for path in (
                _hermes_home / ".update_pending.json",
                _hermes_home / ".update_pending.claimed.json",
            )
        ):
            self._schedule_update_notification_watch()

        # Drain any recovered process watchers (from crash recovery checkpoint)
        try:
            from tools.process_registry import process_registry
            while process_registry.pending_watchers:
                watcher = process_registry.pending_watchers.pop(0)
                asyncio.create_task(self._run_process_watcher(watcher))
                logger.info("Resumed watcher for recovered process %s", watcher.get("session_id"))
        except Exception as e:
            logger.error("Recovered watcher setup error: %s", e)

        # Start background session expiry watcher for proactive memory flushing
        asyncio.create_task(self._session_expiry_watcher())

        # Start background reconnection watcher for platforms that failed at startup
        if self._failed_platforms:
            logger.info(
                "Starting reconnection watcher for %d failed platform(s): %s",
                len(self._failed_platforms),
                ", ".join(p.value for p in self._failed_platforms),
            )
        asyncio.create_task(self._platform_reconnect_watcher())

        logger.info("Press Ctrl+C to stop")
        
        return True
    
    async def _session_expiry_watcher(self, interval: int = 300):
        """Background task that proactively flushes memories for expired sessions.
        
        Runs every `interval` seconds (default 5 min).  For each session that
        has expired according to its reset policy, flushes memories in a thread
        pool and marks the session so it won't be flushed again.

        This means memories are already saved by the time the user sends their
        next message, so there's no blocking delay.
        """
        await asyncio.sleep(60)  # initial delay — let the gateway fully start
        while self._running:
            try:
                self.session_store._ensure_loaded()
                for key, entry in list(self.session_store._entries.items()):
                    if entry.session_id in self.session_store._pre_flushed_sessions:
                        continue  # already flushed this session
                    if not self.session_store._is_session_expired(entry):
                        continue  # session still active
                    # Session has expired — flush memories in the background
                    logger.info(
                        "Session %s expired (key=%s), flushing memories proactively",
                        entry.session_id, key,
                    )
                    try:
                        await self._async_flush_memories(entry.session_id, key)
                        self._shutdown_gateway_honcho(key)
                        self.session_store._pre_flushed_sessions.add(entry.session_id)
                    except Exception as e:
                        logger.debug("Proactive memory flush failed for %s: %s", entry.session_id, e)
            except Exception as e:
                logger.debug("Session expiry watcher error: %s", e)
            # Sleep in small increments so we can stop quickly
            for _ in range(interval):
                if not self._running:
                    break
                await asyncio.sleep(1)

    async def _platform_reconnect_watcher(self) -> None:
        """Background task that periodically retries connecting failed platforms.

        Uses exponential backoff: 30s → 60s → 120s → 240s → 300s (cap).
        Stops retrying a platform after 20 failed attempts or if the error
        is non-retryable (e.g. bad auth token).
        """
        _MAX_ATTEMPTS = 20
        _BACKOFF_CAP = 300  # 5 minutes max between retries

        await asyncio.sleep(10)  # initial delay — let startup finish
        while self._running:
            if not self._failed_platforms:
                # Nothing to reconnect — sleep and check again
                for _ in range(30):
                    if not self._running:
                        return
                    await asyncio.sleep(1)
                continue

            now = time.monotonic()
            for platform in list(self._failed_platforms.keys()):
                if not self._running:
                    return
                info = self._failed_platforms[platform]
                if now < info["next_retry"]:
                    continue  # not time yet

                if info["attempts"] >= _MAX_ATTEMPTS:
                    logger.warning(
                        "Giving up reconnecting %s after %d attempts",
                        platform.value, info["attempts"],
                    )
                    del self._failed_platforms[platform]
                    continue

                platform_config = info["config"]
                attempt = info["attempts"] + 1
                logger.info(
                    "Reconnecting %s (attempt %d/%d)...",
                    platform.value, attempt, _MAX_ATTEMPTS,
                )

                try:
                    adapter = self._create_adapter(platform, platform_config)
                    if not adapter:
                        logger.warning(
                            "Reconnect %s: adapter creation returned None, removing from retry queue",
                            platform.value,
                        )
                        del self._failed_platforms[platform]
                        continue

                    adapter.set_message_handler(self._handle_message)
                    adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)

                    success = await adapter.connect()
                    if success:
                        self.adapters[platform] = adapter
                        self._sync_voice_mode_state_to_adapter(adapter)
                        self.delivery_router.adapters = self.adapters
                        del self._failed_platforms[platform]
                        logger.info("✓ %s reconnected successfully", platform.value)

                        # Rebuild channel directory with the new adapter
                        try:
                            from gateway.channel_directory import build_channel_directory
                            build_channel_directory(self.adapters)
                        except Exception:
                            pass
                    else:
                        # Check if the failure is non-retryable
                        if adapter.has_fatal_error and not adapter.fatal_error_retryable:
                            logger.warning(
                                "Reconnect %s: non-retryable error (%s), removing from retry queue",
                                platform.value, adapter.fatal_error_message,
                            )
                            del self._failed_platforms[platform]
                        else:
                            backoff = min(30 * (2 ** (attempt - 1)), _BACKOFF_CAP)
                            info["attempts"] = attempt
                            info["next_retry"] = time.monotonic() + backoff
                            logger.info(
                                "Reconnect %s failed, next retry in %ds",
                                platform.value, backoff,
                            )
                except Exception as e:
                    backoff = min(30 * (2 ** (attempt - 1)), _BACKOFF_CAP)
                    info["attempts"] = attempt
                    info["next_retry"] = time.monotonic() + backoff
                    logger.warning(
                        "Reconnect %s error: %s, next retry in %ds",
                        platform.value, e, backoff,
                    )

            # Check every 10 seconds for platforms that need reconnection
            for _ in range(10):
                if not self._running:
                    return
                await asyncio.sleep(1)

    async def stop(self) -> None:
        """Stop the gateway and disconnect all adapters."""
        logger.info("Stopping gateway...")
        self._running = False

        for session_key, agent in list(self._running_agents.items()):
            if agent is _AGENT_PENDING_SENTINEL:
                continue
            try:
                agent.interrupt("Gateway shutting down")
                logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20])
            except Exception as e:
                logger.debug("Failed interrupting agent during shutdown: %s", e)

        for platform, adapter in list(self.adapters.items()):
            try:
                await adapter.cancel_background_tasks()
            except Exception as e:
                logger.debug("✗ %s background-task cancel error: %s", platform.value, e)
            try:
                await adapter.disconnect()
                logger.info("✓ %s disconnected", platform.value)
            except Exception as e:
                logger.error("✗ %s disconnect error: %s", platform.value, e)

        # Cancel any pending background tasks
        for _task in list(self._background_tasks):
            _task.cancel()
        self._background_tasks.clear()

        self.adapters.clear()
        self._running_agents.clear()
        self._pending_messages.clear()
        self._pending_approvals.clear()
        self._shutdown_all_gateway_honcho()
        self._shutdown_event.set()
        
        from gateway.status import remove_pid_file, write_runtime_status
        remove_pid_file()
        try:
            write_runtime_status(gateway_state="stopped", exit_reason=self._exit_reason)
        except Exception:
            pass
        
        logger.info("Gateway stopped")
    
    async def wait_for_shutdown(self) -> None:
        """Wait for shutdown signal."""
        await self._shutdown_event.wait()
    
    def _create_adapter(
        self, 
        platform: Platform, 
        config: Any
    ) -> Optional[BasePlatformAdapter]:
        """Create the appropriate adapter for a platform."""
        if hasattr(config, "extra") and isinstance(config.extra, dict):
            config.extra.setdefault(
                "group_sessions_per_user",
                self.config.group_sessions_per_user,
            )

        if platform == Platform.TELEGRAM:
            from gateway.platforms.telegram import TelegramAdapter, check_telegram_requirements
            if not check_telegram_requirements():
                logger.warning("Telegram: python-telegram-bot not installed")
                return None
            return TelegramAdapter(config)
        
        elif platform == Platform.DISCORD:
            from gateway.platforms.discord import DiscordAdapter, check_discord_requirements
            if not check_discord_requirements():
                logger.warning("Discord: discord.py not installed")
                return None
            return DiscordAdapter(config)
        
        elif platform == Platform.WHATSAPP:
            from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
            if not check_whatsapp_requirements():
                logger.warning("WhatsApp: Node.js not installed or bridge not configured")
                return None
            return WhatsAppAdapter(config)
        
        elif platform == Platform.SLACK:
            from gateway.platforms.slack import SlackAdapter, check_slack_requirements
            if not check_slack_requirements():
                logger.warning("Slack: slack-bolt not installed. Run: pip install 'hermes-agent[slack]'")
                return None
            return SlackAdapter(config)

        elif platform == Platform.SIGNAL:
            from gateway.platforms.signal import SignalAdapter, check_signal_requirements
            if not check_signal_requirements():
                logger.warning("Signal: SIGNAL_HTTP_URL or SIGNAL_ACCOUNT not configured")
                return None
            return SignalAdapter(config)

        elif platform == Platform.HOMEASSISTANT:
            from gateway.platforms.homeassistant import HomeAssistantAdapter, check_ha_requirements
            if not check_ha_requirements():
                logger.warning("HomeAssistant: aiohttp not installed or HASS_TOKEN not set")
                return None
            return HomeAssistantAdapter(config)

        elif platform == Platform.EMAIL:
            from gateway.platforms.email import EmailAdapter, check_email_requirements
            if not check_email_requirements():
                logger.warning("Email: EMAIL_ADDRESS, EMAIL_PASSWORD, EMAIL_IMAP_HOST, or EMAIL_SMTP_HOST not set")
                return None
            return EmailAdapter(config)

        elif platform == Platform.SMS:
            from gateway.platforms.sms import SmsAdapter, check_sms_requirements
            if not check_sms_requirements():
                logger.warning("SMS: aiohttp not installed or TWILIO_ACCOUNT_SID/TWILIO_AUTH_TOKEN not set")
                return None
            return SmsAdapter(config)

        elif platform == Platform.DINGTALK:
            from gateway.platforms.dingtalk import DingTalkAdapter, check_dingtalk_requirements
            if not check_dingtalk_requirements():
                logger.warning("DingTalk: dingtalk-stream not installed or DINGTALK_CLIENT_ID/SECRET not set")
                return None
            return DingTalkAdapter(config)

        elif platform == Platform.MATTERMOST:
            from gateway.platforms.mattermost import MattermostAdapter, check_mattermost_requirements
            if not check_mattermost_requirements():
                logger.warning("Mattermost: MATTERMOST_TOKEN or MATTERMOST_URL not set, or aiohttp missing")
                return None
            return MattermostAdapter(config)

        elif platform == Platform.MATRIX:
            from gateway.platforms.matrix import MatrixAdapter, check_matrix_requirements
            if not check_matrix_requirements():
                logger.warning("Matrix: matrix-nio not installed or credentials not set. Run: pip install 'matrix-nio[e2e]'")
                return None
            return MatrixAdapter(config)

        elif platform == Platform.API_SERVER:
            from gateway.platforms.api_server import APIServerAdapter, check_api_server_requirements
            if not check_api_server_requirements():
                logger.warning("API Server: aiohttp not installed")
                return None
            return APIServerAdapter(config)

        elif platform == Platform.WEBHOOK:
            from gateway.platforms.webhook import WebhookAdapter, check_webhook_requirements
            if not check_webhook_requirements():
                logger.warning("Webhook: aiohttp not installed")
                return None
            adapter = WebhookAdapter(config)
            adapter.gateway_runner = self  # For cross-platform delivery
            return adapter

        return None
    
    def _is_user_authorized(self, source: SessionSource) -> bool:
        """
        Check if a user is authorized to use the bot.
        
        Checks in order:
        1. Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
        2. Environment variable allowlists (TELEGRAM_ALLOWED_USERS, etc.)
        3. DM pairing approved list
        4. Global allow-all (GATEWAY_ALLOW_ALL_USERS=true)
        5. Default: deny
        """
        # Home Assistant events are system-generated (state changes), not
        # user-initiated messages.  The HASS_TOKEN already authenticates the
        # connection, so HA events are always authorized.
        # Webhook events are authenticated via HMAC signature validation in
        # the adapter itself — no user allowlist applies.
        if source.platform in (Platform.HOMEASSISTANT, Platform.WEBHOOK):
            return True

        user_id = source.user_id
        if not user_id:
            return False

        platform_env_map = {
            Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
            Platform.DISCORD: "DISCORD_ALLOWED_USERS",
            Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
            Platform.SLACK: "SLACK_ALLOWED_USERS",
            Platform.SIGNAL: "SIGNAL_ALLOWED_USERS",
            Platform.EMAIL: "EMAIL_ALLOWED_USERS",
            Platform.SMS: "SMS_ALLOWED_USERS",
            Platform.MATTERMOST: "MATTERMOST_ALLOWED_USERS",
            Platform.MATRIX: "MATRIX_ALLOWED_USERS",
            Platform.DINGTALK: "DINGTALK_ALLOWED_USERS",
        }
        platform_allow_all_map = {
            Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
            Platform.DISCORD: "DISCORD_ALLOW_ALL_USERS",
            Platform.WHATSAPP: "WHATSAPP_ALLOW_ALL_USERS",
            Platform.SLACK: "SLACK_ALLOW_ALL_USERS",
            Platform.SIGNAL: "SIGNAL_ALLOW_ALL_USERS",
            Platform.EMAIL: "EMAIL_ALLOW_ALL_USERS",
            Platform.SMS: "SMS_ALLOW_ALL_USERS",
            Platform.MATTERMOST: "MATTERMOST_ALLOW_ALL_USERS",
            Platform.MATRIX: "MATRIX_ALLOW_ALL_USERS",
            Platform.DINGTALK: "DINGTALK_ALLOW_ALL_USERS",
        }

        # Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
        platform_allow_all_var = platform_allow_all_map.get(source.platform, "")
        if platform_allow_all_var and os.getenv(platform_allow_all_var, "").lower() in ("true", "1", "yes"):
            return True

        # Check pairing store (always checked, regardless of allowlists)
        platform_name = source.platform.value if source.platform else ""
        if self.pairing_store.is_approved(platform_name, user_id):
            return True

        # Check platform-specific and global allowlists
        platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip()
        global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()

        if not platform_allowlist and not global_allowlist:
            # No allowlists configured -- check global allow-all flag
            return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")

        # Check if user is in any allowlist
        allowed_ids = set()
        if platform_allowlist:
            allowed_ids.update(uid.strip() for uid in platform_allowlist.split(",") if uid.strip())
        if global_allowlist:
            allowed_ids.update(uid.strip() for uid in global_allowlist.split(",") if uid.strip())

        # WhatsApp JIDs have @s.whatsapp.net suffix — strip it for comparison
        check_ids = {user_id}
        if "@" in user_id:
            check_ids.add(user_id.split("@")[0])
        return bool(check_ids & allowed_ids)

    def _get_unauthorized_dm_behavior(self, platform: Optional[Platform]) -> str:
        """Return how unauthorized DMs should be handled for a platform."""
        config = getattr(self, "config", None)
        if config and hasattr(config, "get_unauthorized_dm_behavior"):
            return config.get_unauthorized_dm_behavior(platform)
        return "pair"
    
    async def _handle_message(self, event: MessageEvent) -> Optional[str]:
        """
        Handle an incoming message from any platform.
        
        This is the core message processing pipeline:
        1. Check user authorization
        2. Check for commands (/new, /reset, etc.)
        3. Check for running agent and interrupt if needed
        4. Get or create session
        5. Build context for agent
        6. Run agent conversation
        7. Return response
        """
        source = event.source

        # Check if user is authorized
        if not self._is_user_authorized(source):
            logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value)
            # In DMs: offer pairing code. In groups: silently ignore.
            if source.chat_type == "dm" and self._get_unauthorized_dm_behavior(source.platform) == "pair":
                platform_name = source.platform.value if source.platform else "unknown"
                code = self.pairing_store.generate_code(
                    platform_name, source.user_id, source.user_name or ""
                )
                if code:
                    adapter = self.adapters.get(source.platform)
                    if adapter:
                        await adapter.send(
                            source.chat_id,
                            f"Hi~ I don't recognize you yet!\n\n"
                            f"Here's your pairing code: `{code}`\n\n"
                            f"Ask the bot owner to run:\n"
                            f"`hermes pairing approve {platform_name} {code}`"
                        )
                else:
                    adapter = self.adapters.get(source.platform)
                    if adapter:
                        await adapter.send(
                            source.chat_id,
                            "Too many pairing requests right now~ "
                            "Please try again later!"
                        )
            return None
        
        # PRIORITY handling when an agent is already running for this session.
        # Default behavior is to interrupt immediately so user text/stop messages
        # are handled with minimal latency.
        #
        # Special case: Telegram/photo bursts often arrive as multiple near-
        # simultaneous updates. Do NOT interrupt for photo-only follow-ups here;
        # let the adapter-level batching/queueing logic absorb them.
        _quick_key = self._session_key_for_source(source)
        if _quick_key in self._running_agents:
            if event.get_command() == "status":
                return await self._handle_status_command(event)

            # Resolve the command once for all early-intercept checks below.
            from hermes_cli.commands import resolve_command as _resolve_cmd_inner
            _evt_cmd = event.get_command()
            _cmd_def_inner = _resolve_cmd_inner(_evt_cmd) if _evt_cmd else None

            # /stop must hard-kill the session when an agent is running.
            # A soft interrupt (agent.interrupt()) doesn't help when the agent
            # is truly hung — the executor thread is blocked and never checks
            # _interrupt_requested.  Force-clean _running_agents so the session
            # is unlocked and subsequent messages are processed normally.
            if _cmd_def_inner and _cmd_def_inner.name == "stop":
                running_agent = self._running_agents.get(_quick_key)
                if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
                    running_agent.interrupt("Stop requested")
                # Force-clean: remove the session lock regardless of agent state
                adapter = self.adapters.get(source.platform)
                if adapter and hasattr(adapter, 'get_pending_message'):
                    adapter.get_pending_message(_quick_key)  # consume and discard
                self._pending_messages.pop(_quick_key, None)
                if _quick_key in self._running_agents:
                    del self._running_agents[_quick_key]
                logger.info("HARD STOP for session %s — session lock released", _quick_key[:20])
                return "⚡ Force-stopped. The session is unlocked — you can send a new message."

            # /reset and /new must bypass the running-agent guard so they
            # actually dispatch as commands instead of being queued as user
            # text (which would be fed back to the agent with the same
            # broken history — #2170).  Interrupt the agent first, then
            # clear the adapter's pending queue so the stale "/reset" text
            # doesn't get re-processed as a user message after the
            # interrupt completes.
            if _cmd_def_inner and _cmd_def_inner.name == "new":
                running_agent = self._running_agents.get(_quick_key)
                if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
                    running_agent.interrupt("Session reset requested")
                # Clear any pending messages so the old text doesn't replay
                adapter = self.adapters.get(source.platform)
                if adapter and hasattr(adapter, 'get_pending_message'):
                    adapter.get_pending_message(_quick_key)  # consume and discard
                self._pending_messages.pop(_quick_key, None)
                # Clean up the running agent entry so the reset handler
                # doesn't think an agent is still active.
                if _quick_key in self._running_agents:
                    del self._running_agents[_quick_key]
                return await self._handle_reset_command(event)

            # /queue <prompt> — queue without interrupting
            if event.get_command() in ("queue", "q"):
                queued_text = event.get_command_args().strip()
                if not queued_text:
                    return "Usage: /queue <prompt>"
                adapter = self.adapters.get(source.platform)
                if adapter:
                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
                    queued_event = _ME(
                        text=queued_text,
                        message_type=_MT.TEXT,
                        source=event.source,
                        message_id=event.message_id,
                    )
                    adapter._pending_messages[_quick_key] = queued_event
                return "Queued for the next turn."

            if event.message_type == MessageType.PHOTO:
                logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
                adapter = self.adapters.get(source.platform)
                if adapter:
                    # Reuse adapter queue semantics so photo bursts merge cleanly.
                    if _quick_key in adapter._pending_messages:
                        existing = adapter._pending_messages[_quick_key]
                        if getattr(existing, "message_type", None) == MessageType.PHOTO:
                            existing.media_urls.extend(event.media_urls)
                            existing.media_types.extend(event.media_types)
                            if event.text:
                                if not existing.text:
                                    existing.text = event.text
                                elif event.text not in existing.text:
                                    existing.text = f"{existing.text}\n\n{event.text}".strip()
                        else:
                            adapter._pending_messages[_quick_key] = event
                    else:
                        adapter._pending_messages[_quick_key] = event
                return None

            running_agent = self._running_agents.get(_quick_key)
            if running_agent is _AGENT_PENDING_SENTINEL:
                # Agent is being set up but not ready yet.
                if event.get_command() == "stop":
                    # Force-clean the sentinel so the session is unlocked.
                    if _quick_key in self._running_agents:
                        del self._running_agents[_quick_key]
                    logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20])
                    return "⚡ Force-stopped. The agent was still starting — session unlocked."
                # Queue the message so it will be picked up after the
                # agent starts.
                adapter = self.adapters.get(source.platform)
                if adapter:
                    adapter._pending_messages[_quick_key] = event
                return None
            logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
            running_agent.interrupt(event.text)
            if _quick_key in self._pending_messages:
                self._pending_messages[_quick_key] += "\n" + event.text
            else:
                self._pending_messages[_quick_key] = event.text
            return None

        # Check for commands
        command = event.get_command()
        
        # Emit command:* hook for any recognized slash command.
        # GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY
        # in hermes_cli/commands.py — no hardcoded set to maintain here.
        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS, resolve_command as _resolve_cmd
        if command and command in GATEWAY_KNOWN_COMMANDS:
            await self.hooks.emit(f"command:{command}", {
                "platform": source.platform.value if source.platform else "",
                "user_id": source.user_id,
                "command": command,
                "args": event.get_command_args().strip(),
            })

        # Resolve aliases to canonical name so dispatch only checks canonicals.
        _cmd_def = _resolve_cmd(command) if command else None
        canonical = _cmd_def.name if _cmd_def else command

        if canonical == "new":
            return await self._handle_reset_command(event)
        
        if canonical == "help":
            return await self._handle_help_command(event)
        
        if canonical == "status":
            return await self._handle_status_command(event)
        
        if canonical == "stop":
            return await self._handle_stop_command(event)
        
        if canonical == "reasoning":
            return await self._handle_reasoning_command(event)

        if canonical == "verbose":
            return await self._handle_verbose_command(event)

        if canonical == "provider":
            return await self._handle_provider_command(event)
        
        if canonical == "personality":
            return await self._handle_personality_command(event)

        if canonical == "plan":
            try:
                from agent.skill_commands import build_plan_path, build_skill_invocation_message

                user_instruction = event.get_command_args().strip()
                plan_path = build_plan_path(user_instruction)
                event.text = build_skill_invocation_message(
                    "/plan",
                    user_instruction,
                    task_id=_quick_key,
                    runtime_note=(
                        "Save the markdown plan with write_file to this exact relative path "
                        f"inside the active workspace/backend cwd: {plan_path}"
                    ),
                )
                if not event.text:
                    return "Failed to load the bundled /plan skill."
                canonical = None
            except Exception as e:
                logger.exception("Failed to prepare /plan command")
                return f"Failed to enter plan mode: {e}"
        
        if canonical == "retry":
            return await self._handle_retry_command(event)
        
        if canonical == "undo":
            return await self._handle_undo_command(event)
        
        if canonical == "sethome":
            return await self._handle_set_home_command(event)

        if canonical == "compress":
            return await self._handle_compress_command(event)

        if canonical == "usage":
            return await self._handle_usage_command(event)

        if canonical == "insights":
            return await self._handle_insights_command(event)

        if canonical == "reload-mcp":
            return await self._handle_reload_mcp_command(event)

        if canonical == "approve":
            return await self._handle_approve_command(event)

        if canonical == "deny":
            return await self._handle_deny_command(event)

        if canonical == "update":
            return await self._handle_update_command(event)

        if canonical == "title":
            return await self._handle_title_command(event)

        if canonical == "resume":
            return await self._handle_resume_command(event)

        if canonical == "rollback":
            return await self._handle_rollback_command(event)

        if canonical == "background":
            return await self._handle_background_command(event)

        if canonical == "voice":
            return await self._handle_voice_command(event)

        # User-defined quick commands (bypass agent loop, no LLM call)
        if command:
            if isinstance(self.config, dict):
                quick_commands = self.config.get("quick_commands", {}) or {}
            else:
                quick_commands = getattr(self.config, "quick_commands", {}) or {}
            if not isinstance(quick_commands, dict):
                quick_commands = {}
            if command in quick_commands:
                qcmd = quick_commands[command]
                if qcmd.get("type") == "exec":
                    exec_cmd = qcmd.get("command", "")
                    if exec_cmd:
                        try:
                            proc = await asyncio.create_subprocess_shell(
                                exec_cmd,
                                stdout=asyncio.subprocess.PIPE,
                                stderr=asyncio.subprocess.PIPE,
                            )
                            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
                            output = (stdout or stderr).decode().strip()
                            return output if output else "Command returned no output."
                        except asyncio.TimeoutError:
                            return "Quick command timed out (30s)."
                        except Exception as e:
                            return f"Quick command error: {e}"
                    else:
                        return f"Quick command '/{command}' has no command defined."
                elif qcmd.get("type") == "alias":
                    target = qcmd.get("target", "").strip()
                    if target:
                        target = target if target.startswith("/") else f"/{target}"
                        target_command = target.lstrip("/")
                        user_args = event.get_command_args().strip()
                        event.text = f"{target} {user_args}".strip()
                        command = target_command
                        # Fall through to normal command dispatch below
                    else:
                        return f"Quick command '/{command}' has no target defined."
                else:
                    return f"Quick command '/{command}' has unsupported type (supported: 'exec', 'alias')."

        # Plugin-registered slash commands
        if command:
            try:
                from hermes_cli.plugins import get_plugin_command_handler
                plugin_handler = get_plugin_command_handler(command)
                if plugin_handler:
                    user_args = event.get_command_args().strip()
                    import asyncio as _aio
                    result = plugin_handler(user_args)
                    if _aio.iscoroutine(result):
                        result = await result
                    return str(result) if result else None
            except Exception as e:
                logger.debug("Plugin command dispatch failed (non-fatal): %s", e)

        # Skill slash commands: /skill-name loads the skill and sends to agent
        if command:
            try:
                from agent.skill_commands import get_skill_commands, build_skill_invocation_message
                skill_cmds = get_skill_commands()
                cmd_key = f"/{command}"
                if cmd_key in skill_cmds:
                    user_instruction = event.get_command_args().strip()
                    msg = build_skill_invocation_message(
                        cmd_key, user_instruction, task_id=_quick_key
                    )
                    if msg:
                        event.text = msg
                        # Fall through to normal message processing with skill content
            except Exception as e:
                logger.debug("Skill command check failed (non-fatal): %s", e)
        
        # Pending exec approvals are handled by /approve and /deny commands above.
        # No bare text matching — "yes" in normal conversation must not trigger
        # execution of a dangerous command.

        # ── Claim this session before any await ───────────────────────
        # Between here and _run_agent registering the real AIAgent, there
        # are numerous await points (hooks, vision enrichment, STT,
        # session hygiene compression).  Without this sentinel a second
        # message arriving during any of those yields would pass the
        # "already running" guard and spin up a duplicate agent for the
        # same session — corrupting the transcript.
        self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL

        try:
            return await self._handle_message_with_agent(event, source, _quick_key)
        finally:
            # If _run_agent replaced the sentinel with a real agent and
            # then cleaned it up, this is a no-op.  If we exited early
            # (exception, command fallthrough, etc.) the sentinel must
            # not linger or the session would be permanently locked out.
            if self._running_agents.get(_quick_key) is _AGENT_PENDING_SENTINEL:
                del self._running_agents[_quick_key]

    async def _handle_message_with_agent(self, event, source, _quick_key: str):
        """Inner handler that runs under the _running_agents sentinel guard."""

        # Get or create session
        session_entry = self.session_store.get_or_create_session(source)
        session_key = session_entry.session_key
        
        # Emit session:start for new or auto-reset sessions
        _is_new_session = (
            session_entry.created_at == session_entry.updated_at
            or getattr(session_entry, "was_auto_reset", False)
        )
        if _is_new_session:
            await self.hooks.emit("session:start", {
                "platform": source.platform.value if source.platform else "",
                "user_id": source.user_id,
                "session_id": session_entry.session_id,
                "session_key": session_key,
            })
        
        # Build session context
        context = build_session_context(source, self.config, session_entry)
        
        # Set environment variables for tools
        self._set_session_env(context)
        
        # Read privacy.redact_pii from config (re-read per message)
        _redact_pii = False
        try:
            import yaml as _pii_yaml
            with open(_config_path, encoding="utf-8") as _pf:
                _pcfg = _pii_yaml.safe_load(_pf) or {}
            _redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False))
        except Exception:
            pass

        # Build the context prompt to inject
        context_prompt = build_session_context_prompt(context, redact_pii=_redact_pii)
        
        # If the previous session expired and was auto-reset, prepend a notice
        # so the agent knows this is a fresh conversation (not an intentional /reset).
        if getattr(session_entry, 'was_auto_reset', False):
            reset_reason = getattr(session_entry, 'auto_reset_reason', None) or 'idle'
            if reset_reason == "daily":
                context_note = "[System note: The user's session was automatically reset by the daily schedule. This is a fresh conversation with no prior context.]"
            else:
                context_note = "[System note: The user's previous session expired due to inactivity. This is a fresh conversation with no prior context.]"
            context_prompt = context_note + "\n\n" + context_prompt

            # Send a user-facing notification explaining the reset, unless:
            # - notifications are disabled in config
            # - the platform is excluded (e.g. api_server, webhook)
            # - the expired session had no activity (nothing was cleared)
            try:
                policy = self.session_store.config.get_reset_policy(
                    platform=source.platform,
                    session_type=getattr(source, 'chat_type', 'dm'),
                )
                platform_name = source.platform.value if source.platform else ""
                had_activity = getattr(session_entry, 'reset_had_activity', False)
                should_notify = (
                    policy.notify
                    and had_activity
                    and platform_name not in policy.notify_exclude_platforms
                )
                if should_notify:
                    adapter = self.adapters.get(source.platform)
                    if adapter:
                        if reset_reason == "daily":
                            reason_text = f"daily schedule at {policy.at_hour}:00"
                        else:
                            hours = policy.idle_minutes // 60
                            mins = policy.idle_minutes % 60
                            duration = f"{hours}h" if not mins else f"{hours}h {mins}m" if hours else f"{mins}m"
                            reason_text = f"inactive for {duration}"
                        notice = (
                            f"◐ Session automatically reset ({reason_text}). "
                            f"Conversation history cleared.\n"
                            f"Use /resume to browse and restore a previous session.\n"
                            f"Adjust reset timing in config.yaml under session_reset."
                        )
                        try:
                            session_info = self._format_session_info()
                            if session_info:
                                notice = f"{notice}\n\n{session_info}"
                        except Exception:
                            pass
                        await adapter.send(
                            source.chat_id, notice,
                            metadata=getattr(event, 'metadata', None),
                        )
            except Exception as e:
                logger.debug("Auto-reset notification failed (non-fatal): %s", e)

            session_entry.was_auto_reset = False
            session_entry.auto_reset_reason = None

        # Auto-load skill for DM topic bindings (e.g., Telegram Private Chat Topics)
        # Only inject on NEW sessions — for ongoing conversations the skill content
        # is already in the conversation history from the first message.
        if _is_new_session and getattr(event, "auto_skill", None):
            try:
                from agent.skill_commands import _load_skill_payload, _build_skill_message
                _skill_name = event.auto_skill
                _loaded = _load_skill_payload(_skill_name, task_id=_quick_key)
                if _loaded:
                    _loaded_skill, _skill_dir, _display_name = _loaded
                    _activation_note = (
                        f'[SYSTEM: This conversation is in a topic with the "{_display_name}" skill '
                        f"auto-loaded. Follow its instructions for the duration of this session.]"
                    )
                    _skill_msg = _build_skill_message(
                        _loaded_skill, _skill_dir, _activation_note,
                        user_instruction=event.text,
                    )
                    if _skill_msg:
                        event.text = _skill_msg
                        logger.info(
                            "[Gateway] Auto-loaded skill '%s' for DM topic session %s",
                            _skill_name, session_key,
                        )
                else:
                    logger.warning(
                        "[Gateway] DM topic skill '%s' not found in available skills",
                        _skill_name,
                    )
            except Exception as e:
                logger.warning("[Gateway] Failed to auto-load topic skill '%s': %s", event.auto_skill, e)

        # Load conversation history from transcript
        history = self.session_store.load_transcript(session_entry.session_id)
        
        # -----------------------------------------------------------------
        # Session hygiene: auto-compress pathologically large transcripts
        #
        # Long-lived gateway sessions can accumulate enough history that
        # every new message rehydrates an oversized transcript, causing
        # repeated truncation/context failures.  Detect this early and
        # compress proactively — before the agent even starts.  (#628)
        #
        # Token source priority:
        # 1. Actual API-reported prompt_tokens from the last turn
        #    (stored in session_entry.last_prompt_tokens)
        # 2. Rough char-based estimate (str(msg)//4). Overestimates
        #    by 30-50% on code/JSON-heavy sessions, but that just
        #    means hygiene fires a bit early — safe and harmless.
        # -----------------------------------------------------------------
        if history and len(history) >= 4:
            from agent.model_metadata import (
                estimate_messages_tokens_rough,
                get_model_context_length,
            )

            # Read model + compression config from config.yaml.
            # NOTE: hygiene threshold is intentionally HIGHER than the agent's
            # own compressor (0.85 vs 0.50).  Hygiene is a safety net for
            # sessions that grew too large between turns — it fires pre-agent
            # to prevent API failures.  The agent's own compressor handles
            # normal context management during its tool loop with accurate
            # real token counts.  Having hygiene at 0.50 caused premature
            # compression on every turn in long gateway sessions.
            _hyg_model = "anthropic/claude-sonnet-4.6"
            _hyg_threshold_pct = 0.85
            _hyg_compression_enabled = True
            _hyg_config_context_length = None
            _hyg_provider = None
            _hyg_base_url = None
            _hyg_api_key = None
            try:
                _hyg_cfg_path = _hermes_home / "config.yaml"
                if _hyg_cfg_path.exists():
                    import yaml as _hyg_yaml
                    with open(_hyg_cfg_path, encoding="utf-8") as _hyg_f:
                        _hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}

                    # Resolve model name (same logic as run_sync)
                    _model_cfg = _hyg_data.get("model", {})
                    if isinstance(_model_cfg, str):
                        _hyg_model = _model_cfg
                    elif isinstance(_model_cfg, dict):
                        _hyg_model = _model_cfg.get("default", _hyg_model)
                        # Read explicit context_length override from model config
                        # (same as run_agent.py lines 995-1005)
                        _raw_ctx = _model_cfg.get("context_length")
                        if _raw_ctx is not None:
                            try:
                                _hyg_config_context_length = int(_raw_ctx)
                            except (TypeError, ValueError):
                                pass
                        # Read provider for accurate context detection
                        _hyg_provider = _model_cfg.get("provider") or None
                        _hyg_base_url = _model_cfg.get("base_url") or None

                    # Read compression settings — only use enabled flag.
                    # The threshold is intentionally separate from the agent's
                    # compression.threshold (hygiene runs higher).
                    _comp_cfg = _hyg_data.get("compression", {})
                    if isinstance(_comp_cfg, dict):
                        _hyg_compression_enabled = str(
                            _comp_cfg.get("enabled", True)
                        ).lower() in ("true", "1", "yes")

                # Resolve provider/base_url from runtime if not in config
                if not _hyg_provider or not _hyg_base_url:
                    try:
                        _hyg_runtime = _resolve_runtime_agent_kwargs()
                        _hyg_provider = _hyg_provider or _hyg_runtime.get("provider")
                        _hyg_base_url = _hyg_base_url or _hyg_runtime.get("base_url")
                        _hyg_api_key = _hyg_runtime.get("api_key")
                    except Exception:
                        pass
            except Exception:
                pass

            if _hyg_compression_enabled:
                _hyg_context_length = get_model_context_length(
                    _hyg_model,
                    base_url=_hyg_base_url or "",
                    api_key=_hyg_api_key or "",
                    config_context_length=_hyg_config_context_length,
                    provider=_hyg_provider or "",
                )
                _compress_token_threshold = int(
                    _hyg_context_length * _hyg_threshold_pct
                )
                _warn_token_threshold = int(_hyg_context_length * 0.95)

                _msg_count = len(history)

                # Prefer actual API-reported tokens from the last turn
                # (stored in session entry) over the rough char-based estimate.
                _stored_tokens = session_entry.last_prompt_tokens
                if _stored_tokens > 0:
                    _approx_tokens = _stored_tokens
                    _token_source = "actual"
                else:
                    _approx_tokens = estimate_messages_tokens_rough(history)
                    _token_source = "estimated"
                    # Note: rough estimates overestimate by 30-50% for code/JSON-heavy
                    # sessions, but that just means hygiene fires a bit early — which
                    # is safe and harmless.  The 85% threshold already provides ample
                    # headroom (agent's own compressor runs at 50%).  A previous 1.4x
                    # multiplier tried to compensate by inflating the threshold, but
                    # 85% * 1.4 = 119% of context — which exceeds the model's limit
                    # and prevented hygiene from ever firing for ~200K models (GLM-5).

                _needs_compress = _approx_tokens >= _compress_token_threshold

                if _needs_compress:
                    logger.info(
                        "Session hygiene: %s messages, ~%s tokens (%s) — auto-compressing "
                        "(threshold: %s%% of %s = %s tokens)",
                        _msg_count, f"{_approx_tokens:,}", _token_source,
                        int(_hyg_threshold_pct * 100),
                        f"{_hyg_context_length:,}",
                        f"{_compress_token_threshold:,}",
                    )

                    _hyg_adapter = self.adapters.get(source.platform)
                    _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
                    if _hyg_adapter:
                        try:
                            await _hyg_adapter.send(
                                source.chat_id,
                                f"🗜️ Session is large ({_msg_count} messages, "
                                f"~{_approx_tokens:,} tokens). Auto-compressing...",
                                metadata=_hyg_meta,
                            )
                        except Exception:
                            pass

                    try:
                        from run_agent import AIAgent

                        _hyg_runtime = _resolve_runtime_agent_kwargs()
                        if _hyg_runtime.get("api_key"):
                            _hyg_msgs = [
                                {"role": m.get("role"), "content": m.get("content")}
                                for m in history
                                if m.get("role") in ("user", "assistant")
                                and m.get("content")
                            ]

                            if len(_hyg_msgs) >= 4:
                                _hyg_agent = AIAgent(
                                    **_hyg_runtime,
                                    model=_hyg_model,
                                    max_iterations=4,
                                    quiet_mode=True,
                                    enabled_toolsets=["memory"],
                                    session_id=session_entry.session_id,
                                )
                                _hyg_agent._print_fn = lambda *a, **kw: None

                                loop = asyncio.get_event_loop()
                                _compressed, _ = await loop.run_in_executor(
                                    None,
                                    lambda: _hyg_agent._compress_context(
                                        _hyg_msgs, "",
                                        approx_tokens=_approx_tokens,
                                    ),
                                )

                                # _compress_context ends the old session and creates
                                # a new session_id.  Write compressed messages into
                                # the NEW session so the old transcript stays intact
                                # and searchable via session_search.
                                _hyg_new_sid = _hyg_agent.session_id
                                if _hyg_new_sid != session_entry.session_id:
                                    session_entry.session_id = _hyg_new_sid
                                    self.session_store._save()

                                self.session_store.rewrite_transcript(
                                    session_entry.session_id, _compressed
                                )
                                # Reset stored token count — transcript was rewritten
                                session_entry.last_prompt_tokens = 0
                                history = _compressed
                                _new_count = len(_compressed)
                                _new_tokens = estimate_messages_tokens_rough(
                                    _compressed
                                )

                                logger.info(
                                    "Session hygiene: compressed %s → %s msgs, "
                                    "~%s → ~%s tokens",
                                    _msg_count, _new_count,
                                    f"{_approx_tokens:,}", f"{_new_tokens:,}",
                                )

                                if _hyg_adapter:
                                    try:
                                        await _hyg_adapter.send(
                                            source.chat_id,
                                            f"🗜️ Compressed: {_msg_count} → "
                                            f"{_new_count} messages, "
                                            f"~{_approx_tokens:,} → "
                                            f"~{_new_tokens:,} tokens",
                                            metadata=_hyg_meta,
                                        )
                                    except Exception:
                                        pass

                                # Still too large after compression — warn user
                                if _new_tokens >= _warn_token_threshold:
                                    logger.warning(
                                        "Session hygiene: still ~%s tokens after "
                                        "compression — suggesting /reset",
                                        f"{_new_tokens:,}",
                                    )
                                    if _hyg_adapter:
                                        try:
                                            await _hyg_adapter.send(
                                                source.chat_id,
                                                "⚠️ Session is still very large "
                                                "after compression "
                                                f"(~{_new_tokens:,} tokens). "
                                                "Consider using /reset to start "
                                                "fresh if you experience issues.",
                                                metadata=_hyg_meta,
                                            )
                                        except Exception:
                                            pass

                    except Exception as e:
                        logger.warning(
                            "Session hygiene auto-compress failed: %s", e
                        )
                        # Compression failed and session is dangerously large
                        if _approx_tokens >= _warn_token_threshold:
                            _hyg_adapter = self.adapters.get(source.platform)
                            _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
                            if _hyg_adapter:
                                try:
                                    await _hyg_adapter.send(
                                        source.chat_id,
                                        f"⚠️ Session is very large "
                                        f"({_msg_count} messages, "
                                        f"~{_approx_tokens:,} tokens) and "
                                        "auto-compression failed. Consider "
                                        "using /compress or /reset to avoid "
                                        "issues.",
                                        metadata=_hyg_meta,
                                    )
                                except Exception:
                                    pass

        # First-message onboarding -- only on the very first interaction ever
        if not history and not self.session_store.has_any_sessions():
            context_prompt += (
                "\n\n[System note: This is the user's very first message ever. "
                "Briefly introduce yourself and mention that /help shows available commands. "
                "Keep the introduction concise -- one or two sentences max.]"
            )
        
        # One-time prompt if no home channel is set for this platform
        if not history and source.platform and source.platform != Platform.LOCAL:
            platform_name = source.platform.value
            env_key = f"{platform_name.upper()}_HOME_CHANNEL"
            if not os.getenv(env_key):
                adapter = self.adapters.get(source.platform)
                if adapter:
                    await adapter.send(
                        source.chat_id,
                        f"📬 No home channel is set for {platform_name.title()}. "
                        f"A home channel is where Hermes delivers cron job results "
                        f"and cross-platform messages.\n\n"
                        f"Type /sethome to make this chat your home channel, "
                        f"or ignore to skip."
                    )
        
        # -----------------------------------------------------------------
        # Voice channel awareness — inject current voice channel state
        # into context so the agent knows who is in the channel and who
        # is speaking, without needing a separate tool call.
        # -----------------------------------------------------------------
        if source.platform == Platform.DISCORD:
            adapter = self.adapters.get(Platform.DISCORD)
            guild_id = self._get_guild_id(event)
            if guild_id and adapter and hasattr(adapter, "get_voice_channel_context"):
                vc_context = adapter.get_voice_channel_context(guild_id)
                if vc_context:
                    context_prompt += f"\n\n{vc_context}"

        # -----------------------------------------------------------------
        # Auto-analyze images sent by the user
        #
        # If the user attached image(s), we run the vision tool eagerly so
        # the conversation model always receives a text description.  The
        # local file path is also included so the model can re-examine the
        # image later with a more targeted question via vision_analyze.
        #
        # We filter to image paths only (by media_type) so that non-image
        # attachments (documents, audio, etc.) are not sent to the vision
        # tool even when they appear in the same message.
        # -----------------------------------------------------------------
        message_text = event.text or ""
        if event.media_urls:
            image_paths = []
            for i, path in enumerate(event.media_urls):
                # Check media_types if available; otherwise infer from message type
                mtype = event.media_types[i] if i < len(event.media_types) else ""
                is_image = (
                    mtype.startswith("image/")
                    or event.message_type == MessageType.PHOTO
                )
                if is_image:
                    image_paths.append(path)
            if image_paths:
                message_text = await self._enrich_message_with_vision(
                    message_text, image_paths
                )
        
        # -----------------------------------------------------------------
        # Auto-transcribe voice/audio messages sent by the user
        # -----------------------------------------------------------------
        if event.media_urls:
            audio_paths = []
            for i, path in enumerate(event.media_urls):
                mtype = event.media_types[i] if i < len(event.media_types) else ""
                is_audio = (
                    mtype.startswith("audio/")
                    or event.message_type in (MessageType.VOICE, MessageType.AUDIO)
                )
                if is_audio:
                    audio_paths.append(path)
            if audio_paths:
                message_text = await self._enrich_message_with_transcription(
                    message_text, audio_paths
                )
                # If STT failed, send a direct message to the user so they
                # know voice isn't configured — don't rely on the agent to
                # relay the error clearly.
                _stt_fail_markers = (
                    "No STT provider",
                    "STT is disabled",
                    "can't listen",
                    "VOICE_TOOLS_OPENAI_KEY",
                )
                if any(m in message_text for m in _stt_fail_markers):
                    _stt_adapter = self.adapters.get(source.platform)
                    _stt_meta = {"thread_id": source.thread_id} if source.thread_id else None
                    if _stt_adapter:
                        try:
                            _stt_msg = (
                                "🎤 I received your voice message but can't transcribe it — "
                                "no speech-to-text provider is configured.\n\n"
                                "To enable voice: install faster-whisper "
                                "(`pip install faster-whisper` in the Hermes venv) "
                                "and set `stt.enabled: true` in config.yaml, "
                                "then /restart the gateway."
                            )
                            # Point to setup skill if it's installed
                            if self._has_setup_skill():
                                _stt_msg += "\n\nFor full setup instructions, type: `/skill hermes-agent-setup`"
                            await _stt_adapter.send(
                                source.chat_id, _stt_msg,
                                metadata=_stt_meta,
                            )
                        except Exception:
                            pass

        # -----------------------------------------------------------------
        # Enrich document messages with context notes for the agent
        # -----------------------------------------------------------------
        if event.media_urls and event.message_type == MessageType.DOCUMENT:
            for i, path in enumerate(event.media_urls):
                mtype = event.media_types[i] if i < len(event.media_types) else ""
                if not (mtype.startswith("application/") or mtype.startswith("text/")):
                    continue
                # Extract display filename by stripping the doc_{uuid12}_ prefix
                import os as _os
                basename = _os.path.basename(path)
                # Format: doc_<12hex>_<original_filename>
                parts = basename.split("_", 2)
                display_name = parts[2] if len(parts) >= 3 else basename
                # Sanitize to prevent prompt injection via filenames
                import re as _re
                display_name = _re.sub(r'[^\w.\- ]', '_', display_name)

                if mtype.startswith("text/"):
                    context_note = (
                        f"[The user sent a text document: '{display_name}'. "
                        f"Its content has been included below. "
                        f"The file is also saved at: {path}]"
                    )
                else:
                    context_note = (
                        f"[The user sent a document: '{display_name}'. "
                        f"The file is saved at: {path}. "
                        f"Ask the user what they'd like you to do with it.]"
                    )
                message_text = f"{context_note}\n\n{message_text}"

        # -----------------------------------------------------------------
        # Inject reply context when user replies to a message not in history.
        # Telegram (and other platforms) let users reply to specific messages,
        # but if the quoted message is from a previous session, cron delivery,
        # or background task, the agent has no context about what's being
        # referenced. Prepend the quoted text so the agent understands. (#1594)
        # -----------------------------------------------------------------
        if getattr(event, 'reply_to_text', None) and event.reply_to_message_id:
            reply_snippet = event.reply_to_text[:500]
            found_in_history = any(
                reply_snippet[:200] in (msg.get("content") or "")
                for msg in history
                if msg.get("role") in ("assistant", "user", "tool")
            )
            if not found_in_history:
                message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'

        try:
            # Emit agent:start hook
            hook_ctx = {
                "platform": source.platform.value if source.platform else "",
                "user_id": source.user_id,
                "session_id": session_entry.session_id,
                "message": message_text[:500],
            }
            await self.hooks.emit("agent:start", hook_ctx)

            # Expand @ context references (@file:, @folder:, @diff, etc.)
            if "@" in message_text:
                try:
                    from agent.context_references import preprocess_context_references_async
                    from agent.model_metadata import get_model_context_length
                    _msg_cwd = os.environ.get("MESSAGING_CWD", os.path.expanduser("~"))
                    _msg_ctx_len = get_model_context_length(
                        self._model, base_url=self._base_url or "")
                    _ctx_result = await preprocess_context_references_async(
                        message_text, cwd=_msg_cwd,
                        context_length=_msg_ctx_len, allowed_root=_msg_cwd)
                    if _ctx_result.blocked:
                        _adapter = self.adapters.get(source.platform)
                        if _adapter:
                            await _adapter.send(
                                source.chat_id,
                                "\n".join(_ctx_result.warnings) or "Context injection refused.",
                            )
                        return
                    if _ctx_result.expanded:
                        message_text = _ctx_result.message
                except Exception as exc:
                    logger.debug("@ context reference expansion failed: %s", exc)

            # Run the agent
            agent_result = await self._run_agent(
                message=message_text,
                context_prompt=context_prompt,
                history=history,
                source=source,
                session_id=session_entry.session_id,
                session_key=session_key,
                event_message_id=event.message_id,
            )

            # Stop persistent typing indicator now that the agent is done
            try:
                _typing_adapter = self.adapters.get(source.platform)
                if _typing_adapter and hasattr(_typing_adapter, "stop_typing"):
                    await _typing_adapter.stop_typing(source.chat_id)
            except Exception:
                pass

            response = agent_result.get("final_response") or ""
            agent_messages = agent_result.get("messages", [])

            # Surface error details when the agent failed silently (final_response=None)
            if not response and agent_result.get("failed"):
                error_detail = agent_result.get("error", "unknown error")
                error_str = str(error_detail).lower()

                # Detect context-overflow failures and give specific guidance.
                # Generic 400 "Error" from Anthropic with large sessions is the
                # most common cause of this (#1630).
                _is_ctx_fail = any(p in error_str for p in (
                    "context", "token", "too large", "too long",
                    "exceed", "payload",
                )) or (
                    "400" in error_str
                    and len(history) > 50
                )

                if _is_ctx_fail:
                    response = (
                        "⚠️ Session too large for the model's context window.\n"
                        "Use /compact to compress the conversation, or "
                        "/reset to start fresh."
                    )
                else:
                    response = (
                        f"The request failed: {str(error_detail)[:300]}\n"
                        "Try again or use /reset to start a fresh session."
                    )

            # If the agent's session_id changed during compression, update
            # session_entry so transcript writes below go to the right session.
            if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
                session_entry.session_id = agent_result["session_id"]

            # Prepend reasoning/thinking if display is enabled
            if getattr(self, "_show_reasoning", False) and response:
                last_reasoning = agent_result.get("last_reasoning")
                if last_reasoning:
                    # Collapse long reasoning to keep messages readable
                    lines = last_reasoning.strip().splitlines()
                    if len(lines) > 15:
                        display_reasoning = "\n".join(lines[:15])
                        display_reasoning += f"\n_... ({len(lines) - 15} more lines)_"
                    else:
                        display_reasoning = last_reasoning.strip()
                    response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}"

            # Emit agent:end hook
            await self.hooks.emit("agent:end", {
                **hook_ctx,
                "response": (response or "")[:500],
            })
            
            # Check for pending process watchers (check_interval on background processes)
            try:
                from tools.process_registry import process_registry
                while process_registry.pending_watchers:
                    watcher = process_registry.pending_watchers.pop(0)
                    asyncio.create_task(self._run_process_watcher(watcher))
            except Exception as e:
                logger.error("Process watcher setup error: %s", e)

            # Check if the agent encountered a dangerous command needing approval
            try:
                from tools.approval import pop_pending
                import time as _time
                pending = pop_pending(session_key)
                if pending:
                    pending["timestamp"] = _time.time()
                    self._pending_approvals[session_key] = pending
                    # Append structured instructions so the user knows how to respond
                    cmd_preview = pending.get("command", "")
                    if len(cmd_preview) > 200:
                        cmd_preview = cmd_preview[:200] + "..."
                    approval_hint = (
                        f"\n\n⚠️ **Dangerous command requires approval:**\n"
                        f"```\n{cmd_preview}\n```\n"
                        f"Reply `/approve` to execute, `/approve session` to approve this pattern "
                        f"for the session, or `/deny` to cancel."
                    )
                    response = (response or "") + approval_hint
            except Exception as e:
                logger.debug("Failed to check pending approvals: %s", e)
            
            # Save the full conversation to the transcript, including tool calls.
            # This preserves the complete agent loop (tool_calls, tool results,
            # intermediate reasoning) so sessions can be resumed with full context
            # and transcripts are useful for debugging and training data.
            #
            # IMPORTANT: When the agent failed before producing any response
            # (e.g. context-overflow 400), do NOT persist the user's message.
            # Persisting it would make the session even larger, causing the
            # same failure on the next attempt — an infinite loop. (#1630)
            agent_failed_early = (
                agent_result.get("failed")
                and not agent_result.get("final_response")
            )
            if agent_failed_early:
                logger.info(
                    "Skipping transcript persistence for failed request in "
                    "session %s to prevent session growth loop.",
                    session_entry.session_id,
                )

            ts = datetime.now().isoformat()
            
            # If this is a fresh session (no history), write the full tool
            # definitions as the first entry so the transcript is self-describing
            # -- the same list of dicts sent as tools=[...] in the API request.
            if agent_failed_early:
                pass  # Skip all transcript writes — don't grow a broken session
            elif not history:
                tool_defs = agent_result.get("tools", [])
                self.session_store.append_to_transcript(
                    session_entry.session_id,
                    {
                        "role": "session_meta",
                        "tools": tool_defs or [],
                        "model": os.getenv("HERMES_MODEL", ""),
                        "platform": source.platform.value if source.platform else "",
                        "timestamp": ts,
                    }
                )
            
            # Find only the NEW messages from this turn (skip history we loaded).
            # Use the filtered history length (history_offset) that was actually
            # passed to the agent, not len(history) which includes session_meta
            # entries that were stripped before the agent saw them.
            if not agent_failed_early:
                history_len = agent_result.get("history_offset", len(history))
                new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
                
                # If no new messages found (edge case), fall back to simple user/assistant
                if not new_messages:
                    self.session_store.append_to_transcript(
                        session_entry.session_id,
                        {"role": "user", "content": message_text, "timestamp": ts}
                    )
                    if response:
                        self.session_store.append_to_transcript(
                            session_entry.session_id,
                            {"role": "assistant", "content": response, "timestamp": ts}
                        )
                else:
                    # The agent already persisted these messages to SQLite via
                    # _flush_messages_to_session_db(), so skip the DB write here
                    # to prevent the duplicate-write bug (#860).  We still write
                    # to JSONL for backward compatibility and as a backup.
                    agent_persisted = self._session_db is not None
                    for msg in new_messages:
                        # Skip system messages (they're rebuilt each run)
                        if msg.get("role") == "system":
                            continue
                        # Add timestamp to each message for debugging
                        entry = {**msg, "timestamp": ts}
                        self.session_store.append_to_transcript(
                            session_entry.session_id, entry,
                            skip_db=agent_persisted,
                        )
            
            # Update session with actual prompt token count and model from the agent
            self.session_store.update_session(
                session_entry.session_key,
                input_tokens=agent_result.get("input_tokens", 0),
                output_tokens=agent_result.get("output_tokens", 0),
                cache_read_tokens=agent_result.get("cache_read_tokens", 0),
                cache_write_tokens=agent_result.get("cache_write_tokens", 0),
                last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
                model=agent_result.get("model"),
                estimated_cost_usd=agent_result.get("estimated_cost_usd"),
                cost_status=agent_result.get("cost_status"),
                cost_source=agent_result.get("cost_source"),
                provider=agent_result.get("provider"),
                base_url=agent_result.get("base_url"),
            )

            # Auto voice reply: send TTS audio before the text response
            _already_sent = bool(agent_result.get("already_sent"))
            if self._should_send_voice_reply(event, response, agent_messages, already_sent=_already_sent):
                await self._send_voice_reply(event, response)

            # If streaming already delivered the response, extract and
            # deliver any MEDIA: files before returning None.  Streaming
            # sends raw text chunks that include MEDIA: tags — the normal
            # post-processing in _process_message_background is skipped
            # when already_sent is True, so media files would never be
            # delivered without this.
            if agent_result.get("already_sent"):
                if response:
                    _media_adapter = self.adapters.get(source.platform)
                    if _media_adapter:
                        await self._deliver_media_from_response(
                            response, event, _media_adapter,
                        )
                return None

            return response
            
        except Exception as e:
            # Stop typing indicator on error too
            try:
                _err_adapter = self.adapters.get(source.platform)
                if _err_adapter and hasattr(_err_adapter, "stop_typing"):
                    await _err_adapter.stop_typing(source.chat_id)
            except Exception:
                pass
            logger.exception("Agent error in session %s", session_key)
            error_type = type(e).__name__
            error_detail = str(e)[:300] if str(e) else "no details available"
            status_hint = ""
            status_code = getattr(e, "status_code", None)
            _hist_len = len(history) if 'history' in locals() else 0
            if status_code == 401:
                status_hint = " Check your API key or run `claude /login` to refresh OAuth credentials."
            elif status_code == 429:
                # Check if this is a plan usage limit (resets on a schedule) vs a transient rate limit
                _err_body = getattr(e, "response", None)
                _err_json = {}
                try:
                    if _err_body is not None:
                        _err_json = _err_body.json().get("error", {})
                except Exception:
                    pass
                if _err_json.get("type") == "usage_limit_reached":
                    _resets_in = _err_json.get("resets_in_seconds")
                    if _resets_in and _resets_in > 0:
                        import math
                        _hours = math.ceil(_resets_in / 3600)
                        status_hint = f" Your plan's usage limit has been reached. It resets in ~{_hours}h."
                    else:
                        status_hint = " Your plan's usage limit has been reached. Please wait until it resets."
                else:
                    status_hint = " You are being rate-limited. Please wait a moment and try again."
            elif status_code == 529:
                status_hint = " The API is temporarily overloaded. Please try again shortly."
            elif status_code in (400, 500):
                # 400 with a large session is context overflow.
                # 500 with a large session often means the payload is too large
                # for the API to process — treat it the same way.
                if _hist_len > 50:
                    return (
                        "⚠️ Session too large for the model's context window.\n"
                        "Use /compact to compress the conversation, or "
                        "/reset to start fresh."
                    )
                elif status_code == 400:
                    status_hint = " The request was rejected by the API."
            return (
                f"Sorry, I encountered an error ({error_type}).\n"
                f"{error_detail}\n"
                f"{status_hint}"
                "Try again or use /reset to start a fresh session."
            )
        finally:
            # Clear session env
            self._clear_session_env()
    
    def _format_session_info(self) -> str:
        """Resolve current model config and return a formatted info block.

        Surfaces model, provider, context length, and endpoint so gateway
        users can immediately see if context detection went wrong (e.g.
        local models falling to the 128K default).
        """
        from agent.model_metadata import get_model_context_length, DEFAULT_FALLBACK_CONTEXT

        model = _resolve_gateway_model()
        config_context_length = None
        provider = None
        base_url = None
        api_key = None

        try:
            cfg_path = _hermes_home / "config.yaml"
            if cfg_path.exists():
                import yaml as _info_yaml
                with open(cfg_path, encoding="utf-8") as f:
                    data = _info_yaml.safe_load(f) or {}
                model_cfg = data.get("model", {})
                if isinstance(model_cfg, dict):
                    raw_ctx = model_cfg.get("context_length")
                    if raw_ctx is not None:
                        try:
                            config_context_length = int(raw_ctx)
                        except (TypeError, ValueError):
                            pass
                    provider = model_cfg.get("provider") or None
                    base_url = model_cfg.get("base_url") or None
        except Exception:
            pass

        # Resolve runtime credentials for probing
        try:
            runtime = _resolve_runtime_agent_kwargs()
            provider = provider or runtime.get("provider")
            base_url = base_url or runtime.get("base_url")
            api_key = runtime.get("api_key")
        except Exception:
            pass

        context_length = get_model_context_length(
            model,
            base_url=base_url or "",
            api_key=api_key or "",
            config_context_length=config_context_length,
            provider=provider or "",
        )

        # Format context source hint
        if config_context_length is not None:
            ctx_source = "config"
        elif context_length == DEFAULT_FALLBACK_CONTEXT:
            ctx_source = "default — set model.context_length in config to override"
        else:
            ctx_source = "detected"

        # Format context length for display
        if context_length >= 1_000_000:
            ctx_display = f"{context_length / 1_000_000:.1f}M"
        elif context_length >= 1_000:
            ctx_display = f"{context_length // 1_000}K"
        else:
            ctx_display = str(context_length)

        lines = [
            f"◆ Model: `{model}`",
            f"◆ Provider: {provider or 'openrouter'}",
            f"◆ Context: {ctx_display} tokens ({ctx_source})",
        ]

        # Show endpoint for local/custom setups
        if base_url and ("localhost" in base_url or "127.0.0.1" in base_url or "0.0.0.0" in base_url):
            lines.append(f"◆ Endpoint: {base_url}")

        return "\n".join(lines)

    async def _handle_reset_command(self, event: MessageEvent) -> str:
        """Handle /new or /reset command."""
        source = event.source
        
        # Get existing session key
        session_key = self._session_key_for_source(source)
        
        # Flush memories in the background (fire-and-forget) so the user
        # gets the "Session reset!" response immediately.
        try:
            old_entry = self.session_store._entries.get(session_key)
            if old_entry:
                _flush_task = asyncio.create_task(
                    self._async_flush_memories(old_entry.session_id, session_key)
                )
                self._background_tasks.add(_flush_task)
                _flush_task.add_done_callback(self._background_tasks.discard)
        except Exception as e:
            logger.debug("Gateway memory flush on reset failed: %s", e)

        self._shutdown_gateway_honcho(session_key)
        self._evict_cached_agent(session_key)
        
        # Reset the session
        new_entry = self.session_store.reset_session(session_key)

        # Emit session:end hook (session is ending)
        await self.hooks.emit("session:end", {
            "platform": source.platform.value if source.platform else "",
            "user_id": source.user_id,
            "session_key": session_key,
        })

        # Emit session:reset hook
        await self.hooks.emit("session:reset", {
            "platform": source.platform.value if source.platform else "",
            "user_id": source.user_id,
            "session_key": session_key,
        })
        
        # Resolve session config info to surface to the user
        try:
            session_info = self._format_session_info()
        except Exception:
            session_info = ""

        if new_entry:
            header = "✨ Session reset! Starting fresh."
        else:
            # No existing session, just create one
            self.session_store.get_or_create_session(source, force_new=True)
            header = "✨ New session started!"

        if session_info:
            return f"{header}\n\n{session_info}"
        return header
    
    async def _handle_status_command(self, event: MessageEvent) -> str:
        """Handle /status command."""
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
        
        connected_platforms = [p.value for p in self.adapters.keys()]
        
        # Check if there's an active agent
        session_key = session_entry.session_key
        is_running = session_key in self._running_agents
        
        lines = [
            "📊 **Hermes Gateway Status**",
            "",
            f"**Session ID:** `{session_entry.session_id[:12]}...`",
            f"**Created:** {session_entry.created_at.strftime('%Y-%m-%d %H:%M')}",
            f"**Last Activity:** {session_entry.updated_at.strftime('%Y-%m-%d %H:%M')}",
            f"**Tokens:** {session_entry.total_tokens:,}",
            f"**Agent Running:** {'Yes ⚡' if is_running else 'No'}",
            "",
            f"**Connected Platforms:** {', '.join(connected_platforms)}",
        ]
        
        return "\n".join(lines)
    
    async def _handle_stop_command(self, event: MessageEvent) -> str:
        """Handle /stop command - interrupt a running agent.

        When an agent is truly hung (blocked thread that never checks
        _interrupt_requested), the early intercept in _handle_message()
        handles /stop before this method is reached.  This handler fires
        only through normal command dispatch (no running agent) or as a
        fallback.  Force-clean the session lock in all cases for safety.
        """
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
        session_key = session_entry.session_key
        
        agent = self._running_agents.get(session_key)
        if agent is _AGENT_PENDING_SENTINEL:
            # Force-clean the sentinel so the session is unlocked.
            if session_key in self._running_agents:
                del self._running_agents[session_key]
            logger.info("HARD STOP (pending) for session %s — sentinel cleared", session_key[:20])
            return "⚡ Force-stopped. The agent was still starting — session unlocked."
        if agent:
            agent.interrupt("Stop requested")
            # Force-clean the session lock so a truly hung agent doesn't
            # keep it locked forever.
            if session_key in self._running_agents:
                del self._running_agents[session_key]
            return "⚡ Force-stopped. The session is unlocked — you can send a new message."
        else:
            return "No active task to stop."
    
    async def _handle_help_command(self, event: MessageEvent) -> str:
        """Handle /help command - list available commands."""
        from hermes_cli.commands import gateway_help_lines
        lines = [
            "📖 **Hermes Commands**\n",
            *gateway_help_lines(),
        ]
        try:
            from agent.skill_commands import get_skill_commands
            skill_cmds = get_skill_commands()
            if skill_cmds:
                lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):")
                for cmd in sorted(skill_cmds):
                    lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}")
        except Exception:
            pass
        return "\n".join(lines)
    
    async def _handle_provider_command(self, event: MessageEvent) -> str:
        """Handle /provider command - show available providers."""
        import yaml
        from hermes_cli.models import (
            list_available_providers,
            normalize_provider,
            _PROVIDER_LABELS,
        )

        # Resolve current provider from config
        current_provider = "openrouter"
        config_path = _hermes_home / 'config.yaml'
        try:
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    cfg = yaml.safe_load(f) or {}
                model_cfg = cfg.get("model", {})
                if isinstance(model_cfg, dict):
                    current_provider = model_cfg.get("provider", current_provider)
        except Exception:
            pass

        current_provider = normalize_provider(current_provider)
        if current_provider == "auto":
            try:
                from hermes_cli.auth import resolve_provider as _resolve_provider
                current_provider = _resolve_provider(current_provider)
            except Exception:
                current_provider = "openrouter"

        # Detect custom endpoint
        if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
            current_provider = "custom"

        current_label = _PROVIDER_LABELS.get(current_provider, current_provider)

        lines = [
            f"🔌 **Current provider:** {current_label} (`{current_provider}`)",
            "",
            "**Available providers:**",
        ]

        providers = list_available_providers()
        for p in providers:
            marker = " ← active" if p["id"] == current_provider else ""
            auth = "✅" if p["authenticated"] else "❌"
            aliases = f"  _(also: {', '.join(p['aliases'])})_" if p["aliases"] else ""
            lines.append(f"{auth} `{p['id']}` — {p['label']}{aliases}{marker}")

        lines.append("")
        lines.append("Switch: `/model provider:model-name`")
        lines.append("Setup: `hermes setup`")
        return "\n".join(lines)
    
    async def _handle_personality_command(self, event: MessageEvent) -> str:
        """Handle /personality command - list or set a personality."""
        import yaml

        args = event.get_command_args().strip().lower()
        config_path = _hermes_home / 'config.yaml'

        try:
            if config_path.exists():
                with open(config_path, 'r', encoding="utf-8") as f:
                    config = yaml.safe_load(f) or {}
                personalities = config.get("agent", {}).get("personalities", {})
            else:
                config = {}
                personalities = {}
        except Exception:
            config = {}
            personalities = {}

        if not personalities:
            return "No personalities configured in `~/.hermes/config.yaml`"

        if not args:
            lines = ["🎭 **Available Personalities**\n"]
            lines.append("• `none` — (no personality overlay)")
            for name, prompt in personalities.items():
                if isinstance(prompt, dict):
                    preview = prompt.get("description") or prompt.get("system_prompt", "")[:50]
                else:
                    preview = prompt[:50] + "..." if len(prompt) > 50 else prompt
                lines.append(f"• `{name}` — {preview}")
            lines.append("\nUsage: `/personality <name>`")
            return "\n".join(lines)

        def _resolve_prompt(value):
            if isinstance(value, dict):
                parts = [value.get("system_prompt", "")]
                if value.get("tone"):
                    parts.append(f'Tone: {value["tone"]}')
                if value.get("style"):
                    parts.append(f'Style: {value["style"]}')
                return "\n".join(p for p in parts if p)
            return str(value)

        if args in ("none", "default", "neutral"):
            try:
                if "agent" not in config or not isinstance(config.get("agent"), dict):
                    config["agent"] = {}
                config["agent"]["system_prompt"] = ""
                with open(config_path, "w") as f:
                    yaml.dump(config, f, default_flow_style=False, sort_keys=False)
            except Exception as e:
                return f"⚠️ Failed to save personality change: {e}"
            self._ephemeral_system_prompt = ""
            return "🎭 Personality cleared — using base agent behavior.\n_(takes effect on next message)_"
        elif args in personalities:
            new_prompt = _resolve_prompt(personalities[args])

            # Write to config.yaml, same pattern as CLI save_config_value.
            try:
                if "agent" not in config or not isinstance(config.get("agent"), dict):
                    config["agent"] = {}
                config["agent"]["system_prompt"] = new_prompt
                with open(config_path, 'w', encoding="utf-8") as f:
                    yaml.dump(config, f, default_flow_style=False, sort_keys=False)
            except Exception as e:
                return f"⚠️ Failed to save personality change: {e}"

            # Update in-memory so it takes effect on the very next message.
            self._ephemeral_system_prompt = new_prompt

            return f"🎭 Personality set to **{args}**\n_(takes effect on next message)_"

        available = "`none`, " + ", ".join(f"`{n}`" for n in personalities.keys())
        return f"Unknown personality: `{args}`\n\nAvailable: {available}"
    
    async def _handle_retry_command(self, event: MessageEvent) -> str:
        """Handle /retry command - re-send the last user message."""
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
        history = self.session_store.load_transcript(session_entry.session_id)
        
        # Find the last user message
        last_user_msg = None
        last_user_idx = None
        for i in range(len(history) - 1, -1, -1):
            if history[i].get("role") == "user":
                last_user_msg = history[i].get("content", "")
                last_user_idx = i
                break
        
        if not last_user_msg:
            return "No previous message to retry."
        
        # Truncate history to before the last user message and persist
        truncated = history[:last_user_idx]
        self.session_store.rewrite_transcript(session_entry.session_id, truncated)
        # Reset stored token count — transcript was truncated
        session_entry.last_prompt_tokens = 0
        
        # Re-send by creating a fake text event with the old message
        retry_event = MessageEvent(
            text=last_user_msg,
            message_type=MessageType.TEXT,
            source=source,
            raw_message=event.raw_message,
        )
        
        # Let the normal message handler process it
        return await self._handle_message(retry_event)
    
    async def _handle_undo_command(self, event: MessageEvent) -> str:
        """Handle /undo command - remove the last user/assistant exchange."""
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
        history = self.session_store.load_transcript(session_entry.session_id)
        
        # Find the last user message and remove everything from it onward
        last_user_idx = None
        for i in range(len(history) - 1, -1, -1):
            if history[i].get("role") == "user":
                last_user_idx = i
                break
        
        if last_user_idx is None:
            return "Nothing to undo."
        
        removed_msg = history[last_user_idx].get("content", "")
        removed_count = len(history) - last_user_idx
        self.session_store.rewrite_transcript(session_entry.session_id, history[:last_user_idx])
        # Reset stored token count — transcript was truncated
        session_entry.last_prompt_tokens = 0
        
        preview = removed_msg[:40] + "..." if len(removed_msg) > 40 else removed_msg
        return f"↩️ Undid {removed_count} message(s).\nRemoved: \"{preview}\""
    
    async def _handle_set_home_command(self, event: MessageEvent) -> str:
        """Handle /sethome command -- set the current chat as the platform's home channel."""
        source = event.source
        platform_name = source.platform.value if source.platform else "unknown"
        chat_id = source.chat_id
        chat_name = source.chat_name or chat_id
        
        env_key = f"{platform_name.upper()}_HOME_CHANNEL"
        
        # Save to config.yaml
        try:
            import yaml
            config_path = _hermes_home / 'config.yaml'
            user_config = {}
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    user_config = yaml.safe_load(f) or {}
            user_config[env_key] = chat_id
            with open(config_path, 'w', encoding="utf-8") as f:
                yaml.dump(user_config, f, default_flow_style=False)
            # Also set in the current environment so it takes effect immediately
            os.environ[env_key] = str(chat_id)
        except Exception as e:
            return f"Failed to save home channel: {e}"
        
        return (
            f"✅ Home channel set to **{chat_name}** (ID: {chat_id}).\n"
            f"Cron jobs and cross-platform messages will be delivered here."
        )
    
    @staticmethod
    def _get_guild_id(event: MessageEvent) -> Optional[int]:
        """Extract Discord guild_id from the raw message object."""
        raw = getattr(event, "raw_message", None)
        if raw is None:
            return None
        # Slash command interaction
        if hasattr(raw, "guild_id") and raw.guild_id:
            return int(raw.guild_id)
        # Regular message
        if hasattr(raw, "guild") and raw.guild:
            return raw.guild.id
        return None

    async def _handle_voice_command(self, event: MessageEvent) -> str:
        """Handle /voice [on|off|tts|channel|leave|status] command."""
        args = event.get_command_args().strip().lower()
        chat_id = event.source.chat_id

        adapter = self.adapters.get(event.source.platform)

        if args in ("on", "enable"):
            self._voice_mode[chat_id] = "voice_only"
            self._save_voice_modes()
            if adapter:
                self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
            return (
                "Voice mode enabled.\n"
                "I'll reply with voice when you send voice messages.\n"
                "Use /voice tts to get voice replies for all messages."
            )
        elif args in ("off", "disable"):
            self._voice_mode[chat_id] = "off"
            self._save_voice_modes()
            if adapter:
                self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)
            return "Voice mode disabled. Text-only replies."
        elif args == "tts":
            self._voice_mode[chat_id] = "all"
            self._save_voice_modes()
            if adapter:
                self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
            return (
                "Auto-TTS enabled.\n"
                "All replies will include a voice message."
            )
        elif args in ("channel", "join"):
            return await self._handle_voice_channel_join(event)
        elif args == "leave":
            return await self._handle_voice_channel_leave(event)
        elif args == "status":
            mode = self._voice_mode.get(chat_id, "off")
            labels = {
                "off": "Off (text only)",
                "voice_only": "On (voice reply to voice messages)",
                "all": "TTS (voice reply to all messages)",
            }
            # Append voice channel info if connected
            adapter = self.adapters.get(event.source.platform)
            guild_id = self._get_guild_id(event)
            if guild_id and hasattr(adapter, "get_voice_channel_info"):
                info = adapter.get_voice_channel_info(guild_id)
                if info:
                    lines = [
                        f"Voice mode: {labels.get(mode, mode)}",
                        f"Voice channel: #{info['channel_name']}",
                        f"Participants: {info['member_count']}",
                    ]
                    for m in info["members"]:
                        status = " (speaking)" if m.get("is_speaking") else ""
                        lines.append(f"  - {m['display_name']}{status}")
                    return "\n".join(lines)
            return f"Voice mode: {labels.get(mode, mode)}"
        else:
            # Toggle: off → on, on/all → off
            current = self._voice_mode.get(chat_id, "off")
            if current == "off":
                self._voice_mode[chat_id] = "voice_only"
                self._save_voice_modes()
                if adapter:
                    self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
                return "Voice mode enabled."
            else:
                self._voice_mode[chat_id] = "off"
                self._save_voice_modes()
                if adapter:
                    self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)
                return "Voice mode disabled."

    async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
        """Join the user's current Discord voice channel."""
        adapter = self.adapters.get(event.source.platform)
        if not hasattr(adapter, "join_voice_channel"):
            return "Voice channels are not supported on this platform."

        guild_id = self._get_guild_id(event)
        if not guild_id:
            return "This command only works in a Discord server."

        voice_channel = await adapter.get_user_voice_channel(
            guild_id, event.source.user_id
        )
        if not voice_channel:
            return "You need to be in a voice channel first."

        # Wire callbacks BEFORE join so voice input arriving immediately
        # after connection is not lost.
        if hasattr(adapter, "_voice_input_callback"):
            adapter._voice_input_callback = self._handle_voice_channel_input
        if hasattr(adapter, "_on_voice_disconnect"):
            adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup

        try:
            success = await adapter.join_voice_channel(voice_channel)
        except Exception as e:
            logger.warning("Failed to join voice channel: %s", e)
            adapter._voice_input_callback = None
            err_lower = str(e).lower()
            if "pynacl" in err_lower or "nacl" in err_lower or "davey" in err_lower:
                return (
                    "Voice dependencies are missing (PyNaCl / davey). "
                    "Install or reinstall Hermes with the messaging extra, e.g. "
                    "`pip install hermes-agent[messaging]`."
                )
            return f"Failed to join voice channel: {e}"

        if success:
            adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
            self._voice_mode[event.source.chat_id] = "all"
            self._save_voice_modes()
            self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
            return (
                f"Joined voice channel **{voice_channel.name}**.\n"
                f"I'll speak my replies and listen to you. Use /voice leave to disconnect."
            )
        # Join failed — clear callback
        adapter._voice_input_callback = None
        return "Failed to join voice channel. Check bot permissions (Connect + Speak)."

    async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
        """Leave the Discord voice channel."""
        adapter = self.adapters.get(event.source.platform)
        guild_id = self._get_guild_id(event)

        if not guild_id or not hasattr(adapter, "leave_voice_channel"):
            return "Not in a voice channel."

        if not hasattr(adapter, "is_in_voice_channel") or not adapter.is_in_voice_channel(guild_id):
            return "Not in a voice channel."

        try:
            await adapter.leave_voice_channel(guild_id)
        except Exception as e:
            logger.warning("Error leaving voice channel: %s", e)
        # Always clean up state even if leave raised an exception
        self._voice_mode[event.source.chat_id] = "off"
        self._save_voice_modes()
        self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=True)
        if hasattr(adapter, "_voice_input_callback"):
            adapter._voice_input_callback = None
        return "Left voice channel."

    def _handle_voice_timeout_cleanup(self, chat_id: str) -> None:
        """Called by the adapter when a voice channel times out.

        Cleans up runner-side voice_mode state that the adapter cannot reach.
        """
        self._voice_mode[chat_id] = "off"
        self._save_voice_modes()
        adapter = self.adapters.get(Platform.DISCORD)
        self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)

    async def _handle_voice_channel_input(
        self, guild_id: int, user_id: int, transcript: str
    ):
        """Handle transcribed voice from a user in a voice channel.

        Creates a synthetic MessageEvent and processes it through the
        adapter's full message pipeline (session, typing, agent, TTS reply).
        """
        adapter = self.adapters.get(Platform.DISCORD)
        if not adapter:
            return

        text_ch_id = adapter._voice_text_channels.get(guild_id)
        if not text_ch_id:
            return

        # Check authorization before processing voice input
        source = SessionSource(
            platform=Platform.DISCORD,
            chat_id=str(text_ch_id),
            user_id=str(user_id),
            user_name=str(user_id),
            chat_type="channel",
        )
        if not self._is_user_authorized(source):
            logger.debug("Unauthorized voice input from user %d, ignoring", user_id)
            return

        # Show transcript in text channel (after auth, with mention sanitization)
        try:
            channel = adapter._client.get_channel(text_ch_id)
            if channel:
                safe_text = transcript[:2000].replace("@everyone", "@\u200beveryone").replace("@here", "@\u200bhere")
                await channel.send(f"**[Voice]** <@{user_id}>: {safe_text}")
        except Exception:
            pass

        # Build a synthetic MessageEvent and feed through the normal pipeline
        # Use SimpleNamespace as raw_message so _get_guild_id() can extract
        # guild_id and _send_voice_reply() plays audio in the voice channel.
        from types import SimpleNamespace
        event = MessageEvent(
            source=source,
            text=transcript,
            message_type=MessageType.VOICE,
            raw_message=SimpleNamespace(guild_id=guild_id, guild=None),
        )

        await adapter.handle_message(event)

    def _should_send_voice_reply(
        self,
        event: MessageEvent,
        response: str,
        agent_messages: list,
        already_sent: bool = False,
    ) -> bool:
        """Decide whether the runner should send a TTS voice reply.

        Returns False when:
        - voice_mode is off for this chat
        - response is empty or an error
        - agent already called text_to_speech tool (dedup)
        - voice input and base adapter auto-TTS already handled it (skip_double)
          UNLESS streaming already consumed the response (already_sent=True),
          in which case the base adapter won't have text for auto-TTS so the
          runner must handle it.
        """
        if not response or response.startswith("Error:"):
            return False

        chat_id = event.source.chat_id
        voice_mode = self._voice_mode.get(chat_id, "off")
        is_voice_input = (event.message_type == MessageType.VOICE)

        should = (
            (voice_mode == "all")
            or (voice_mode == "voice_only" and is_voice_input)
        )
        if not should:
            return False

        # Dedup: agent already called TTS tool
        has_agent_tts = any(
            msg.get("role") == "assistant"
            and any(
                tc.get("function", {}).get("name") == "text_to_speech"
                for tc in (msg.get("tool_calls") or [])
            )
            for msg in agent_messages
        )
        if has_agent_tts:
            return False

        # Dedup: base adapter auto-TTS already handles voice input
        # (play_tts plays in VC when connected, so runner can skip).
        # When streaming already delivered the text (already_sent=True),
        # the base adapter will receive None and can't run auto-TTS,
        # so the runner must take over.
        if is_voice_input and not already_sent:
            return False

        return True

    async def _send_voice_reply(self, event: MessageEvent, text: str) -> None:
        """Generate TTS audio and send as a voice message before the text reply."""
        import uuid as _uuid
        audio_path = None
        actual_path = None
        try:
            from tools.tts_tool import text_to_speech_tool, _strip_markdown_for_tts

            tts_text = _strip_markdown_for_tts(text[:4000])
            if not tts_text:
                return

            # Use .mp3 extension so edge-tts conversion to opus works correctly.
            # The TTS tool may convert to .ogg — use file_path from result.
            audio_path = os.path.join(
                tempfile.gettempdir(), "hermes_voice",
                f"tts_reply_{_uuid.uuid4().hex[:12]}.mp3",
            )
            os.makedirs(os.path.dirname(audio_path), exist_ok=True)

            result_json = await asyncio.to_thread(
                text_to_speech_tool, text=tts_text, output_path=audio_path
            )
            result = json.loads(result_json)

            # Use the actual file path from result (may differ after opus conversion)
            actual_path = result.get("file_path", audio_path)
            if not result.get("success") or not os.path.isfile(actual_path):
                logger.warning("Auto voice reply TTS failed: %s", result.get("error"))
                return

            adapter = self.adapters.get(event.source.platform)

            # If connected to a voice channel, play there instead of sending a file
            guild_id = self._get_guild_id(event)
            if (guild_id
                    and hasattr(adapter, "play_in_voice_channel")
                    and hasattr(adapter, "is_in_voice_channel")
                    and adapter.is_in_voice_channel(guild_id)):
                await adapter.play_in_voice_channel(guild_id, actual_path)
            elif adapter and hasattr(adapter, "send_voice"):
                send_kwargs: Dict[str, Any] = {
                    "chat_id": event.source.chat_id,
                    "audio_path": actual_path,
                    "reply_to": event.message_id,
                }
                if event.source.thread_id:
                    send_kwargs["metadata"] = {"thread_id": event.source.thread_id}
                await adapter.send_voice(**send_kwargs)
        except Exception as e:
            logger.warning("Auto voice reply failed: %s", e, exc_info=True)
        finally:
            for p in {audio_path, actual_path} - {None}:
                try:
                    os.unlink(p)
                except OSError:
                    pass

    async def _deliver_media_from_response(
        self,
        response: str,
        event: MessageEvent,
        adapter,
    ) -> None:
        """Extract MEDIA: tags and local file paths from a response and deliver them.

        Called after streaming has already sent the text to the user, so the
        text itself is already delivered — this only handles file attachments
        that the normal _process_message_background path would have caught.
        """
        from pathlib import Path

        try:
            media_files, _ = adapter.extract_media(response)
            _, cleaned = adapter.extract_images(response)
            local_files, _ = adapter.extract_local_files(cleaned)

            _thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None

            _AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
            _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
            _IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}

            for media_path, is_voice in media_files:
                try:
                    ext = Path(media_path).suffix.lower()
                    if ext in _AUDIO_EXTS:
                        await adapter.send_voice(
                            chat_id=event.source.chat_id,
                            audio_path=media_path,
                            metadata=_thread_meta,
                        )
                    elif ext in _VIDEO_EXTS:
                        await adapter.send_video(
                            chat_id=event.source.chat_id,
                            video_path=media_path,
                            metadata=_thread_meta,
                        )
                    elif ext in _IMAGE_EXTS:
                        await adapter.send_image_file(
                            chat_id=event.source.chat_id,
                            image_path=media_path,
                            metadata=_thread_meta,
                        )
                    else:
                        await adapter.send_document(
                            chat_id=event.source.chat_id,
                            file_path=media_path,
                            metadata=_thread_meta,
                        )
                except Exception as e:
                    logger.warning("[%s] Post-stream media delivery failed: %s", adapter.name, e)

            for file_path in local_files:
                try:
                    ext = Path(file_path).suffix.lower()
                    if ext in _IMAGE_EXTS:
                        await adapter.send_image_file(
                            chat_id=event.source.chat_id,
                            image_path=file_path,
                            metadata=_thread_meta,
                        )
                    else:
                        await adapter.send_document(
                            chat_id=event.source.chat_id,
                            file_path=file_path,
                            metadata=_thread_meta,
                        )
                except Exception as e:
                    logger.warning("[%s] Post-stream file delivery failed: %s", adapter.name, e)

        except Exception as e:
            logger.warning("Post-stream media extraction failed: %s", e)

    async def _handle_rollback_command(self, event: MessageEvent) -> str:
        """Handle /rollback command — list or restore filesystem checkpoints."""
        from tools.checkpoint_manager import CheckpointManager, format_checkpoint_list

        # Read checkpoint config from config.yaml
        cp_cfg = {}
        try:
            import yaml as _y
            _cfg_path = _hermes_home / "config.yaml"
            if _cfg_path.exists():
                with open(_cfg_path, encoding="utf-8") as _f:
                    _data = _y.safe_load(_f) or {}
                cp_cfg = _data.get("checkpoints", {})
                if isinstance(cp_cfg, bool):
                    cp_cfg = {"enabled": cp_cfg}
        except Exception:
            pass

        if not cp_cfg.get("enabled", False):
            return (
                "Checkpoints are not enabled.\n"
                "Enable in config.yaml:\n```\ncheckpoints:\n  enabled: true\n```"
            )

        mgr = CheckpointManager(
            enabled=True,
            max_snapshots=cp_cfg.get("max_snapshots", 50),
        )

        cwd = os.getenv("MESSAGING_CWD", str(Path.home()))
        arg = event.get_command_args().strip()

        if not arg:
            checkpoints = mgr.list_checkpoints(cwd)
            return format_checkpoint_list(checkpoints, cwd)

        # Restore by number or hash
        checkpoints = mgr.list_checkpoints(cwd)
        if not checkpoints:
            return f"No checkpoints found for {cwd}"

        target_hash = None
        try:
            idx = int(arg) - 1
            if 0 <= idx < len(checkpoints):
                target_hash = checkpoints[idx]["hash"]
            else:
                return f"Invalid checkpoint number. Use 1-{len(checkpoints)}."
        except ValueError:
            target_hash = arg

        result = mgr.restore(cwd, target_hash)
        if result["success"]:
            return (
                f"✅ Restored to checkpoint {result['restored_to']}: {result['reason']}\n"
                f"A pre-rollback snapshot was saved automatically."
            )
        return f"❌ {result['error']}"

    async def _handle_background_command(self, event: MessageEvent) -> str:
        """Handle /background <prompt> — run a prompt in a separate background session.

        Spawns a new AIAgent in a background thread with its own session.
        When it completes, sends the result back to the same chat without
        modifying the active session's conversation history.
        """
        prompt = event.get_command_args().strip()
        if not prompt:
            return (
                "Usage: /background <prompt>\n"
                "Example: /background Summarize the top HN stories today\n\n"
                "Runs the prompt in a separate session. "
                "You can keep chatting — the result will appear here when done."
            )

        source = event.source
        task_id = f"bg_{datetime.now().strftime('%H%M%S')}_{os.urandom(3).hex()}"

        # Fire-and-forget the background task
        _task = asyncio.create_task(
            self._run_background_task(prompt, source, task_id)
        )
        self._background_tasks.add(_task)
        _task.add_done_callback(self._background_tasks.discard)

        preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
        return f'🔄 Background task started: "{preview}"\nTask ID: {task_id}\nYou can keep chatting — results will appear when done.'

    async def _run_background_task(
        self, prompt: str, source: "SessionSource", task_id: str
    ) -> None:
        """Execute a background agent task and deliver the result to the chat."""
        from run_agent import AIAgent

        adapter = self.adapters.get(source.platform)
        if not adapter:
            logger.warning("No adapter for platform %s in background task %s", source.platform, task_id)
            return

        _thread_metadata = {"thread_id": source.thread_id} if source.thread_id else None

        try:
            runtime_kwargs = _resolve_runtime_agent_kwargs()
            if not runtime_kwargs.get("api_key"):
                await adapter.send(
                    source.chat_id,
                    f"❌ Background task {task_id} failed: no provider credentials configured.",
                    metadata=_thread_metadata,
                )
                return

            user_config = _load_gateway_config()
            model = _resolve_gateway_model(user_config)
            platform_key = _platform_config_key(source.platform)

            from hermes_cli.tools_config import _get_platform_tools
            enabled_toolsets = sorted(_get_platform_tools(user_config, platform_key))

            pr = self._provider_routing
            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
            reasoning_config = self._load_reasoning_config()
            self._reasoning_config = reasoning_config
            turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs)

            def run_sync():
                agent = AIAgent(
                    model=turn_route["model"],
                    **turn_route["runtime"],
                    max_iterations=max_iterations,
                    quiet_mode=True,
                    verbose_logging=False,
                    enabled_toolsets=enabled_toolsets,
                    reasoning_config=reasoning_config,
                    providers_allowed=pr.get("only"),
                    providers_ignored=pr.get("ignore"),
                    providers_order=pr.get("order"),
                    provider_sort=pr.get("sort"),
                    provider_require_parameters=pr.get("require_parameters", False),
                    provider_data_collection=pr.get("data_collection"),
                    session_id=task_id,
                    platform=platform_key,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
                )

                return agent.run_conversation(
                    user_message=prompt,
                    task_id=task_id,
                )

            loop = asyncio.get_event_loop()
            result = await loop.run_in_executor(None, run_sync)

            response = result.get("final_response", "") if result else ""
            if not response and result and result.get("error"):
                response = f"Error: {result['error']}"

            # Extract media files from the response
            if response:
                media_files, response = adapter.extract_media(response)
                images, text_content = adapter.extract_images(response)

                preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
                header = f'✅ Background task complete\nPrompt: "{preview}"\n\n'

                if text_content:
                    await adapter.send(
                        chat_id=source.chat_id,
                        content=header + text_content,
                        metadata=_thread_metadata,
                    )
                elif not images and not media_files:
                    await adapter.send(
                        chat_id=source.chat_id,
                        content=header + "(No response generated)",
                        metadata=_thread_metadata,
                    )

                # Send extracted images
                for image_url, alt_text in (images or []):
                    try:
                        await adapter.send_image(
                            chat_id=source.chat_id,
                            image_url=image_url,
                            caption=alt_text,
                        )
                    except Exception:
                        pass

                # Send media files
                for media_path in (media_files or []):
                    try:
                        await adapter.send_file(
                            chat_id=source.chat_id,
                            file_path=media_path,
                        )
                    except Exception:
                        pass
            else:
                preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
                await adapter.send(
                    chat_id=source.chat_id,
                    content=f'✅ Background task complete\nPrompt: "{preview}"\n\n(No response generated)',
                    metadata=_thread_metadata,
                )

        except Exception as e:
            logger.exception("Background task %s failed", task_id)
            try:
                await adapter.send(
                    chat_id=source.chat_id,
                    content=f"❌ Background task {task_id} failed: {e}",
                    metadata=_thread_metadata,
                )
            except Exception:
                pass

    async def _handle_reasoning_command(self, event: MessageEvent) -> str:
        """Handle /reasoning command — manage reasoning effort and display toggle.

        Usage:
            /reasoning              Show current effort level and display state
            /reasoning <level>      Set reasoning effort (none, low, medium, high, xhigh)
            /reasoning show|on      Show model reasoning in responses
            /reasoning hide|off     Hide model reasoning from responses
        """
        import yaml

        args = event.get_command_args().strip().lower()
        config_path = _hermes_home / "config.yaml"
        self._reasoning_config = self._load_reasoning_config()
        self._show_reasoning = self._load_show_reasoning()

        def _save_config_key(key_path: str, value):
            """Save a dot-separated key to config.yaml."""
            try:
                user_config = {}
                if config_path.exists():
                    with open(config_path, encoding="utf-8") as f:
                        user_config = yaml.safe_load(f) or {}
                keys = key_path.split(".")
                current = user_config
                for k in keys[:-1]:
                    if k not in current or not isinstance(current[k], dict):
                        current[k] = {}
                    current = current[k]
                current[keys[-1]] = value
                with open(config_path, "w", encoding="utf-8") as f:
                    yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
                return True
            except Exception as e:
                logger.error("Failed to save config key %s: %s", key_path, e)
                return False

        if not args:
            # Show current state
            rc = self._reasoning_config
            if rc is None:
                level = "medium (default)"
            elif rc.get("enabled") is False:
                level = "none (disabled)"
            else:
                level = rc.get("effort", "medium")
            display_state = "on ✓" if self._show_reasoning else "off"
            return (
                "🧠 **Reasoning Settings**\n\n"
                f"**Effort:** `{level}`\n"
                f"**Display:** {display_state}\n\n"
                "_Usage:_ `/reasoning <none|low|medium|high|xhigh|show|hide>`"
            )

        # Display toggle
        if args in ("show", "on"):
            self._show_reasoning = True
            _save_config_key("display.show_reasoning", True)
            return "🧠 ✓ Reasoning display: **ON**\nModel thinking will be shown before each response."

        if args in ("hide", "off"):
            self._show_reasoning = False
            _save_config_key("display.show_reasoning", False)
            return "🧠 ✓ Reasoning display: **OFF**"

        # Effort level change
        effort = args.strip()
        if effort == "none":
            parsed = {"enabled": False}
        elif effort in ("xhigh", "high", "medium", "low", "minimal"):
            parsed = {"enabled": True, "effort": effort}
        else:
            return (
                f"⚠️ Unknown argument: `{effort}`\n\n"
                "**Valid levels:** none, low, minimal, medium, high, xhigh\n"
                "**Display:** show, hide"
            )

        self._reasoning_config = parsed
        if _save_config_key("agent.reasoning_effort", effort):
            return f"🧠 ✓ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_"
        else:
            return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"

    async def _handle_verbose_command(self, event: MessageEvent) -> str:
        """Handle /verbose command — cycle tool progress display mode.

        Gated by ``display.tool_progress_command`` in config.yaml (default off).
        When enabled, cycles the tool progress mode through off → new → all →
        verbose → off, same as the CLI.
        """
        import yaml

        config_path = _hermes_home / "config.yaml"

        # --- check config gate ------------------------------------------------
        try:
            user_config = {}
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    user_config = yaml.safe_load(f) or {}
            gate_enabled = user_config.get("display", {}).get("tool_progress_command", False)
        except Exception:
            gate_enabled = False

        if not gate_enabled:
            return (
                "The `/verbose` command is not enabled for messaging platforms.\n\n"
                "Enable it in `config.yaml`:\n```yaml\n"
                "display:\n  tool_progress_command: true\n```"
            )

        # --- cycle mode -------------------------------------------------------
        cycle = ["off", "new", "all", "verbose"]
        descriptions = {
            "off": "⚙️ Tool progress: **OFF** — no tool activity shown.",
            "new": "⚙️ Tool progress: **NEW** — shown when tool changes.",
            "all": "⚙️ Tool progress: **ALL** — every tool call shown.",
            "verbose": "⚙️ Tool progress: **VERBOSE** — full args and results.",
        }

        raw_progress = user_config.get("display", {}).get("tool_progress", "all")
        # YAML 1.1 parses bare "off" as boolean False — normalise back
        if raw_progress is False:
            current = "off"
        elif raw_progress is True:
            current = "all"
        else:
            current = str(raw_progress).lower()
        if current not in cycle:
            current = "all"
        idx = (cycle.index(current) + 1) % len(cycle)
        new_mode = cycle[idx]

        # Save to config.yaml
        try:
            if "display" not in user_config or not isinstance(user_config.get("display"), dict):
                user_config["display"] = {}
            user_config["display"]["tool_progress"] = new_mode
            with open(config_path, "w", encoding="utf-8") as f:
                yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
            return f"{descriptions[new_mode]}\n_(saved to config — takes effect on next message)_"
        except Exception as e:
            logger.warning("Failed to save tool_progress mode: %s", e)
            return f"{descriptions[new_mode]}\n_(could not save to config: {e})_"

    async def _handle_compress_command(self, event: MessageEvent) -> str:
        """Handle /compress command -- manually compress conversation context."""
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
        history = self.session_store.load_transcript(session_entry.session_id)

        if not history or len(history) < 4:
            return "Not enough conversation to compress (need at least 4 messages)."

        try:
            from run_agent import AIAgent
            from agent.model_metadata import estimate_messages_tokens_rough

            runtime_kwargs = _resolve_runtime_agent_kwargs()
            if not runtime_kwargs.get("api_key"):
                return "No provider configured -- cannot compress."

            # Resolve model from config (same reason as memory flush above).
            model = _resolve_gateway_model()

            msgs = [
                {"role": m.get("role"), "content": m.get("content")}
                for m in history
                if m.get("role") in ("user", "assistant") and m.get("content")
            ]
            original_count = len(msgs)
            approx_tokens = estimate_messages_tokens_rough(msgs)

            tmp_agent = AIAgent(
                **runtime_kwargs,
                model=model,
                max_iterations=4,
                quiet_mode=True,
                enabled_toolsets=["memory"],
                session_id=session_entry.session_id,
            )
            tmp_agent._print_fn = lambda *a, **kw: None

            loop = asyncio.get_event_loop()
            compressed, _ = await loop.run_in_executor(
                None,
                lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens)
            )

            # _compress_context already calls end_session() on the old session
            # (preserving its full transcript in SQLite) and creates a new
            # session_id for the continuation.  Write the compressed messages
            # into the NEW session so the original history stays searchable.
            new_session_id = tmp_agent.session_id
            if new_session_id != session_entry.session_id:
                session_entry.session_id = new_session_id
                self.session_store._save()

            self.session_store.rewrite_transcript(new_session_id, compressed)
            # Reset stored token count — transcript changed, old value is stale
            self.session_store.update_session(
                session_entry.session_key, last_prompt_tokens=0
            )
            new_count = len(compressed)
            new_tokens = estimate_messages_tokens_rough(compressed)

            return (
                f"🗜️ Compressed: {original_count} → {new_count} messages\n"
                f"~{approx_tokens:,} → ~{new_tokens:,} tokens"
            )
        except Exception as e:
            logger.warning("Manual compress failed: %s", e)
            return f"Compression failed: {e}"

    async def _handle_title_command(self, event: MessageEvent) -> str:
        """Handle /title command — set or show the current session's title."""
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
        session_id = session_entry.session_id

        if not self._session_db:
            return "Session database not available."

        # Ensure session exists in SQLite DB (it may only exist in session_store
        # if this is the first command in a new session)
        existing_title = self._session_db.get_session_title(session_id)
        if existing_title is None:
            # Session doesn't exist in DB yet — create it
            try:
                self._session_db.create_session(
                    session_id=session_id,
                    source=source.platform.value if source.platform else "unknown",
                    user_id=source.user_id,
                )
            except Exception:
                pass  # Session might already exist, ignore errors

        title_arg = event.get_command_args().strip()
        if title_arg:
            # Sanitize the title before setting
            try:
                sanitized = self._session_db.sanitize_title(title_arg)
            except ValueError as e:
                return f"⚠️ {e}"
            if not sanitized:
                return "⚠️ Title is empty after cleanup. Please use printable characters."
            # Set the title
            try:
                if self._session_db.set_session_title(session_id, sanitized):
                    return f"✏️ Session title set: **{sanitized}**"
                else:
                    return "Session not found in database."
            except ValueError as e:
                return f"⚠️ {e}"
        else:
            # Show the current title and session ID
            title = self._session_db.get_session_title(session_id)
            if title:
                return f"📌 Session: `{session_id}`\nTitle: **{title}**"
            else:
                return f"📌 Session: `{session_id}`\nNo title set. Usage: `/title My Session Name`"

    async def _handle_resume_command(self, event: MessageEvent) -> str:
        """Handle /resume command — switch to a previously-named session."""
        if not self._session_db:
            return "Session database not available."

        source = event.source
        session_key = self._session_key_for_source(source)
        name = event.get_command_args().strip()

        if not name:
            # List recent titled sessions for this user/platform
            try:
                user_source = source.platform.value if source.platform else None
                sessions = self._session_db.list_sessions_rich(
                    source=user_source, limit=10
                )
                titled = [s for s in sessions if s.get("title")]
                if not titled:
                    return (
                        "No named sessions found.\n"
                        "Use `/title My Session` to name your current session, "
                        "then `/resume My Session` to return to it later."
                    )
                lines = ["📋 **Named Sessions**\n"]
                for s in titled[:10]:
                    title = s["title"]
                    preview = s.get("preview", "")[:40]
                    preview_part = f" — _{preview}_" if preview else ""
                    lines.append(f"• **{title}**{preview_part}")
                lines.append("\nUsage: `/resume <session name>`")
                return "\n".join(lines)
            except Exception as e:
                logger.debug("Failed to list titled sessions: %s", e)
                return f"Could not list sessions: {e}"

        # Resolve the name to a session ID
        target_id = self._session_db.resolve_session_by_title(name)
        if not target_id:
            return (
                f"No session found matching '**{name}**'.\n"
                "Use `/resume` with no arguments to see available sessions."
            )

        # Check if already on that session
        current_entry = self.session_store.get_or_create_session(source)
        if current_entry.session_id == target_id:
            return f"📌 Already on session **{name}**."

        # Flush memories for current session before switching
        try:
            _flush_task = asyncio.create_task(
                self._async_flush_memories(current_entry.session_id, session_key)
            )
            self._background_tasks.add(_flush_task)
            _flush_task.add_done_callback(self._background_tasks.discard)
        except Exception as e:
            logger.debug("Memory flush on resume failed: %s", e)

        self._shutdown_gateway_honcho(session_key)

        # Clear any running agent for this session key
        if session_key in self._running_agents:
            del self._running_agents[session_key]

        # Switch the session entry to point at the old session
        new_entry = self.session_store.switch_session(session_key, target_id)
        if not new_entry:
            return "Failed to switch session."

        # Get the title for confirmation
        title = self._session_db.get_session_title(target_id) or name

        # Count messages for context
        history = self.session_store.load_transcript(target_id)
        msg_count = len([m for m in history if m.get("role") == "user"]) if history else 0
        msg_part = f" ({msg_count} message{'s' if msg_count != 1 else ''})" if msg_count else ""

        return f"↻ Resumed session **{title}**{msg_part}. Conversation restored."

    async def _handle_usage_command(self, event: MessageEvent) -> str:
        """Handle /usage command -- show token usage for the session's last agent run."""
        source = event.source
        session_key = self._session_key_for_source(source)

        agent = self._running_agents.get(session_key)
        if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
            lines = [
                "📊 **Session Token Usage**",
                f"Prompt (input): {agent.session_prompt_tokens:,}",
                f"Completion (output): {agent.session_completion_tokens:,}",
                f"Total: {agent.session_total_tokens:,}",
                f"API calls: {agent.session_api_calls}",
            ]
            ctx = agent.context_compressor
            if ctx.last_prompt_tokens:
                pct = ctx.last_prompt_tokens / ctx.context_length * 100 if ctx.context_length else 0
                lines.append(f"Context: {ctx.last_prompt_tokens:,} / {ctx.context_length:,} ({pct:.0f}%)")
            if ctx.compression_count:
                lines.append(f"Compressions: {ctx.compression_count}")
            return "\n".join(lines)

        # No running agent -- check session history for a rough count
        session_entry = self.session_store.get_or_create_session(source)
        history = self.session_store.load_transcript(session_entry.session_id)
        if history:
            from agent.model_metadata import estimate_messages_tokens_rough
            msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")]
            approx = estimate_messages_tokens_rough(msgs)
            return (
                f"📊 **Session Info**\n"
                f"Messages: {len(msgs)}\n"
                f"Estimated context: ~{approx:,} tokens\n"
                f"_(Detailed usage available during active conversations)_"
            )
        return "No usage data available for this session."

    async def _handle_insights_command(self, event: MessageEvent) -> str:
        """Handle /insights command -- show usage insights and analytics."""
        import asyncio as _asyncio

        args = event.get_command_args().strip()
        days = 30
        source = None

        # Parse simple args: /insights 7  or  /insights --days 7
        if args:
            parts = args.split()
            i = 0
            while i < len(parts):
                if parts[i] == "--days" and i + 1 < len(parts):
                    try:
                        days = int(parts[i + 1])
                    except ValueError:
                        return f"Invalid --days value: {parts[i + 1]}"
                    i += 2
                elif parts[i] == "--source" and i + 1 < len(parts):
                    source = parts[i + 1]
                    i += 2
                elif parts[i].isdigit():
                    days = int(parts[i])
                    i += 1
                else:
                    i += 1

        try:
            from hermes_state import SessionDB
            from agent.insights import InsightsEngine

            loop = _asyncio.get_event_loop()

            def _run_insights():
                db = SessionDB()
                engine = InsightsEngine(db)
                report = engine.generate(days=days, source=source)
                result = engine.format_gateway(report)
                db.close()
                return result

            return await loop.run_in_executor(None, _run_insights)
        except Exception as e:
            logger.error("Insights command error: %s", e, exc_info=True)
            return f"Error generating insights: {e}"

    async def _handle_reload_mcp_command(self, event: MessageEvent) -> str:
        """Handle /reload-mcp command -- disconnect and reconnect all MCP servers."""
        loop = asyncio.get_event_loop()
        try:
            from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _load_mcp_config, _servers, _lock

            # Capture old server names before shutdown
            with _lock:
                old_servers = set(_servers.keys())

            # Read new config before shutting down, so we know what will be added/removed
            new_config = _load_mcp_config()
            new_server_names = set(new_config.keys())

            # Shutdown existing connections
            await loop.run_in_executor(None, shutdown_mcp_servers)

            # Reconnect by discovering tools (reads config.yaml fresh)
            new_tools = await loop.run_in_executor(None, discover_mcp_tools)

            # Compute what changed
            with _lock:
                connected_servers = set(_servers.keys())

            added = connected_servers - old_servers
            removed = old_servers - connected_servers
            reconnected = connected_servers & old_servers

            lines = ["🔄 **MCP Servers Reloaded**\n"]
            if reconnected:
                lines.append(f"♻️ Reconnected: {', '.join(sorted(reconnected))}")
            if added:
                lines.append(f"➕ Added: {', '.join(sorted(added))}")
            if removed:
                lines.append(f"➖ Removed: {', '.join(sorted(removed))}")
            if not connected_servers:
                lines.append("No MCP servers connected.")
            else:
                lines.append(f"\n🔧 {len(new_tools)} tool(s) available from {len(connected_servers)} server(s)")

            # Inject a message at the END of the session history so the
            # model knows tools changed on its next turn.  Appended after
            # all existing messages to preserve prompt-cache for the prefix.
            change_parts = []
            if added:
                change_parts.append(f"Added servers: {', '.join(sorted(added))}")
            if removed:
                change_parts.append(f"Removed servers: {', '.join(sorted(removed))}")
            if reconnected:
                change_parts.append(f"Reconnected servers: {', '.join(sorted(reconnected))}")
            tool_summary = f"{len(new_tools)} MCP tool(s) now available" if new_tools else "No MCP tools available"
            change_detail = ". ".join(change_parts) + ". " if change_parts else ""
            reload_msg = {
                "role": "user",
                "content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
            }
            try:
                session_entry = self.session_store.get_or_create_session(event.source)
                self.session_store.append_to_transcript(
                    session_entry.session_id, reload_msg
                )
            except Exception:
                pass  # Best-effort; don't fail the reload over a transcript write

            return "\n".join(lines)

        except Exception as e:
            logger.warning("MCP reload failed: %s", e)
            return f"❌ MCP reload failed: {e}"

    # ------------------------------------------------------------------
    # /approve & /deny — explicit dangerous-command approval
    # ------------------------------------------------------------------

    _APPROVAL_TIMEOUT_SECONDS = 300  # 5 minutes

    async def _handle_approve_command(self, event: MessageEvent) -> str:
        """Handle /approve command — execute a pending dangerous command.

        Usage:
            /approve          — approve and execute the pending command
            /approve session  — approve and remember for this session
            /approve always   — approve this pattern permanently
        """
        source = event.source
        session_key = self._session_key_for_source(source)

        if session_key not in self._pending_approvals:
            return "No pending command to approve."

        import time as _time
        approval = self._pending_approvals[session_key]

        # Check for timeout
        ts = approval.get("timestamp", 0)
        if _time.time() - ts > self._APPROVAL_TIMEOUT_SECONDS:
            self._pending_approvals.pop(session_key, None)
            return "⚠️ Approval expired (timed out after 5 minutes). Ask the agent to try again."

        self._pending_approvals.pop(session_key)
        cmd = approval["command"]
        pattern_keys = approval.get("pattern_keys", [])
        if not pattern_keys:
            pk = approval.get("pattern_key", "")
            pattern_keys = [pk] if pk else []

        # Determine approval scope from args
        args = event.get_command_args().strip().lower()
        from tools.approval import approve_session, approve_permanent

        if args in ("always", "permanent", "permanently"):
            for pk in pattern_keys:
                approve_permanent(pk)
            scope_msg = " (pattern approved permanently)"
        elif args in ("session", "ses"):
            for pk in pattern_keys:
                approve_session(session_key, pk)
            scope_msg = " (pattern approved for this session)"
        else:
            # One-time approval — just approve for session so the immediate
            # replay works, but don't advertise it as session-wide
            for pk in pattern_keys:
                approve_session(session_key, pk)
            scope_msg = ""

        logger.info("User approved dangerous command via /approve: %s...%s", cmd[:60], scope_msg)
        from tools.terminal_tool import terminal_tool
        result = terminal_tool(command=cmd, force=True)
        return f"✅ Command approved and executed{scope_msg}.\n\n```\n{result[:3500]}\n```"

    async def _handle_deny_command(self, event: MessageEvent) -> str:
        """Handle /deny command — reject a pending dangerous command."""
        source = event.source
        session_key = self._session_key_for_source(source)

        if session_key not in self._pending_approvals:
            return "No pending command to deny."

        self._pending_approvals.pop(session_key)
        logger.info("User denied dangerous command via /deny")
        return "❌ Command denied."

    async def _handle_update_command(self, event: MessageEvent) -> str:
        """Handle /update command — update Hermes Agent to the latest version.

        Spawns ``hermes update`` in a separate systemd scope so it survives the
        gateway restart that ``hermes update`` may trigger at the end. Marker
        files are written so either the current gateway process or the next one
        can notify the user when the update finishes.
        """
        import json
        import shutil
        import subprocess
        from datetime import datetime

        project_root = Path(__file__).parent.parent.resolve()
        git_dir = project_root / '.git'

        if not git_dir.exists():
            return "✗ Not a git repository — cannot update."

        hermes_cmd = _resolve_hermes_bin()
        if not hermes_cmd:
            return (
                "✗ Could not locate the `hermes` command. "
                "Hermes is running, but the update command could not find the "
                "executable on PATH or via the current Python interpreter. "
                "Try running `hermes update` manually in your terminal."
            )

        pending_path = _hermes_home / ".update_pending.json"
        output_path = _hermes_home / ".update_output.txt"
        exit_code_path = _hermes_home / ".update_exit_code"
        pending = {
            "platform": event.source.platform.value,
            "chat_id": event.source.chat_id,
            "user_id": event.source.user_id,
            "timestamp": datetime.now().isoformat(),
        }
        pending_path.write_text(json.dumps(pending))
        exit_code_path.unlink(missing_ok=True)

        # Spawn `hermes update` in a separate cgroup so it survives gateway
        # restart. systemd-run --user --scope creates a transient scope unit.
        hermes_cmd_str = " ".join(shlex.quote(part) for part in hermes_cmd)
        update_cmd = (
            f"{hermes_cmd_str} update > {shlex.quote(str(output_path))} 2>&1; "
            f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}"
        )
        try:
            systemd_run = shutil.which("systemd-run")
            if systemd_run:
                subprocess.Popen(
                    [systemd_run, "--user", "--scope",
                     "--unit=hermes-update", "--",
                     "bash", "-c", update_cmd],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    start_new_session=True,
                )
            else:
                # Fallback: best-effort detach with start_new_session
                subprocess.Popen(
                    ["bash", "-c", f"nohup {update_cmd} &"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    start_new_session=True,
                )
        except Exception as e:
            pending_path.unlink(missing_ok=True)
            exit_code_path.unlink(missing_ok=True)
            return f"✗ Failed to start update: {e}"

        self._schedule_update_notification_watch()
        return "⚕ Starting Hermes update… I'll notify you when it's done."

    def _schedule_update_notification_watch(self) -> None:
        """Ensure a background task is watching for update completion."""
        existing_task = getattr(self, "_update_notification_task", None)
        if existing_task and not existing_task.done():
            return

        try:
            self._update_notification_task = asyncio.create_task(
                self._watch_for_update_completion()
            )
        except RuntimeError:
            logger.debug("Skipping update notification watcher: no running event loop")

    async def _watch_for_update_completion(
        self,
        poll_interval: float = 2.0,
        timeout: float = 1800.0,
    ) -> None:
        """Wait for ``hermes update`` to finish, then send its notification."""
        pending_path = _hermes_home / ".update_pending.json"
        claimed_path = _hermes_home / ".update_pending.claimed.json"
        exit_code_path = _hermes_home / ".update_exit_code"
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout

        while (pending_path.exists() or claimed_path.exists()) and loop.time() < deadline:
            if exit_code_path.exists():
                await self._send_update_notification()
                return
            await asyncio.sleep(poll_interval)

        if (pending_path.exists() or claimed_path.exists()) and not exit_code_path.exists():
            logger.warning("Update watcher timed out waiting for completion marker")
            exit_code_path.write_text("124")
            await self._send_update_notification()

    async def _send_update_notification(self) -> bool:
        """If an update finished, notify the user.

        Returns False when the update is still running so a caller can retry
        later. Returns True after a definitive send/skip decision.
        """
        import json
        import re as _re

        pending_path = _hermes_home / ".update_pending.json"
        claimed_path = _hermes_home / ".update_pending.claimed.json"
        output_path = _hermes_home / ".update_output.txt"
        exit_code_path = _hermes_home / ".update_exit_code"

        if not pending_path.exists() and not claimed_path.exists():
            return False

        cleanup = True
        active_pending_path = claimed_path
        try:
            if pending_path.exists():
                try:
                    pending_path.replace(claimed_path)
                except FileNotFoundError:
                    if not claimed_path.exists():
                        return True
            elif not claimed_path.exists():
                return True

            pending = json.loads(claimed_path.read_text())
            platform_str = pending.get("platform")
            chat_id = pending.get("chat_id")

            if not exit_code_path.exists():
                logger.info("Update notification deferred: update still running")
                cleanup = False
                active_pending_path = pending_path
                claimed_path.replace(pending_path)
                return False

            exit_code_raw = exit_code_path.read_text().strip() or "1"
            exit_code = int(exit_code_raw)

            # Read the captured update output
            output = ""
            if output_path.exists():
                output = output_path.read_text()

            # Resolve adapter
            platform = Platform(platform_str)
            adapter = self.adapters.get(platform)

            if adapter and chat_id:
                # Strip ANSI escape codes for clean display
                output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
                if output:
                    if len(output) > 3500:
                        output = "…" + output[-3500:]
                    if exit_code == 0:
                        msg = f"✅ Hermes update finished.\n\n```\n{output}\n```"
                    else:
                        msg = f"❌ Hermes update failed.\n\n```\n{output}\n```"
                else:
                    if exit_code == 0:
                        msg = "✅ Hermes update finished successfully."
                    else:
                        msg = "❌ Hermes update failed. Check the gateway logs or run `hermes update` manually for details."
                await adapter.send(chat_id, msg)
                logger.info(
                    "Sent post-update notification to %s:%s (exit=%s)",
                    platform_str,
                    chat_id,
                    exit_code,
                )
        except Exception as e:
            logger.warning("Post-update notification failed: %s", e)
        finally:
            if cleanup:
                active_pending_path.unlink(missing_ok=True)
                claimed_path.unlink(missing_ok=True)
                output_path.unlink(missing_ok=True)
                exit_code_path.unlink(missing_ok=True)

        return True

    def _set_session_env(self, context: SessionContext) -> None:
        """Set environment variables for the current session."""
        os.environ["HERMES_SESSION_PLATFORM"] = context.source.platform.value
        os.environ["HERMES_SESSION_CHAT_ID"] = context.source.chat_id
        if context.source.chat_name:
            os.environ["HERMES_SESSION_CHAT_NAME"] = context.source.chat_name
        if context.source.thread_id:
            os.environ["HERMES_SESSION_THREAD_ID"] = str(context.source.thread_id)
    
    def _clear_session_env(self) -> None:
        """Clear session environment variables."""
        for var in ["HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME", "HERMES_SESSION_THREAD_ID"]:
            if var in os.environ:
                del os.environ[var]
    
    async def _enrich_message_with_vision(
        self,
        user_text: str,
        image_paths: List[str],
    ) -> str:
        """
        Auto-analyze user-attached images with the vision tool and prepend
        the descriptions to the message text.

        Each image is analyzed with a general-purpose prompt.  The resulting
        description *and* the local cache path are injected so the model can:
          1. Immediately understand what the user sent (no extra tool call).
          2. Re-examine the image with vision_analyze if it needs more detail.

        Args:
            user_text:   The user's original caption / message text.
            image_paths: List of local file paths to cached images.

        Returns:
            The enriched message string with vision descriptions prepended.
        """
        from tools.vision_tools import vision_analyze_tool
        import json as _json

        analysis_prompt = (
            "Describe everything visible in this image in thorough detail. "
            "Include any text, code, data, objects, people, layout, colors, "
            "and any other notable visual information."
        )

        enriched_parts = []
        for path in image_paths:
            try:
                logger.debug("Auto-analyzing user image: %s", path)
                result_json = await vision_analyze_tool(
                    image_url=path,
                    user_prompt=analysis_prompt,
                )
                result = _json.loads(result_json)
                if result.get("success"):
                    description = result.get("analysis", "")
                    enriched_parts.append(
                        f"[The user sent an image~ Here's what I can see:\n{description}]\n"
                        f"[If you need a closer look, use vision_analyze with "
                        f"image_url: {path} ~]"
                    )
                else:
                    enriched_parts.append(
                        "[The user sent an image but I couldn't quite see it "
                        "this time (>_<) You can try looking at it yourself "
                        f"with vision_analyze using image_url: {path}]"
                    )
            except Exception as e:
                logger.error("Vision auto-analysis error: %s", e)
                enriched_parts.append(
                    f"[The user sent an image but something went wrong when I "
                    f"tried to look at it~ You can try examining it yourself "
                    f"with vision_analyze using image_url: {path}]"
                )

        # Combine: vision descriptions first, then the user's original text
        if enriched_parts:
            prefix = "\n\n".join(enriched_parts)
            if user_text:
                return f"{prefix}\n\n{user_text}"
            return prefix
        return user_text

    async def _enrich_message_with_transcription(
        self,
        user_text: str,
        audio_paths: List[str],
    ) -> str:
        """
        Auto-transcribe user voice/audio messages using the configured STT provider
        and prepend the transcript to the message text.

        Args:
            user_text:   The user's original caption / message text.
            audio_paths: List of local file paths to cached audio files.

        Returns:
            The enriched message string with transcriptions prepended.
        """
        if not getattr(self.config, "stt_enabled", True):
            disabled_note = "[The user sent voice message(s), but transcription is disabled in config."
            if self._has_setup_skill():
                disabled_note += (
                    " You have a skill called hermes-agent-setup that can help "
                    "users configure Hermes features including voice, tools, and more."
                )
            disabled_note += "]"
            if user_text:
                return f"{disabled_note}\n\n{user_text}"
            return disabled_note

        from tools.transcription_tools import transcribe_audio, get_stt_model_from_config
        import asyncio

        stt_model = get_stt_model_from_config()

        enriched_parts = []
        for path in audio_paths:
            try:
                logger.debug("Transcribing user voice: %s", path)
                result = await asyncio.to_thread(transcribe_audio, path, model=stt_model)
                if result["success"]:
                    transcript = result["transcript"]
                    enriched_parts.append(
                        f'[The user sent a voice message~ '
                        f'Here\'s what they said: "{transcript}"]'
                    )
                else:
                    error = result.get("error", "unknown error")
                    if (
                        "No STT provider" in error
                        or error.startswith("Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set")
                    ):
                        _no_stt_note = (
                            "[The user sent a voice message but I can't listen "
                            "to it right now — no STT provider is configured. "
                            "A direct message has already been sent to the user "
                            "with setup instructions."
                        )
                        if self._has_setup_skill():
                            _no_stt_note += (
                                " You have a skill called hermes-agent-setup "
                                "that can help users configure Hermes features "
                                "including voice, tools, and more."
                            )
                        _no_stt_note += "]"
                        enriched_parts.append(_no_stt_note)
                    else:
                        enriched_parts.append(
                            "[The user sent a voice message but I had trouble "
                            f"transcribing it~ ({error})]"
                        )
            except Exception as e:
                logger.error("Transcription error: %s", e)
                enriched_parts.append(
                    "[The user sent a voice message but something went wrong "
                    "when I tried to listen to it~ Let them know!]"
                )

        if enriched_parts:
            prefix = "\n\n".join(enriched_parts)
            if user_text:
                return f"{prefix}\n\n{user_text}"
            return prefix
        return user_text

    async def _run_process_watcher(self, watcher: dict) -> None:
        """
        Periodically check a background process and push updates to the user.

        Runs as an asyncio task. Stays silent when nothing changed.
        Auto-removes when the process exits or is killed.

        Notification mode (from ``display.background_process_notifications``):
          - ``all``    — running-output updates + final message
          - ``result`` — final completion message only
          - ``error``  — final message only when exit code != 0
          - ``off``    — no messages at all
        """
        from tools.process_registry import process_registry

        session_id = watcher["session_id"]
        interval = watcher["check_interval"]
        session_key = watcher.get("session_key", "")
        platform_name = watcher.get("platform", "")
        chat_id = watcher.get("chat_id", "")
        thread_id = watcher.get("thread_id", "")
        notify_mode = self._load_background_notifications_mode()

        logger.debug("Process watcher started: %s (every %ss, notify=%s)",
                      session_id, interval, notify_mode)

        if notify_mode == "off":
            # Still wait for the process to exit so we can log it, but don't
            # push any messages to the user.
            while True:
                await asyncio.sleep(interval)
                session = process_registry.get(session_id)
                if session is None or session.exited:
                    break
            logger.debug("Process watcher ended (silent): %s", session_id)
            return

        last_output_len = 0
        while True:
            await asyncio.sleep(interval)

            session = process_registry.get(session_id)
            if session is None:
                break

            current_output_len = len(session.output_buffer)
            has_new_output = current_output_len > last_output_len
            last_output_len = current_output_len

            if session.exited:
                # Decide whether to notify based on mode
                should_notify = (
                    notify_mode in ("all", "result")
                    or (notify_mode == "error" and session.exit_code not in (0, None))
                )
                if should_notify:
                    new_output = session.output_buffer[-1000:] if session.output_buffer else ""
                    message_text = (
                        f"[Background process {session_id} finished with exit code {session.exit_code}~ "
                        f"Here's the final output:\n{new_output}]"
                    )
                    adapter = None
                    for p, a in self.adapters.items():
                        if p.value == platform_name:
                            adapter = a
                            break
                    if adapter and chat_id:
                        try:
                            send_meta = {"thread_id": thread_id} if thread_id else None
                            await adapter.send(chat_id, message_text, metadata=send_meta)
                        except Exception as e:
                            logger.error("Watcher delivery error: %s", e)
                break

            elif has_new_output and notify_mode == "all":
                # New output available -- deliver status update (only in "all" mode)
                new_output = session.output_buffer[-500:] if session.output_buffer else ""
                message_text = (
                    f"[Background process {session_id} is still running~ "
                    f"New output:\n{new_output}]"
                )
                adapter = None
                for p, a in self.adapters.items():
                    if p.value == platform_name:
                        adapter = a
                        break
                if adapter and chat_id:
                    try:
                        send_meta = {"thread_id": thread_id} if thread_id else None
                        await adapter.send(chat_id, message_text, metadata=send_meta)
                    except Exception as e:
                        logger.error("Watcher delivery error: %s", e)

        logger.debug("Process watcher ended: %s", session_id)

    _MAX_INTERRUPT_DEPTH = 3  # Cap recursive interrupt handling (#816)

    @staticmethod
    def _agent_config_signature(
        model: str,
        runtime: dict,
        enabled_toolsets: list,
        ephemeral_prompt: str,
    ) -> str:
        """Compute a stable string key from agent config values.

        When this signature changes between messages, the cached AIAgent is
        discarded and rebuilt.  When it stays the same, the cached agent is
        reused — preserving the frozen system prompt and tool schemas for
        prompt cache hits.
        """
        import hashlib, json as _j

        # Fingerprint the FULL credential string instead of using a short
        # prefix. OAuth/JWT-style tokens frequently share a common prefix
        # (e.g. "eyJhbGci"), which can cause false cache hits across auth
        # switches if only the first few characters are considered.
        _api_key = str(runtime.get("api_key", "") or "")
        _api_key_fingerprint = hashlib.sha256(_api_key.encode()).hexdigest() if _api_key else ""

        blob = _j.dumps(
            [
                model,
                _api_key_fingerprint,
                runtime.get("base_url", ""),
                runtime.get("provider", ""),
                runtime.get("api_mode", ""),
                sorted(enabled_toolsets) if enabled_toolsets else [],
                # reasoning_config excluded — it's set per-message on the
                # cached agent and doesn't affect system prompt or tools.
                ephemeral_prompt or "",
            ],
            sort_keys=True,
            default=str,
        )
        return hashlib.sha256(blob.encode()).hexdigest()[:16]

    def _evict_cached_agent(self, session_key: str) -> None:
        """Remove a cached agent for a session (called on /new, /model, etc)."""
        _lock = getattr(self, "_agent_cache_lock", None)
        if _lock:
            with _lock:
                self._agent_cache.pop(session_key, None)

    async def _run_agent(
        self,
        message: str,
        context_prompt: str,
        history: List[Dict[str, Any]],
        source: SessionSource,
        session_id: str,
        session_key: str = None,
        _interrupt_depth: int = 0,
        event_message_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Run the agent with the given message and context.
        
        Returns the full result dict from run_conversation, including:
          - "final_response": str (the text to send back)
          - "messages": list (full conversation including tool calls)
          - "api_calls": int
          - "completed": bool
        
        This is run in a thread pool to not block the event loop.
        Supports interruption via new messages.
        """
        from run_agent import AIAgent
        import queue
        
        user_config = _load_gateway_config()
        platform_key = _platform_config_key(source.platform)

        from hermes_cli.tools_config import _get_platform_tools
        enabled_toolsets = sorted(_get_platform_tools(user_config, platform_key))

        # Tool progress mode from config.yaml: "all", "new", "verbose", "off"
        # Falls back to env vars for backward compatibility.
        # YAML 1.1 parses bare `off` as boolean False — normalise before
        # the `or` chain so it doesn't silently fall through to "all".
        _raw_tp = user_config.get("display", {}).get("tool_progress")
        if _raw_tp is False:
            _raw_tp = "off"
        progress_mode = (
            _raw_tp
            or os.getenv("HERMES_TOOL_PROGRESS_MODE")
            or "all"
        )
        tool_progress_enabled = progress_mode != "off"
        
        # Queue for progress messages (thread-safe)
        progress_queue = queue.Queue() if tool_progress_enabled else None
        last_tool = [None]  # Mutable container for tracking in closure
        last_progress_msg = [None]  # Track last message for dedup
        repeat_count = [0]  # How many times the same message repeated
        
        def progress_callback(tool_name: str, preview: str = None, args: dict = None):
            """Callback invoked by agent when a tool is called."""
            if not progress_queue:
                return
            
            # "new" mode: only report when tool changes
            if progress_mode == "new" and tool_name == last_tool[0]:
                return
            last_tool[0] = tool_name
            
            # Build progress message with primary argument preview
            from agent.display import get_tool_emoji
            emoji = get_tool_emoji(tool_name, default="⚙️")
            
            # Verbose mode: show detailed arguments
            if progress_mode == "verbose" and args:
                import json as _json
                args_str = _json.dumps(args, ensure_ascii=False, default=str)
                if len(args_str) > 200:
                    args_str = args_str[:197] + "..."
                msg = f"{emoji} {tool_name}({list(args.keys())})\n{args_str}"
                progress_queue.put(msg)
                return
            
            if preview:
                # Truncate preview to keep messages clean
                if len(preview) > 80:
                    preview = preview[:77] + "..."
                msg = f"{emoji} {tool_name}: \"{preview}\""
            else:
                msg = f"{emoji} {tool_name}..."
            
            # Dedup: collapse consecutive identical progress messages.
            # Common with execute_code where models iterate with the same
            # code (same boilerplate imports → identical previews).
            if msg == last_progress_msg[0]:
                repeat_count[0] += 1
                # Update the last line in progress_lines with a counter
                # via a special "dedup" queue message.
                progress_queue.put(("__dedup__", msg, repeat_count[0]))
                return
            last_progress_msg[0] = msg
            repeat_count[0] = 0
            
            progress_queue.put(msg)
        
        # Background task to send progress messages
        # Accumulates tool lines into a single message that gets edited.
        #
        # Threading metadata is platform-specific:
        # - Slack DM threading needs event_message_id fallback (reply thread)
        # - Telegram uses message_thread_id only for forum topics; passing a
        #   normal DM/group message id as thread_id causes send failures
        # - Other platforms should use explicit source.thread_id only
        if source.platform == Platform.SLACK:
            _progress_thread_id = source.thread_id or event_message_id
        else:
            _progress_thread_id = source.thread_id
        _progress_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None

        async def send_progress_messages():
            if not progress_queue:
                return

            adapter = self.adapters.get(source.platform)
            if not adapter:
                return

            progress_lines = []      # Accumulated tool lines
            progress_msg_id = None   # ID of the progress message to edit
            can_edit = True          # False once an edit fails (platform doesn't support it)

            while True:
                try:
                    raw = progress_queue.get_nowait()
                    
                    # Handle dedup messages: update last line with repeat counter
                    if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
                        _, base_msg, count = raw
                        if progress_lines:
                            progress_lines[-1] = f"{base_msg} (×{count + 1})"
                        msg = progress_lines[-1] if progress_lines else base_msg
                    else:
                        msg = raw
                        progress_lines.append(msg)

                    if can_edit and progress_msg_id is not None:
                        # Try to edit the existing progress message
                        full_text = "\n".join(progress_lines)
                        result = await adapter.edit_message(
                            chat_id=source.chat_id,
                            message_id=progress_msg_id,
                            content=full_text,
                        )
                        if not result.success:
                            # Platform doesn't support editing — stop trying,
                            # send just this new line as a separate message
                            can_edit = False
                            await adapter.send(chat_id=source.chat_id, content=msg, metadata=_progress_metadata)
                    else:
                        if can_edit:
                            # First tool: send all accumulated text as new message
                            full_text = "\n".join(progress_lines)
                            result = await adapter.send(chat_id=source.chat_id, content=full_text, metadata=_progress_metadata)
                        else:
                            # Editing unsupported: send just this line
                            result = await adapter.send(chat_id=source.chat_id, content=msg, metadata=_progress_metadata)
                        if result.success and result.message_id:
                            progress_msg_id = result.message_id

                    # Restore typing indicator
                    await asyncio.sleep(0.3)
                    await adapter.send_typing(source.chat_id, metadata=_progress_metadata)

                except queue.Empty:
                    await asyncio.sleep(0.3)
                except asyncio.CancelledError:
                    # Drain remaining queued messages
                    while not progress_queue.empty():
                        try:
                            raw = progress_queue.get_nowait()
                            if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
                                _, base_msg, count = raw
                                if progress_lines:
                                    progress_lines[-1] = f"{base_msg} (×{count + 1})"
                            else:
                                progress_lines.append(raw)
                        except Exception:
                            break
                    # Final edit with all remaining tools (only if editing works)
                    if can_edit and progress_lines and progress_msg_id:
                        full_text = "\n".join(progress_lines)
                        try:
                            await adapter.edit_message(
                                chat_id=source.chat_id,
                                message_id=progress_msg_id,
                                content=full_text,
                            )
                        except Exception:
                            pass
                    return
                except Exception as e:
                    logger.error("Progress message error: %s", e)
                    await asyncio.sleep(1)
        
        # We need to share the agent instance for interrupt support
        agent_holder = [None]  # Mutable container for the agent instance
        result_holder = [None]  # Mutable container for the result
        tools_holder = [None]   # Mutable container for the tool definitions
        stream_consumer_holder = [None]  # Mutable container for stream consumer
        
        # Bridge sync step_callback → async hooks.emit for agent:step events
        _loop_for_step = asyncio.get_event_loop()
        _hooks_ref = self.hooks

        def _step_callback_sync(iteration: int, tool_names: list) -> None:
            try:
                asyncio.run_coroutine_threadsafe(
                    _hooks_ref.emit("agent:step", {
                        "platform": source.platform.value if source.platform else "",
                        "user_id": source.user_id,
                        "session_id": session_id,
                        "iteration": iteration,
                        "tool_names": tool_names,
                    }),
                    _loop_for_step,
                )
            except Exception as _e:
                logger.debug("agent:step hook error: %s", _e)

        # Bridge sync status_callback → async adapter.send for context pressure
        _status_adapter = self.adapters.get(source.platform)
        _status_chat_id = source.chat_id
        _status_thread_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None

        def _status_callback_sync(event_type: str, message: str) -> None:
            if not _status_adapter:
                return
            try:
                asyncio.run_coroutine_threadsafe(
                    _status_adapter.send(
                        _status_chat_id,
                        message,
                        metadata=_status_thread_metadata,
                    ),
                    _loop_for_step,
                )
            except Exception as _e:
                logger.debug("status_callback error (%s): %s", event_type, _e)

        def run_sync():
            # Pass session_key to process registry via env var so background
            # processes can be mapped back to this gateway session
            os.environ["HERMES_SESSION_KEY"] = session_key or ""

            # Read from env var or use default (same as CLI)
            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
            
            # Map platform enum to the platform hint key the agent understands.
            # Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
            platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value
            
            # Combine platform context with user-configured ephemeral system prompt
            combined_ephemeral = context_prompt or ""
            if self._ephemeral_system_prompt:
                combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()

            # Re-read .env and config for fresh credentials (gateway is long-lived,
            # keys may change without restart).
            try:
                load_dotenv(_env_path, override=True, encoding="utf-8")
            except UnicodeDecodeError:
                load_dotenv(_env_path, override=True, encoding="latin-1")
            except Exception:
                pass

            model = _resolve_gateway_model(user_config)

            try:
                runtime_kwargs = _resolve_runtime_agent_kwargs()
            except Exception as exc:
                return {
                    "final_response": f"⚠️ Provider authentication failed: {exc}",
                    "messages": [],
                    "api_calls": 0,
                    "tools": [],
                }

            pr = self._provider_routing
            honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key)
            reasoning_config = self._load_reasoning_config()
            self._reasoning_config = reasoning_config
            # Set up streaming consumer if enabled
            _stream_consumer = None
            _stream_delta_cb = None
            _scfg = getattr(getattr(self, 'config', None), 'streaming', None)
            if _scfg is None:
                from gateway.config import StreamingConfig
                _scfg = StreamingConfig()

            if _scfg.enabled and _scfg.transport != "off":
                try:
                    from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
                    _adapter = self.adapters.get(source.platform)
                    if _adapter:
                        _consumer_cfg = StreamConsumerConfig(
                            edit_interval=_scfg.edit_interval,
                            buffer_threshold=_scfg.buffer_threshold,
                            cursor=_scfg.cursor,
                        )
                        _stream_consumer = GatewayStreamConsumer(
                            adapter=_adapter,
                            chat_id=source.chat_id,
                            config=_consumer_cfg,
                            metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None,
                        )
                        _stream_delta_cb = _stream_consumer.on_delta
                        stream_consumer_holder[0] = _stream_consumer
                except Exception as _sc_err:
                    logger.debug("Could not set up stream consumer: %s", _sc_err)

            turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs)

            # Check agent cache — reuse the AIAgent from the previous message
            # in this session to preserve the frozen system prompt and tool
            # schemas for prompt cache hits.
            _sig = self._agent_config_signature(
                turn_route["model"],
                turn_route["runtime"],
                enabled_toolsets,
                combined_ephemeral,
            )
            agent = None
            _cache_lock = getattr(self, "_agent_cache_lock", None)
            _cache = getattr(self, "_agent_cache", None)
            if _cache_lock and _cache is not None:
                with _cache_lock:
                    cached = _cache.get(session_key)
                    if cached and cached[1] == _sig:
                        agent = cached[0]
                        logger.debug("Reusing cached agent for session %s", session_key)

            if agent is None:
                # Config changed or first message — create fresh agent
                agent = AIAgent(
                    model=turn_route["model"],
                    **turn_route["runtime"],
                    max_iterations=max_iterations,
                    quiet_mode=True,
                    verbose_logging=False,
                    enabled_toolsets=enabled_toolsets,
                    ephemeral_system_prompt=combined_ephemeral or None,
                    prefill_messages=self._prefill_messages or None,
                    reasoning_config=reasoning_config,
                    providers_allowed=pr.get("only"),
                    providers_ignored=pr.get("ignore"),
                    providers_order=pr.get("order"),
                    provider_sort=pr.get("sort"),
                    provider_require_parameters=pr.get("require_parameters", False),
                    provider_data_collection=pr.get("data_collection"),
                    session_id=session_id,
                    platform=platform_key,
                    honcho_session_key=session_key,
                    honcho_manager=honcho_manager,
                    honcho_config=honcho_config,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
                )
                if _cache_lock and _cache is not None:
                    with _cache_lock:
                        _cache[session_key] = (agent, _sig)
                logger.debug("Created new agent for session %s (sig=%s)", session_key, _sig)

            # Per-message state — callbacks and reasoning config change every
            # turn and must not be baked into the cached agent constructor.
            agent.tool_progress_callback = progress_callback if tool_progress_enabled else None
            agent.step_callback = _step_callback_sync if _hooks_ref.loaded_hooks else None
            agent.stream_delta_callback = _stream_delta_cb
            agent.status_callback = _status_callback_sync
            agent.reasoning_config = reasoning_config

            # Background review delivery — send "💾 Memory updated" etc. to user
            def _bg_review_send(message: str) -> None:
                if not _status_adapter:
                    return
                try:
                    asyncio.run_coroutine_threadsafe(
                        _status_adapter.send(
                            _status_chat_id,
                            message,
                            metadata=_status_thread_metadata,
                        ),
                        _loop_for_step,
                    )
                except Exception as _e:
                    logger.debug("background_review_callback error: %s", _e)

            agent.background_review_callback = _bg_review_send

            # Store agent reference for interrupt support
            agent_holder[0] = agent
            # Capture the full tool definitions for transcript logging
            tools_holder[0] = agent.tools if hasattr(agent, 'tools') else None
            
            # Convert history to agent format.
            # Two cases:
            #   1. Normal path (from transcript): simple {role, content, timestamp} dicts
            #      - Strip timestamps, keep role+content
            #   2. Interrupt path (from agent result["messages"]): full agent messages
            #      that may include tool_calls, tool_call_id, reasoning, etc.
            #      - These must be passed through intact so the API sees valid
            #        assistant→tool sequences (dropping tool_calls causes 500 errors)
            agent_history = []
            for msg in history:
                role = msg.get("role")
                if not role:
                    continue
                
                # Skip metadata entries (tool definitions, session info)
                # -- these are for transcript logging, not for the LLM
                if role in ("session_meta",):
                    continue
                
                # Skip system messages -- the agent rebuilds its own system prompt
                if role == "system":
                    continue
                
                # Rich agent messages (tool_calls, tool results) must be passed
                # through intact so the API sees valid assistant→tool sequences
                has_tool_calls = "tool_calls" in msg
                has_tool_call_id = "tool_call_id" in msg
                is_tool_message = role == "tool"
                
                if has_tool_calls or has_tool_call_id or is_tool_message:
                    clean_msg = {k: v for k, v in msg.items() if k != "timestamp"}
                    agent_history.append(clean_msg)
                else:
                    # Simple text message - just need role and content
                    content = msg.get("content")
                    if content:
                        # Tag cross-platform mirror messages so the agent knows their origin
                        if msg.get("mirror"):
                            mirror_src = msg.get("mirror_source", "another session")
                            content = f"[Delivered from {mirror_src}] {content}"
                        entry = {"role": role, "content": content}
                        # Preserve reasoning fields on assistant messages so
                        # multi-turn reasoning context survives session reload.
                        # The agent's _build_api_kwargs converts these to the
                        # provider-specific format (reasoning_content, etc.).
                        if role == "assistant":
                            for _rkey in ("reasoning", "reasoning_details",
                                          "codex_reasoning_items"):
                                _rval = msg.get(_rkey)
                                if _rval:
                                    entry[_rkey] = _rval
                        agent_history.append(entry)
            
            # Collect MEDIA paths already in history so we can exclude them
            # from the current turn's extraction. This is compression-safe:
            # even if the message list shrinks, we know which paths are old.
            _history_media_paths: set = set()
            for _hm in agent_history:
                if _hm.get("role") in ("tool", "function"):
                    _hc = _hm.get("content", "")
                    if "MEDIA:" in _hc:
                        for _match in re.finditer(r'MEDIA:(\S+)', _hc):
                            _p = _match.group(1).strip().rstrip('",}')
                            if _p:
                                _history_media_paths.add(_p)
            
            result = agent.run_conversation(message, conversation_history=agent_history, task_id=session_id)
            result_holder[0] = result

            # Signal the stream consumer that the agent is done
            if _stream_consumer is not None:
                _stream_consumer.finish()
            
            # Return final response, or a message if something went wrong
            final_response = result.get("final_response")

            # Extract actual token counts from the agent instance used for this run
            _last_prompt_toks = 0
            _input_toks = 0
            _output_toks = 0
            _agent = agent_holder[0]
            if _agent and hasattr(_agent, "context_compressor"):
                _last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0)
                _input_toks = getattr(_agent, "session_prompt_tokens", 0)
                _output_toks = getattr(_agent, "session_completion_tokens", 0)
            _resolved_model = getattr(_agent, "model", None) if _agent else None

            if not final_response:
                error_msg = f"⚠️ {result['error']}" if result.get("error") else "(No response generated)"
                return {
                    "final_response": error_msg,
                    "messages": result.get("messages", []),
                    "api_calls": result.get("api_calls", 0),
                    "tools": tools_holder[0] or [],
                    "history_offset": len(agent_history),
                    "last_prompt_tokens": _last_prompt_toks,
                    "input_tokens": _input_toks,
                    "output_tokens": _output_toks,
                    "model": _resolved_model,
                }
            
            # Scan tool results for MEDIA:<path> tags that need to be delivered
            # as native audio/file attachments.  The TTS tool embeds MEDIA: tags
            # in its JSON response, but the model's final text reply usually
            # doesn't include them.  We collect unique tags from tool results and
            # append any that aren't already present in the final response, so the
            # adapter's extract_media() can find and deliver the files exactly once.
            #
            # Uses path-based deduplication against _history_media_paths (collected
            # before run_conversation) instead of index slicing. This is safe even
            # when context compression shrinks the message list. (Fixes #160)
            if "MEDIA:" not in final_response:
                media_tags = []
                has_voice_directive = False
                for msg in result.get("messages", []):
                    if msg.get("role") in ("tool", "function"):
                        content = msg.get("content", "")
                        if "MEDIA:" in content:
                            for match in re.finditer(r'MEDIA:(\S+)', content):
                                path = match.group(1).strip().rstrip('",}')
                                if path and path not in _history_media_paths:
                                    media_tags.append(f"MEDIA:{path}")
                            if "[[audio_as_voice]]" in content:
                                has_voice_directive = True
                
                if media_tags:
                    seen = set()
                    unique_tags = []
                    for tag in media_tags:
                        if tag not in seen:
                            seen.add(tag)
                            unique_tags.append(tag)
                    if has_voice_directive:
                        unique_tags.insert(0, "[[audio_as_voice]]")
                    final_response = final_response + "\n" + "\n".join(unique_tags)
            
            # Sync session_id: the agent may have created a new session during
            # mid-run context compression (_compress_context splits sessions).
            # If so, update the session store entry so the NEXT message loads
            # the compressed transcript, not the stale pre-compression one.
            agent = agent_holder[0]
            if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id:
                logger.info(
                    "Session split detected: %s → %s (compression)",
                    session_id, agent.session_id,
                )
                entry = self.session_store._entries.get(session_key)
                if entry:
                    entry.session_id = agent.session_id
                    self.session_store._save()

            effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id

            # Auto-generate session title after first exchange (non-blocking)
            if final_response and self._session_db:
                try:
                    from agent.title_generator import maybe_auto_title
                    all_msgs = result_holder[0].get("messages", []) if result_holder[0] else []
                    maybe_auto_title(
                        self._session_db,
                        effective_session_id,
                        message,
                        final_response,
                        all_msgs,
                    )
                except Exception:
                    pass

            return {
                "final_response": final_response,
                "last_reasoning": result.get("last_reasoning"),
                "messages": result_holder[0].get("messages", []) if result_holder[0] else [],
                "api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
                "tools": tools_holder[0] or [],
                "history_offset": len(agent_history),
                "last_prompt_tokens": _last_prompt_toks,
                "input_tokens": _input_toks,
                "output_tokens": _output_toks,
                "model": _resolved_model,
                "session_id": effective_session_id,
            }
        
        # Start progress message sender if enabled
        progress_task = None
        if tool_progress_enabled:
            progress_task = asyncio.create_task(send_progress_messages())

        # Start stream consumer task — polls for consumer creation since it
        # happens inside run_sync (thread pool) after the agent is constructed.
        stream_task = None

        async def _start_stream_consumer():
            """Wait for the stream consumer to be created, then run it."""
            for _ in range(200):  # Up to 10s wait
                if stream_consumer_holder[0] is not None:
                    await stream_consumer_holder[0].run()
                    return
                await asyncio.sleep(0.05)

        stream_task = asyncio.create_task(_start_stream_consumer())
        
        # Track this agent as running for this session (for interrupt support)
        # We do this in a callback after the agent is created
        async def track_agent():
            # Wait for agent to be created
            while agent_holder[0] is None:
                await asyncio.sleep(0.05)
            if session_key:
                self._running_agents[session_key] = agent_holder[0]
        
        tracking_task = asyncio.create_task(track_agent())
        
        # Monitor for interrupts from the adapter (new messages arriving)
        async def monitor_for_interrupt():
            adapter = self.adapters.get(source.platform)
            if not adapter or not session_key:
                return
            
            while True:
                await asyncio.sleep(0.2)  # Check every 200ms
                # Check if adapter has a pending interrupt for this session.
                # Must use session_key (build_session_key output) — NOT
                # source.chat_id — because the adapter stores interrupt events
                # under the full session key.
                if hasattr(adapter, 'has_pending_interrupt') and adapter.has_pending_interrupt(session_key):
                    agent = agent_holder[0]
                    if agent:
                        pending_event = adapter.get_pending_message(session_key)
                        pending_text = pending_event.text if pending_event else None
                        logger.debug("Interrupt detected from adapter, signaling agent...")
                        agent.interrupt(pending_text)
                        break
        
        interrupt_monitor = asyncio.create_task(monitor_for_interrupt())
        
        try:
            # Run in thread pool to not block
            loop = asyncio.get_event_loop()
            response = await loop.run_in_executor(None, run_sync)

            # Track fallback model state: if the agent switched to a
            # fallback model during this run, persist it so /model shows
            # the actually-active model instead of the config default.
            _agent = agent_holder[0]
            if _agent is not None and hasattr(_agent, 'model'):
                _cfg_model = _resolve_gateway_model()
                if _agent.model != _cfg_model:
                    self._effective_model = _agent.model
                    self._effective_provider = getattr(_agent, 'provider', None)
                    # Fallback activated — evict cached agent so the next
                    # message starts fresh and retries the primary model.
                    self._evict_cached_agent(session_key)
                else:
                    # Primary model worked — clear any stale fallback state
                    self._effective_model = None
                    self._effective_provider = None

            # Check if we were interrupted OR have a queued message (/queue).
            result = result_holder[0]
            adapter = self.adapters.get(source.platform)
            
            # Get pending message from adapter.
            # Use session_key (not source.chat_id) to match adapter's storage keys.
            pending = None
            if result and adapter and session_key:
                if result.get("interrupted"):
                    # Interrupted — consume the interrupt message
                    pending_event = adapter.get_pending_message(session_key)
                    if pending_event:
                        pending = pending_event.text
                    elif result.get("interrupt_message"):
                        pending = result.get("interrupt_message")
                else:
                    # Normal completion — check for /queue'd messages that were
                    # stored without triggering an interrupt.
                    pending_event = adapter.get_pending_message(session_key)
                    if pending_event:
                        pending = pending_event.text
                        logger.debug("Processing queued message after agent completion: '%s...'", pending[:40])
            
            if pending:
                logger.debug("Processing pending message: '%s...'", pending[:40])
                
                # Clear the adapter's interrupt event so the next _run_agent call
                # doesn't immediately re-trigger the interrupt before the new agent
                # even makes its first API call (this was causing an infinite loop).
                if adapter and hasattr(adapter, '_active_sessions') and session_key and session_key in adapter._active_sessions:
                    adapter._active_sessions[session_key].clear()
                
                # Cap recursion depth to prevent resource exhaustion when the
                # user sends multiple messages while the agent keeps failing. (#816)
                if _interrupt_depth >= self._MAX_INTERRUPT_DEPTH:
                    logger.warning(
                        "Interrupt recursion depth %d reached for session %s — "
                        "queueing message instead of recursing.",
                        _interrupt_depth, session_key,
                    )
                    # Queue the pending message for normal processing on next turn
                    adapter = self.adapters.get(source.platform)
                    if adapter and hasattr(adapter, 'queue_message'):
                        adapter.queue_message(session_key, pending)
                    return result_holder[0] or {"final_response": response, "messages": history}

                was_interrupted = result.get("interrupted")
                if not was_interrupted:
                    # Queued message after normal completion — deliver the first
                    # response before processing the queued follow-up.
                    # Skip if streaming already delivered it.
                    _sc = stream_consumer_holder[0]
                    _already_streamed = _sc and getattr(_sc, "already_sent", False)
                    first_response = result.get("final_response", "")
                    if first_response and not _already_streamed:
                        try:
                            await adapter.send(source.chat_id, first_response,
                                               metadata=getattr(event, "metadata", None))
                        except Exception as e:
                            logger.warning("Failed to send first response before queued message: %s", e)
                # else: interrupted — discard the interrupted response ("Operation
                # interrupted." is just noise; the user already knows they sent a
                # new message).

                # Process the pending message with updated history
                updated_history = result.get("messages", history)
                return await self._run_agent(
                    message=pending,
                    context_prompt=context_prompt,
                    history=updated_history,
                    source=source,
                    session_id=session_id,
                    session_key=session_key,
                    _interrupt_depth=_interrupt_depth + 1,
                )
        finally:
            # Stop progress sender and interrupt monitor
            if progress_task:
                progress_task.cancel()
            interrupt_monitor.cancel()

            # Wait for stream consumer to finish its final edit
            if stream_task:
                try:
                    await asyncio.wait_for(stream_task, timeout=5.0)
                except (asyncio.TimeoutError, asyncio.CancelledError):
                    stream_task.cancel()
                    try:
                        await stream_task
                    except asyncio.CancelledError:
                        pass
            
            # Clean up tracking
            tracking_task.cancel()
            if session_key and session_key in self._running_agents:
                del self._running_agents[session_key]
            
            # Wait for cancelled tasks
            for task in [progress_task, interrupt_monitor, tracking_task]:
                if task:
                    try:
                        await task
                    except asyncio.CancelledError:
                        pass

        # If streaming already delivered the response, mark it so the
        # caller's send() is skipped (avoiding duplicate messages).
        _sc = stream_consumer_holder[0]
        if _sc and _sc.already_sent and isinstance(response, dict):
            response["already_sent"] = True
        
        return response


def _start_cron_ticker(stop_event: threading.Event, adapters=None, interval: int = 60):
    """
    Background thread that ticks the cron scheduler at a regular interval.
    
    Runs inside the gateway process so cronjobs fire automatically without
    needing a separate `hermes cron daemon` or system cron entry.

    Also refreshes the channel directory every 5 minutes and prunes the
    image/audio/document cache once per hour.
    """
    from cron.scheduler import tick as cron_tick
    from gateway.platforms.base import cleanup_image_cache, cleanup_document_cache

    IMAGE_CACHE_EVERY = 60   # ticks — once per hour at default 60s interval
    CHANNEL_DIR_EVERY = 5    # ticks — every 5 minutes

    logger.info("Cron ticker started (interval=%ds)", interval)
    tick_count = 0
    while not stop_event.is_set():
        try:
            cron_tick(verbose=False)
        except Exception as e:
            logger.debug("Cron tick error: %s", e)

        tick_count += 1

        if tick_count % CHANNEL_DIR_EVERY == 0 and adapters:
            try:
                from gateway.channel_directory import build_channel_directory
                build_channel_directory(adapters)
            except Exception as e:
                logger.debug("Channel directory refresh error: %s", e)

        if tick_count % IMAGE_CACHE_EVERY == 0:
            try:
                removed = cleanup_image_cache(max_age_hours=24)
                if removed:
                    logger.info("Image cache cleanup: removed %d stale file(s)", removed)
            except Exception as e:
                logger.debug("Image cache cleanup error: %s", e)
            try:
                removed = cleanup_document_cache(max_age_hours=24)
                if removed:
                    logger.info("Document cache cleanup: removed %d stale file(s)", removed)
            except Exception as e:
                logger.debug("Document cache cleanup error: %s", e)

        stop_event.wait(timeout=interval)
    logger.info("Cron ticker stopped")


async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False) -> bool:
    """
    Start the gateway and run until interrupted.
    
    This is the main entry point for running the gateway.
    Returns True if the gateway ran successfully, False if it failed to start.
    A False return causes a non-zero exit code so systemd can auto-restart.
    
    Args:
        config: Optional gateway configuration override.
        replace: If True, kill any existing gateway instance before starting.
                 Useful for systemd services to avoid restart-loop deadlocks
                 when the previous process hasn't fully exited yet.
    """
    # ── Duplicate-instance guard ──────────────────────────────────────
    # Prevent two gateways from running under the same HERMES_HOME.
    # The PID file is scoped to HERMES_HOME, so future multi-profile
    # setups (each profile using a distinct HERMES_HOME) will naturally
    # allow concurrent instances without tripping this guard.
    import time as _time
    from gateway.status import get_running_pid, remove_pid_file
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
        if replace:
            logger.info(
                "Replacing existing gateway instance (PID %d) with --replace.",
                existing_pid,
            )
            try:
                os.kill(existing_pid, signal.SIGTERM)
            except ProcessLookupError:
                pass  # Already gone
            except PermissionError:
                logger.error(
                    "Permission denied killing PID %d. Cannot replace.",
                    existing_pid,
                )
                return False
            # Wait up to 10 seconds for the old process to exit
            for _ in range(20):
                try:
                    os.kill(existing_pid, 0)
                    _time.sleep(0.5)
                except (ProcessLookupError, PermissionError):
                    break  # Process is gone
            else:
                # Still alive after 10s — force kill
                logger.warning(
                    "Old gateway (PID %d) did not exit after SIGTERM, sending SIGKILL.",
                    existing_pid,
                )
                try:
                    os.kill(existing_pid, signal.SIGKILL)
                    _time.sleep(0.5)
                except (ProcessLookupError, PermissionError):
                    pass
            remove_pid_file()
            # Also release all scoped locks left by the old process.
            # Stopped (Ctrl+Z) processes don't release locks on exit,
            # leaving stale lock files that block the new gateway from starting.
            try:
                from gateway.status import release_all_scoped_locks
                _released = release_all_scoped_locks()
                if _released:
                    logger.info("Released %d stale scoped lock(s) from old gateway.", _released)
            except Exception:
                pass
        else:
            hermes_home = str(get_hermes_home())
            logger.error(
                "Another gateway instance is already running (PID %d, HERMES_HOME=%s). "
                "Use 'hermes gateway restart' to replace it, or 'hermes gateway stop' first.",
                existing_pid, hermes_home,
            )
            print(
                f"\n❌ Gateway already running (PID {existing_pid}).\n"
                f"   Use 'hermes gateway restart' to replace it,\n"
                f"   or 'hermes gateway stop' to kill it first.\n"
                f"   Or use 'hermes gateway run --replace' to auto-replace.\n"
            )
            return False

    # Sync bundled skills on gateway start (fast -- skips unchanged)
    try:
        from tools.skills_sync import sync_skills
        sync_skills(quiet=True)
    except Exception:
        pass

    # Configure rotating file log so gateway output is persisted for debugging
    log_dir = _hermes_home / 'logs'
    log_dir.mkdir(parents=True, exist_ok=True)
    file_handler = RotatingFileHandler(
        log_dir / 'gateway.log',
        maxBytes=5 * 1024 * 1024,
        backupCount=3,
    )
    from agent.redact import RedactingFormatter
    file_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
    logging.getLogger().addHandler(file_handler)
    logging.getLogger().setLevel(logging.INFO)

    # Separate errors-only log for easy debugging
    error_handler = RotatingFileHandler(
        log_dir / 'errors.log',
        maxBytes=2 * 1024 * 1024,
        backupCount=2,
    )
    error_handler.setLevel(logging.WARNING)
    error_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
    logging.getLogger().addHandler(error_handler)

    runner = GatewayRunner(config)
    
    # Set up signal handlers
    def signal_handler():
        asyncio.create_task(runner.stop())
    
    loop = asyncio.get_event_loop()
    for sig in (signal.SIGINT, signal.SIGTERM):
        try:
            loop.add_signal_handler(sig, signal_handler)
        except NotImplementedError:
            pass
    
    # Start the gateway
    success = await runner.start()
    if not success:
        return False
    if runner.should_exit_cleanly:
        if runner.exit_reason:
            logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
        return True
    
    # Write PID file so CLI can detect gateway is running
    import atexit
    from gateway.status import write_pid_file, remove_pid_file
    write_pid_file()
    atexit.register(remove_pid_file)
    
    # Start background cron ticker so scheduled jobs fire automatically
    cron_stop = threading.Event()
    cron_thread = threading.Thread(
        target=_start_cron_ticker,
        args=(cron_stop,),
        kwargs={"adapters": runner.adapters},
        daemon=True,
        name="cron-ticker",
    )
    cron_thread.start()
    
    # Wait for shutdown
    await runner.wait_for_shutdown()

    if runner.should_exit_with_failure:
        if runner.exit_reason:
            logger.error("Gateway exiting with failure: %s", runner.exit_reason)
        return False
    
    # Stop cron ticker cleanly
    cron_stop.set()
    cron_thread.join(timeout=5)

    # Close MCP server connections
    try:
        from tools.mcp_tool import shutdown_mcp_servers
        shutdown_mcp_servers()
    except Exception:
        pass

    return True


def main():
    """CLI entry point for the gateway."""
    import argparse
    
    parser = argparse.ArgumentParser(description="Hermes Gateway - Multi-platform messaging")
    parser.add_argument("--config", "-c", help="Path to gateway config file")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    
    args = parser.parse_args()
    
    config = None
    if args.config:
        import json
        with open(args.config, encoding="utf-8") as f:
            data = json.load(f)
            config = GatewayConfig.from_dict(data)
    
    # Run the gateway - exit with code 1 if no platforms connected,
    # so systemd Restart=on-failure will retry on transient errors (e.g. DNS)
    success = asyncio.run(start_gateway(config))
    if not success:
        sys.exit(1)


if __name__ == "__main__":
    main()
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								"""
 								Gateway runner - entry point for messaging platform integrations.
 								This module provides:
 								- start_gateway(): Start all configured platform adapters
 								- GatewayRunner: Main class managing the gateway lifecycle
 								Usage:
 								    # Start the gateway
 								    python -m gateway.run
 								    # Or from CLI
 								    python cli.py --gateway
 								"""
 								import asyncio
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								import json
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								import logging
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								import os
-												Add Text-to-Speech (TTS) support with Edge TTS and ElevenLabs integration

- Updated `pyproject.toml` to include Edge TTS and ElevenLabs as dependencies.
- Enhanced documentation to detail voice message capabilities across platforms and TTS provider options.
- Modified the GatewayRunner to handle MEDIA tags from TTS tool responses, ensuring proper delivery of audio messages.

											
										
										
											2026-02-14 16:08:14 -08:00
+								import re
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								import shlex
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								import sys
 								import signal
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								import tempfile
-												refactor: streamline cron job handling and update CLI commands

- Removed legacy cron daemon functionality, integrating cron job execution directly into the gateway process for improved efficiency.
- Updated CLI commands to reflect changes, replacing `hermes cron daemon` with `hermes cron status` and enhancing documentation for cron job management.
- Clarified messaging in the README and other documentation regarding the gateway's role in managing cron jobs.
- Removed obsolete terminal_hecate tool and related configurations to simplify the codebase.

											
										
										
											2026-02-21 16:21:19 -08:00
+								import threading
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								import time
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								from logging.handlers import RotatingFileHandler
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								from pathlib import Path
 								from datetime import datetime
 								from typing import Dict, Optional, Any, List
-												fix(gateway): SSL certificate auto-detection for NixOS and non-standard systems

Add _ensure_ssl_certs() that discovers CA certificate bundles before any
HTTP library is imported.  Resolution order:
1. Python's ssl.get_default_verify_paths()
2. certifi (if installed)
3. Common distro/macOS paths

Only sets SSL_CERT_FILE if not already present in the environment.
Wrapped in a function (called immediately) to avoid polluting module
namespace.

Based on PR #1151 by sylvesterroos.

											
										
										
											2026-03-15 23:04:34 -07:00
+								# ---------------------------------------------------------------------------
 								# SSL certificate auto-detection for NixOS and other non-standard systems.
 								# Must run BEFORE any HTTP library (discord, aiohttp, etc.) is imported.
 								# ---------------------------------------------------------------------------
 								def _ensure_ssl_certs() -> None:
 								    """Set SSL_CERT_FILE if the system doesn't expose CA certs to Python."""
 								    if "SSL_CERT_FILE" in os.environ:
 								        return  # user already configured it
 								    import ssl
 								    # 1. Python's compiled-in defaults
 								    paths = ssl.get_default_verify_paths()
 								    for candidate in (paths.cafile, paths.openssl_cafile):
 								        if candidate and os.path.exists(candidate):
 								            os.environ["SSL_CERT_FILE"] = candidate
 								            return
 								    # 2. certifi (ships its own Mozilla bundle)
 								    try:
 								        import certifi
 								        os.environ["SSL_CERT_FILE"] = certifi.where()
 								        return
 								    except ImportError:
 								        pass
 								    # 3. Common distro / macOS locations
 								    for candidate in (
 								        "/etc/ssl/certs/ca-certificates.crt",               # Debian/Ubuntu/Gentoo
 								        "/etc/pki/tls/certs/ca-bundle.crt",                 # RHEL/CentOS 7
 								        "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", # RHEL/CentOS 8+
 								        "/etc/ssl/ca-bundle.pem",                            # SUSE/OpenSUSE
 								        "/etc/ssl/cert.pem",                                 # Alpine / macOS
 								        "/etc/pki/tls/cert.pem",                             # Fedora
 								        "/usr/local/etc/openssl@1.1/cert.pem",               # macOS Homebrew Intel
 								        "/opt/homebrew/etc/openssl@1.1/cert.pem",            # macOS Homebrew ARM
 								    ):
 								        if os.path.exists(candidate):
 								            os.environ["SSL_CERT_FILE"] = candidate
 								            return
 								_ensure_ssl_certs()
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								# Add parent directory to path
 								sys.path.insert(0, str(Path(__file__).parent.parent))
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								# Resolve Hermes home directory (respects HERMES_HOME override)
-												refactor: consolidate get_hermes_home() and parse_reasoning_effort() (#3062)

Centralizes two widely-duplicated patterns into hermes_constants.py:

1. get_hermes_home() — Path resolution for ~/.hermes (HERMES_HOME env var)
   - Was copy-pasted inline across 30+ files as:
     Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
   - Now defined once in hermes_constants.py (zero-dependency module)
   - hermes_cli/config.py re-exports it for backward compatibility
   - Removed local wrapper functions in honcho_integration/client.py,
     tools/website_policy.py, tools/tirith_security.py, hermes_cli/uninstall.py

2. parse_reasoning_effort() — Reasoning effort string validation
   - Was copy-pasted in cli.py, gateway/run.py, cron/scheduler.py
   - Same validation logic: check against (xhigh, high, medium, low, minimal, none)
   - Now defined once in hermes_constants.py, called from all 3 locations
   - Warning log for unknown values kept at call sites (context-specific)

31 files changed, net +31 lines (125 insertions, 94 deletions)
Full test suite: 6179 passed, 0 failed
											
										
										
											2026-03-25 15:54:28 -07:00
+								from hermes_constants import get_hermes_home
 								_hermes_home = get_hermes_home()
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
-												fix(config): reload .env over stale shell overrides

Hermes startup entrypoints now load ~/.hermes/.env and project fallback env files with user config taking precedence over stale shell-exported values. This makes model/provider/base URL changes in .env actually take effect after restarting Hermes. Adds a shared env loader plus regression coverage, and reproduces the original bug case where OPENAI_BASE_URL and HERMES_INFERENCE_PROVIDER remained stuck on old shell values before import.

											
										
										
											2026-03-15 06:46:28 -07:00
+								# Load environment variables from ~/.hermes/.env first.
 								# User-managed env files should override stale shell exports on restart.
 								from dotenv import load_dotenv  # backward-compat for tests that monkeypatch this symbol
 								from hermes_cli.env_loader import load_hermes_dotenv
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								_env_path = _hermes_home / '.env'
-												fix(config): reload .env over stale shell overrides

Hermes startup entrypoints now load ~/.hermes/.env and project fallback env files with user config taking precedence over stale shell-exported values. This makes model/provider/base URL changes in .env actually take effect after restarting Hermes. Adds a shared env loader plus regression coverage, and reproduces the original bug case where OPENAI_BASE_URL and HERMES_INFERENCE_PROVIDER remained stuck on old shell values before import.

											
										
										
											2026-03-15 06:46:28 -07:00
+								load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')
-												Update requirements and enhance environment variable loading in gateway

- Updated requirements.txt to uncomment and ensure the installation of `python-telegram-bot` and `discord.py` packages.
- Enhanced the gateway run script to load environment variables from a specified path, improving configuration management and flexibility for different environments.

											
										
										
											2026-02-03 07:02:59 -08:00
-												feat: integrate config.yaml values into environment for enhanced flexibility

- Added functionality to load values from config.yaml into the environment, allowing os.getenv() to access them.
- Ensured that existing environment variables take precedence over config values.
- Updated DiscordAdapter to resolve usernames in DISCORD_ALLOWED_USERS to numeric IDs, improving user authorization checks.
- Enhanced event handling to provide clearer logging and ensure proper synchronization of slash commands.

											
										
										
											2026-02-22 17:35:45 -08:00
+								# Bridge config.yaml values into the environment so os.getenv() picks them up.
-												feat(config): enhance terminal environment variable management

- Updated .env.example to clarify terminal backend configuration and its relationship with config.yaml.
- Modified gateway/run.py to ensure terminal settings from config.yaml take precedence over .env, improving consistency in environment variable handling.
- Added mapping for terminal configuration options to corresponding environment variables for better integration.

											
										
										
											2026-02-26 20:05:35 -08:00
+								# config.yaml is authoritative for terminal settings — overrides .env.
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								_config_path = _hermes_home / 'config.yaml'
-												feat: integrate config.yaml values into environment for enhanced flexibility

- Added functionality to load values from config.yaml into the environment, allowing os.getenv() to access them.
- Ensured that existing environment variables take precedence over config values.
- Updated DiscordAdapter to resolve usernames in DISCORD_ALLOWED_USERS to numeric IDs, improving user authorization checks.
- Enhanced event handling to provide clearer logging and ensure proper synchronization of slash commands.

											
										
										
											2026-02-22 17:35:45 -08:00
+								if _config_path.exists():
 								    try:
 								        import yaml as _yaml
-												Add explicit encoding="utf-8" to all config/data file open() calls

On Windows, open() defaults to the system locale encoding (cp1252,
cp1254, etc.) rather than UTF-8. This breaks any file containing
non-ASCII characters, and also causes crashes when writing JSON with
ensure_ascii=False.

This adds encoding="utf-8" to open() calls in:
- gateway/run.py (config.yaml reads/writes throughout)
- gateway/config.py (gateway.json and config.yaml)
- hermes_cli/config.py (config.yaml load/save)
- hermes_cli/main.py (session export with ensure_ascii=False)
- hermes_cli/status.py (jobs.json and sessions.json)

											
										
										
											2026-03-05 17:04:33 -05:00
+								        with open(_config_path, encoding="utf-8") as _f:
-												feat: integrate config.yaml values into environment for enhanced flexibility

- Added functionality to load values from config.yaml into the environment, allowing os.getenv() to access them.
- Ensured that existing environment variables take precedence over config values.
- Updated DiscordAdapter to resolve usernames in DISCORD_ALLOWED_USERS to numeric IDs, improving user authorization checks.
- Enhanced event handling to provide clearer logging and ensure proper synchronization of slash commands.

											
										
										
											2026-02-22 17:35:45 -08:00
+								            _cfg = _yaml.safe_load(_f) or {}
-												feat(config): support ${ENV_VAR} substitution in config.yaml (#2684)

* feat(config): support ${ENV_VAR} substitution in config.yaml

* fix: extend env var expansion to CLI and gateway config loaders

The original PR (#2680) only wired _expand_env_vars into load_config(),
which is used by 'hermes tools' and 'hermes setup'. The two primary
config paths were missed:

- load_cli_config() in cli.py (interactive CLI)
- Module-level _cfg in gateway/run.py (gateway — bridges api_keys to env vars)

Also:
- Remove redundant 'import re' (already imported at module level)
- Add missing blank lines between top-level functions (PEP 8)
- Add tests for load_cli_config() expansion

---------

Co-authored-by: teyrebaz33 <hakanerten02@hotmail.com>
											
										
										
											2026-03-23 16:02:06 -07:00
+								        # Expand ${ENV_VAR} references before bridging to env vars.
 								        from hermes_cli.config import _expand_env_vars
 								        _cfg = _expand_env_vars(_cfg)
-												feat(config): enhance terminal environment variable management

- Updated .env.example to clarify terminal backend configuration and its relationship with config.yaml.
- Modified gateway/run.py to ensure terminal settings from config.yaml take precedence over .env, improving consistency in environment variable handling.
- Added mapping for terminal configuration options to corresponding environment variables for better integration.

											
										
										
											2026-02-26 20:05:35 -08:00
+								        # Top-level simple values (fallback only — don't override .env)
-												feat: integrate config.yaml values into environment for enhanced flexibility

- Added functionality to load values from config.yaml into the environment, allowing os.getenv() to access them.
- Ensured that existing environment variables take precedence over config values.
- Updated DiscordAdapter to resolve usernames in DISCORD_ALLOWED_USERS to numeric IDs, improving user authorization checks.
- Enhanced event handling to provide clearer logging and ensure proper synchronization of slash commands.

											
										
										
											2026-02-22 17:35:45 -08:00
+								        for _key, _val in _cfg.items():
 								            if isinstance(_val, (str, int, float, bool)) and _key not in os.environ:
 								                os.environ[_key] = str(_val)
-												feat(config): enhance terminal environment variable management

- Updated .env.example to clarify terminal backend configuration and its relationship with config.yaml.
- Modified gateway/run.py to ensure terminal settings from config.yaml take precedence over .env, improving consistency in environment variable handling.
- Added mapping for terminal configuration options to corresponding environment variables for better integration.

											
										
										
											2026-02-26 20:05:35 -08:00
+								        # Terminal config is nested — bridge to TERMINAL_* env vars.
 								        # config.yaml overrides .env for these since it's the documented config path.
 								        _terminal_cfg = _cfg.get("terminal", {})
 								        if _terminal_cfg and isinstance(_terminal_cfg, dict):
 								            _terminal_env_map = {
 								                "backend": "TERMINAL_ENV",
 								                "cwd": "TERMINAL_CWD",
 								                "timeout": "TERMINAL_TIMEOUT",
 								                "lifetime_seconds": "TERMINAL_LIFETIME_SECONDS",
 								                "docker_image": "TERMINAL_DOCKER_IMAGE",
-												fix(docker): add explicit env allowlist for container credentials (#1436)

Docker terminal sessions are secret-dark by default. This adds
terminal.docker_forward_env as an explicit allowlist for env vars
that may be forwarded into Docker containers.

Values resolve from the current shell first, then fall back to
~/.hermes/.env. Only variables the user explicitly lists are
forwarded — nothing is auto-exposed.

Cherry-picked from PR #1449 by @teknium1, conflict-resolved onto
current main.

Fixes #1436
Supersedes #1439

											
										
										
											2026-03-17 02:34:25 -07:00
+								                "docker_forward_env": "TERMINAL_DOCKER_FORWARD_ENV",
-												feat(config): enhance terminal environment variable management

- Updated .env.example to clarify terminal backend configuration and its relationship with config.yaml.
- Modified gateway/run.py to ensure terminal settings from config.yaml take precedence over .env, improving consistency in environment variable handling.
- Added mapping for terminal configuration options to corresponding environment variables for better integration.

											
										
										
											2026-02-26 20:05:35 -08:00
+								                "singularity_image": "TERMINAL_SINGULARITY_IMAGE",
 								                "modal_image": "TERMINAL_MODAL_IMAGE",
-												fix(daytona): add missing config mappings in gateway, CLI defaults, and config display

Signed-off-by: rovle <lovre.pesut@gmail.com>

											
										
										
											2026-03-05 11:12:50 -08:00
+								                "daytona_image": "TERMINAL_DAYTONA_IMAGE",
-												feat(config): enhance terminal environment variable management

- Updated .env.example to clarify terminal backend configuration and its relationship with config.yaml.
- Modified gateway/run.py to ensure terminal settings from config.yaml take precedence over .env, improving consistency in environment variable handling.
- Added mapping for terminal configuration options to corresponding environment variables for better integration.

											
										
										
											2026-02-26 20:05:35 -08:00
+								                "ssh_host": "TERMINAL_SSH_HOST",
 								                "ssh_user": "TERMINAL_SSH_USER",
 								                "ssh_port": "TERMINAL_SSH_PORT",
 								                "ssh_key": "TERMINAL_SSH_KEY",
 								                "container_cpu": "TERMINAL_CONTAINER_CPU",
 								                "container_memory": "TERMINAL_CONTAINER_MEMORY",
 								                "container_disk": "TERMINAL_CONTAINER_DISK",
 								                "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
-												fix: gateway missing docker_volumes config bridge + list serialization bug

The gateway's config.yaml → env var bridge was missing docker_volumes,
so Docker volume mounts configured in config.yaml were ignored for
gateway sessions (Telegram, Discord, etc.) while working in CLI.

Also fixes list serialization: str() produces Python repr with single
quotes which json.loads() in terminal_tool.py can't parse. Now uses
json.dumps() for list values.

Based on PR #431 by @manuelschipper (applied manually due to stale branch).

											
										
										
											2026-03-09 17:24:00 -07:00
+								                "docker_volumes": "TERMINAL_DOCKER_VOLUMES",
-												fix: respect config.yaml cwd in gateway, add sandbox_dir config option

Two fixes:

1. Gateway CWD override: TERMINAL_CWD from config.yaml was being
   unconditionally overwritten by the messaging_cwd fallback (line 114).
   Now explicit paths in config.yaml are respected — only '.' / 'auto' /
   'cwd' (or unset) fall back to MESSAGING_CWD or home directory.

2. sandbox_dir config: Added terminal.sandbox_dir to config.yaml bridge
   in gateway/run.py, cli.py, and hermes_cli/config.py. Maps to
   TERMINAL_SANDBOX_DIR env var, which get_sandbox_dir() reads to
   determine where Docker/Singularity sandbox data is stored (default:
   ~/.hermes/sandboxes/). Users can now set:
     hermes config set terminal.sandbox_dir /data/hermes-sandboxes

											
										
										
											2026-03-08 01:33:46 -08:00
+								                "sandbox_dir": "TERMINAL_SANDBOX_DIR",
-												feat: enable persistent shell by default for SSH, add config option

SSH persistent shell now defaults to true — non-local backends benefit
most from state persistence across execute() calls. Local backend
remains opt-in via TERMINAL_LOCAL_PERSISTENT env var.

New config.yaml option: terminal.persistent_shell (default: true)
Controls the default for non-local backends. Users can disable with:
  hermes config set terminal.persistent_shell false

Precedence: per-backend env var > TERMINAL_PERSISTENT_SHELL > default.

Wired through cli.py, gateway/run.py, and hermes_cli/config.py so the
config.yaml value reaches terminal_tool via env var bridge.

											
										
										
											2026-03-15 20:17:13 -07:00
+								                "persistent_shell": "TERMINAL_PERSISTENT_SHELL",
-												feat(config): enhance terminal environment variable management

- Updated .env.example to clarify terminal backend configuration and its relationship with config.yaml.
- Modified gateway/run.py to ensure terminal settings from config.yaml take precedence over .env, improving consistency in environment variable handling.
- Added mapping for terminal configuration options to corresponding environment variables for better integration.

											
										
										
											2026-02-26 20:05:35 -08:00
+								            }
 								            for _cfg_key, _env_var in _terminal_env_map.items():
 								                if _cfg_key in _terminal_cfg:
-												fix: gateway missing docker_volumes config bridge + list serialization bug

The gateway's config.yaml → env var bridge was missing docker_volumes,
so Docker volume mounts configured in config.yaml were ignored for
gateway sessions (Telegram, Discord, etc.) while working in CLI.

Also fixes list serialization: str() produces Python repr with single
quotes which json.loads() in terminal_tool.py can't parse. Now uses
json.dumps() for list values.

Based on PR #431 by @manuelschipper (applied manually due to stale branch).

											
										
										
											2026-03-09 17:24:00 -07:00
+								                    _val = _terminal_cfg[_cfg_key]
 								                    if isinstance(_val, list):
 								                        os.environ[_env_var] = json.dumps(_val)
 								                    else:
 								                        os.environ[_env_var] = str(_val)
-												feat(compression): add summary_base_url + move compression config to YAML-only

- Add summary_base_url config option to compression block for custom
  OpenAI-compatible endpoints (e.g. zai, DeepSeek, Ollama)
- Remove compression env var bridges from cli.py and gateway/run.py
  (CONTEXT_COMPRESSION_* env vars no longer set from config)
- Switch run_agent.py to read compression config directly from
  config.yaml instead of env vars
- Fix backwards-compat block in _resolve_task_provider_model to also
  fire when auxiliary.compression.provider is 'auto' (DEFAULT_CONFIG
  sets this, which was silently preventing the compression section's
  summary_* keys from being read)
- Add test for summary_base_url config-to-client flow
- Update docs to show compression as config.yaml-only

Closes #1591
Based on PR #1702 by @uzaylisak
											
										
										
											2026-03-17 04:46:15 -07:00
+								        # Compression config is read directly from config.yaml by run_agent.py
 								        # and auxiliary_client.py — no env var bridging needed.
-												feat: add direct endpoint overrides for auxiliary and delegation

Add base_url/api_key overrides for auxiliary tasks and delegation so users can
route those flows straight to a custom OpenAI-compatible endpoint without
having to rely on provider=main or named custom providers.

Also clear gateway session env vars in test isolation so the full suite stays
deterministic when run from a messaging-backed agent session.

											
										
										
											2026-03-14 20:48:29 -07:00
+								        # Auxiliary model/direct-endpoint overrides (vision, web_extract).
 								        # Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
-												fix: harden auxiliary model config — gateway bridge, vision safety, tests

Improvements on top of PR #606 (auxiliary model configuration):

1. Gateway bridge: Added auxiliary.* and compression.summary_provider
   config bridging to gateway/run.py so config.yaml settings work from
   messaging platforms (not just CLI). Matches the pattern in cli.py.

2. Vision auto-fallback safety: In auto mode, vision now only tries
   OpenRouter + Nous Portal (known multimodal-capable providers).
   Custom endpoints, Codex, and API-key providers are skipped to avoid
   confusing errors from providers that don't support vision input.
   Explicit provider override (AUXILIARY_VISION_PROVIDER=main) still
   allows using any provider.

3. Comprehensive tests (46 new):
   - _get_auxiliary_provider env var resolution (8 tests)
   - _resolve_forced_provider with all provider types (8 tests)
   - Per-task provider routing integration (4 tests)
   - Vision auto-fallback safety (7 tests)
   - Config bridging logic (11 tests)
   - Gateway/CLI bridge parity (2 tests)
   - Vision model override via env var (2 tests)
   - DEFAULT_CONFIG shape validation (4 tests)

4. Docs: Added auxiliary_client.py to AGENTS.md project structure.
   Updated module docstring with separate text/vision resolution chains.

Tests: 2429 passed (was 2383).

											
										
										
											2026-03-08 18:06:40 -07:00
+								        _auxiliary_cfg = _cfg.get("auxiliary", {})
 								        if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
 								            _aux_task_env = {
-												feat: add direct endpoint overrides for auxiliary and delegation

Add base_url/api_key overrides for auxiliary tasks and delegation so users can
route those flows straight to a custom OpenAI-compatible endpoint without
having to rely on provider=main or named custom providers.

Also clear gateway session env vars in test isolation so the full suite stays
deterministic when run from a messaging-backed agent session.

											
										
										
											2026-03-14 20:48:29 -07:00
+								                "vision": {
 								                    "provider": "AUXILIARY_VISION_PROVIDER",
 								                    "model": "AUXILIARY_VISION_MODEL",
 								                    "base_url": "AUXILIARY_VISION_BASE_URL",
 								                    "api_key": "AUXILIARY_VISION_API_KEY",
 								                },
 								                "web_extract": {
 								                    "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
 								                    "model": "AUXILIARY_WEB_EXTRACT_MODEL",
 								                    "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
 								                    "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
 								                },
-												feat: smart approvals + /stop command (inspired by OpenAI Codex)

* feat: smart approvals — LLM-based risk assessment for dangerous commands

Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.

Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).

Config (config.yaml):
  approvals:
    mode: manual   # manual (default), smart, off

Modes:
- manual — current behavior, always prompt the user
- smart  — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
           or ESCALATE (fall through to manual prompt)
- off    — skip all approval prompts (equivalent to --yolo)

When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.

The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.

* feat: make smart approval model configurable via config.yaml

Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).

Config:
  auxiliary:
    approval:
      provider: auto
      model: ''        # fast/cheap model recommended
      base_url: ''
      api_key: ''

Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.

* feat: add /stop command to kill all background processes

Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.

Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.

Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.
											
										
										
											2026-03-16 06:20:11 -07:00
+								                "approval": {
 								                    "provider": "AUXILIARY_APPROVAL_PROVIDER",
 								                    "model": "AUXILIARY_APPROVAL_MODEL",
 								                    "base_url": "AUXILIARY_APPROVAL_BASE_URL",
 								                    "api_key": "AUXILIARY_APPROVAL_API_KEY",
 								                },
-												fix: harden auxiliary model config — gateway bridge, vision safety, tests

Improvements on top of PR #606 (auxiliary model configuration):

1. Gateway bridge: Added auxiliary.* and compression.summary_provider
   config bridging to gateway/run.py so config.yaml settings work from
   messaging platforms (not just CLI). Matches the pattern in cli.py.

2. Vision auto-fallback safety: In auto mode, vision now only tries
   OpenRouter + Nous Portal (known multimodal-capable providers).
   Custom endpoints, Codex, and API-key providers are skipped to avoid
   confusing errors from providers that don't support vision input.
   Explicit provider override (AUXILIARY_VISION_PROVIDER=main) still
   allows using any provider.

3. Comprehensive tests (46 new):
   - _get_auxiliary_provider env var resolution (8 tests)
   - _resolve_forced_provider with all provider types (8 tests)
   - Per-task provider routing integration (4 tests)
   - Vision auto-fallback safety (7 tests)
   - Config bridging logic (11 tests)
   - Gateway/CLI bridge parity (2 tests)
   - Vision model override via env var (2 tests)
   - DEFAULT_CONFIG shape validation (4 tests)

4. Docs: Added auxiliary_client.py to AGENTS.md project structure.
   Updated module docstring with separate text/vision resolution chains.

Tests: 2429 passed (was 2383).

											
										
										
											2026-03-08 18:06:40 -07:00
+								            }
-												feat: add direct endpoint overrides for auxiliary and delegation

Add base_url/api_key overrides for auxiliary tasks and delegation so users can
route those flows straight to a custom OpenAI-compatible endpoint without
having to rely on provider=main or named custom providers.

Also clear gateway session env vars in test isolation so the full suite stays
deterministic when run from a messaging-backed agent session.

											
										
										
											2026-03-14 20:48:29 -07:00
+								            for _task_key, _env_map in _aux_task_env.items():
-												fix: harden auxiliary model config — gateway bridge, vision safety, tests

Improvements on top of PR #606 (auxiliary model configuration):

1. Gateway bridge: Added auxiliary.* and compression.summary_provider
   config bridging to gateway/run.py so config.yaml settings work from
   messaging platforms (not just CLI). Matches the pattern in cli.py.

2. Vision auto-fallback safety: In auto mode, vision now only tries
   OpenRouter + Nous Portal (known multimodal-capable providers).
   Custom endpoints, Codex, and API-key providers are skipped to avoid
   confusing errors from providers that don't support vision input.
   Explicit provider override (AUXILIARY_VISION_PROVIDER=main) still
   allows using any provider.

3. Comprehensive tests (46 new):
   - _get_auxiliary_provider env var resolution (8 tests)
   - _resolve_forced_provider with all provider types (8 tests)
   - Per-task provider routing integration (4 tests)
   - Vision auto-fallback safety (7 tests)
   - Config bridging logic (11 tests)
   - Gateway/CLI bridge parity (2 tests)
   - Vision model override via env var (2 tests)
   - DEFAULT_CONFIG shape validation (4 tests)

4. Docs: Added auxiliary_client.py to AGENTS.md project structure.
   Updated module docstring with separate text/vision resolution chains.

Tests: 2429 passed (was 2383).

											
										
										
											2026-03-08 18:06:40 -07:00
+								                _task_cfg = _auxiliary_cfg.get(_task_key, {})
 								                if not isinstance(_task_cfg, dict):
 								                    continue
 								                _prov = str(_task_cfg.get("provider", "")).strip()
 								                _model = str(_task_cfg.get("model", "")).strip()
-												feat: add direct endpoint overrides for auxiliary and delegation

Add base_url/api_key overrides for auxiliary tasks and delegation so users can
route those flows straight to a custom OpenAI-compatible endpoint without
having to rely on provider=main or named custom providers.

Also clear gateway session env vars in test isolation so the full suite stays
deterministic when run from a messaging-backed agent session.

											
										
										
											2026-03-14 20:48:29 -07:00
+								                _base_url = str(_task_cfg.get("base_url", "")).strip()
 								                _api_key = str(_task_cfg.get("api_key", "")).strip()
-												fix: harden auxiliary model config — gateway bridge, vision safety, tests

Improvements on top of PR #606 (auxiliary model configuration):

1. Gateway bridge: Added auxiliary.* and compression.summary_provider
   config bridging to gateway/run.py so config.yaml settings work from
   messaging platforms (not just CLI). Matches the pattern in cli.py.

2. Vision auto-fallback safety: In auto mode, vision now only tries
   OpenRouter + Nous Portal (known multimodal-capable providers).
   Custom endpoints, Codex, and API-key providers are skipped to avoid
   confusing errors from providers that don't support vision input.
   Explicit provider override (AUXILIARY_VISION_PROVIDER=main) still
   allows using any provider.

3. Comprehensive tests (46 new):
   - _get_auxiliary_provider env var resolution (8 tests)
   - _resolve_forced_provider with all provider types (8 tests)
   - Per-task provider routing integration (4 tests)
   - Vision auto-fallback safety (7 tests)
   - Config bridging logic (11 tests)
   - Gateway/CLI bridge parity (2 tests)
   - Vision model override via env var (2 tests)
   - DEFAULT_CONFIG shape validation (4 tests)

4. Docs: Added auxiliary_client.py to AGENTS.md project structure.
   Updated module docstring with separate text/vision resolution chains.

Tests: 2429 passed (was 2383).

											
										
										
											2026-03-08 18:06:40 -07:00
+								                if _prov and _prov != "auto":
-												feat: add direct endpoint overrides for auxiliary and delegation

Add base_url/api_key overrides for auxiliary tasks and delegation so users can
route those flows straight to a custom OpenAI-compatible endpoint without
having to rely on provider=main or named custom providers.

Also clear gateway session env vars in test isolation so the full suite stays
deterministic when run from a messaging-backed agent session.

											
										
										
											2026-03-14 20:48:29 -07:00
+								                    os.environ[_env_map["provider"]] = _prov
-												fix: harden auxiliary model config — gateway bridge, vision safety, tests

Improvements on top of PR #606 (auxiliary model configuration):

1. Gateway bridge: Added auxiliary.* and compression.summary_provider
   config bridging to gateway/run.py so config.yaml settings work from
   messaging platforms (not just CLI). Matches the pattern in cli.py.

2. Vision auto-fallback safety: In auto mode, vision now only tries
   OpenRouter + Nous Portal (known multimodal-capable providers).
   Custom endpoints, Codex, and API-key providers are skipped to avoid
   confusing errors from providers that don't support vision input.
   Explicit provider override (AUXILIARY_VISION_PROVIDER=main) still
   allows using any provider.

3. Comprehensive tests (46 new):
   - _get_auxiliary_provider env var resolution (8 tests)
   - _resolve_forced_provider with all provider types (8 tests)
   - Per-task provider routing integration (4 tests)
   - Vision auto-fallback safety (7 tests)
   - Config bridging logic (11 tests)
   - Gateway/CLI bridge parity (2 tests)
   - Vision model override via env var (2 tests)
   - DEFAULT_CONFIG shape validation (4 tests)

4. Docs: Added auxiliary_client.py to AGENTS.md project structure.
   Updated module docstring with separate text/vision resolution chains.

Tests: 2429 passed (was 2383).

											
										
										
											2026-03-08 18:06:40 -07:00
+								                if _model:
-												feat: add direct endpoint overrides for auxiliary and delegation

Add base_url/api_key overrides for auxiliary tasks and delegation so users can
route those flows straight to a custom OpenAI-compatible endpoint without
having to rely on provider=main or named custom providers.

Also clear gateway session env vars in test isolation so the full suite stays
deterministic when run from a messaging-backed agent session.

											
										
										
											2026-03-14 20:48:29 -07:00
+								                    os.environ[_env_map["model"]] = _model
 								                if _base_url:
 								                    os.environ[_env_map["base_url"]] = _base_url
 								                if _api_key:
 								                    os.environ[_env_map["api_key"]] = _api_key
-												refactor(cli): update max turns configuration precedence and enhance documentation

											
										
										
											2026-02-28 10:35:49 -08:00
+								        _agent_cfg = _cfg.get("agent", {})
 								        if _agent_cfg and isinstance(_agent_cfg, dict):
 								            if "max_turns" in _agent_cfg:
 								                os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
-												fix(timezone): add timezone-aware clock across agent, cron, and execute_code

											
										
										
											2026-03-03 11:57:18 +05:30
+								        # Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
 								        # HERMES_TIMEZONE from .env takes precedence (already in os.environ).
 								        _tz_cfg = _cfg.get("timezone", "")
 								        if _tz_cfg and isinstance(_tz_cfg, str) and "HERMES_TIMEZONE" not in os.environ:
 								            os.environ["HERMES_TIMEZONE"] = _tz_cfg.strip()
-												feat: add config toggle to disable secret redaction

New config option:

  security:
    redact_secrets: false  # default: true

When set to false, API keys, tokens, and passwords are shown in
full in read_file, search_files, and terminal output. Useful for
debugging auth issues where you need to verify the actual key value.

Bridged to both CLI and gateway via HERMES_REDACT_SECRETS env var.
The check is in redact_sensitive_text() itself, so all call sites
(terminal, file tools, log formatter) respect it.

											
										
										
											2026-03-09 01:04:33 -07:00
+								        # Security settings
 								        _security_cfg = _cfg.get("security", {})
 								        if isinstance(_security_cfg, dict):
 								            _redact = _security_cfg.get("redact_secrets")
 								            if _redact is not None:
 								                os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
-												feat: integrate config.yaml values into environment for enhanced flexibility

- Added functionality to load values from config.yaml into the environment, allowing os.getenv() to access them.
- Ensured that existing environment variables take precedence over config values.
- Updated DiscordAdapter to resolve usernames in DISCORD_ALLOWED_USERS to numeric IDs, improving user authorization checks.
- Enhanced event handling to provide clearer logging and ensure proper synchronization of slash commands.

											
										
										
											2026-02-22 17:35:45 -08:00
+								    except Exception:
 								        pass  # Non-fatal; gateway can still run with .env values
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								# Gateway runs in quiet mode - suppress debug output and use cwd directly (no temp dirs)
 								os.environ["HERMES_QUIET"] = "1"
-												Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews

Add AGENTS and SOUL.md support

Add Exec Approval

											
										
										
											2026-02-12 10:05:08 -08:00
+								# Enable interactive exec approval for dangerous commands on messaging platforms
 								os.environ["HERMES_EXEC_ASK"] = "1"
-												fix: respect config.yaml cwd in gateway, add sandbox_dir config option

Two fixes:

1. Gateway CWD override: TERMINAL_CWD from config.yaml was being
   unconditionally overwritten by the messaging_cwd fallback (line 114).
   Now explicit paths in config.yaml are respected — only '.' / 'auto' /
   'cwd' (or unset) fall back to MESSAGING_CWD or home directory.

2. sandbox_dir config: Added terminal.sandbox_dir to config.yaml bridge
   in gateway/run.py, cli.py, and hermes_cli/config.py. Maps to
   TERMINAL_SANDBOX_DIR env var, which get_sandbox_dir() reads to
   determine where Docker/Singularity sandbox data is stored (default:
   ~/.hermes/sandboxes/). Users can now set:
     hermes config set terminal.sandbox_dir /data/hermes-sandboxes

											
										
										
											2026-03-08 01:33:46 -08:00
+								# Set terminal working directory for messaging platforms.
 								# If the user set an explicit path in config.yaml (not "." or "auto"),
 								# respect it. Otherwise use MESSAGING_CWD or default to home directory.
 								_configured_cwd = os.environ.get("TERMINAL_CWD", "")
 								if not _configured_cwd or _configured_cwd in (".", "auto", "cwd"):
 								    messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home())
 								    os.environ["TERMINAL_CWD"] = messaging_cwd
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								from gateway.config import (
 								    Platform,
 								    GatewayConfig,
 								    load_gateway_config,
 								)
 								from gateway.session import (
 								    SessionStore,
 								    SessionSource,
 								    SessionContext,
 								    build_session_context,
 								    build_session_context_prompt,
-												refactor: extract build_session_key() as single source of truth

The session key construction logic was duplicated in 4 places
(session.py + 3 inline copies in run.py), which is exactly the
kind of drift that caused issue #349 in the first place.

Extracted build_session_key() as a public function in session.py.
SessionStore._generate_session_key() now delegates to it, and all
inline key construction in run.py has been replaced with calls to
the shared function. Tests updated to test the function directly.

											
										
										
											2026-03-04 03:34:45 -08:00
+								    build_session_key,
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								)
-												chore: remove ~100 unused imports across 55 files (#3016)

Automated cleanup via pyflakes + autoflake with manual review.

Changes:
- Removed unused stdlib imports (os, sys, json, pathlib.Path, etc.)
- Removed unused typing imports (List, Dict, Any, Optional, Tuple, Set, etc.)
- Removed unused internal imports (hermes_cli.auth, hermes_cli.config, etc.)
- Fixed cli.py: removed 8 shadowed banner imports (imported from hermes_cli.banner
  then immediately redefined locally — only build_welcome_banner is actually used)
- Added noqa comments to imports that appear unused but serve a purpose:
  - Re-exports (gateway/session.py SessionResetPolicy, tools/terminal_tool.py
    is_interrupted/_interrupt_event)
  - SDK presence checks in try/except (daytona, fal_client, discord)
  - Test mock targets (auxiliary_client.py Path, mcp_config.py get_hermes_home)

Zero behavioral changes. Full test suite passes (6162/6162, 2 pre-existing
streaming test failures unrelated to this change).
											
										
										
											2026-03-25 15:02:03 -07:00
+								from gateway.delivery import DeliveryRouter
-												Enhance image handling and analysis capabilities across platforms

- Updated the vision tool to accept both HTTP/HTTPS URLs and local file paths for image analysis.
- Implemented caching of user-uploaded images in local directories to ensure reliable access for the vision tool, addressing issues with ephemeral URLs.
- Enhanced platform adapters (Discord, Telegram, WhatsApp) to download and cache images, allowing for immediate analysis and enriched message context.
- Added a new method to auto-analyze images attached by users, enriching the conversation with detailed descriptions.
- Improved documentation for image handling processes and updated related functions for clarity and efficiency.

											
										
										
											2026-02-15 16:10:50 -08:00
+								from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								logger = logging.getLogger(__name__)
-												fix(gateway): prevent concurrent agent runs for the same session

Place a sentinel in _running_agents immediately after the "already
running" guard check passes — before any await.  Without this, the
numerous await points between the guard (line 1324) and agent
registration (track_agent at line 4790) create a window where a
second message for the same session can bypass the guard and start
a duplicate agent, corrupting the transcript.

The await gap includes: hook emissions, vision enrichment (external
API call), audio transcription (external API call), session hygiene
compression, and the run_in_executor call itself.  For messages with
media attachments the window can be several seconds wide.

The sentinel is wrapped in try/finally so it is always cleaned up —
even if the handler raises or takes an early-return path.  When the
real AIAgent is created, track_agent() overwrites the sentinel with
the actual instance (preserving interrupt support).

Also handles the edge case where a message arrives while the sentinel
is set but no real agent exists yet: the message is queued via the
adapter's pending-message mechanism instead of attempting to call
interrupt() on the sentinel object.

											
										
										
											2026-03-19 22:32:37 +03:00
+								# Sentinel placed into _running_agents immediately when a session starts
 								# processing, *before* any await.  Prevents a second message for the same
 								# session from bypassing the "already running" guard during the async gap
 								# between the guard check and actual agent creation.
 								_AGENT_PENDING_SENTINEL = object()
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								def _resolve_runtime_agent_kwargs() -> dict:
 								    """Resolve provider credentials for gateway-created AIAgent instances."""
 								    from hermes_cli.runtime_provider import (
 								        resolve_runtime_provider,
 								        format_runtime_provider_error,
 								    )
 								    try:
 								        runtime = resolve_runtime_provider(
 								            requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
 								        )
 								    except Exception as exc:
 								        raise RuntimeError(format_runtime_provider_error(exc)) from exc
 								    return {
 								        "api_key": runtime.get("api_key"),
 								        "base_url": runtime.get("base_url"),
 								        "provider": runtime.get("provider"),
 								        "api_mode": runtime.get("api_mode"),
-												feat: integrate GitHub Copilot providers across Hermes

Add first-class GitHub Copilot and Copilot ACP provider support across
model selection, runtime provider resolution, CLI sessions, delegated
subagents, cron jobs, and the Telegram gateway.

This also normalizes Copilot model catalogs and API modes, introduces a
Copilot ACP OpenAI-compatible shim, and fixes service-mode auth by
resolving Homebrew-installed gh binaries under launchd.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-03-17 23:40:22 -07:00
+								        "command": runtime.get("command"),
 								        "args": list(runtime.get("args") or []),
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								    }
-												fix: MCP toolset resolution for runtime and config (#3252)

Gateway sessions had their own inline toolset resolution that only read
platform_toolsets from config, which never includes MCP server names.
MCP tools were discovered and registered but invisible to the model.

- Replace duplicated gateway toolset resolution in _run_agent() and
  _run_background_task() with calls to the shared _get_platform_tools()
- Extend _get_platform_tools() to include globally enabled MCP servers
  at runtime (include_default_mcp_servers=True), while config-editing
  flows use include_default_mcp_servers=False to avoid persisting
  implicit MCP defaults into platform_toolsets
- Add homeassistant to PLATFORMS dict (was missing, caused KeyError)
- Fix CLI entry point to use _get_platform_tools() as well, so MCP
  tools are visible in CLI mode too
- Remove redundant platform_key reassignment in _run_background_task

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-26 13:39:41 -07:00
+								def _platform_config_key(platform: "Platform") -> str:
 								    """Map a Platform enum to its config.yaml key (LOCAL→"cli", rest→enum value)."""
 								    return "cli" if platform == Platform.LOCAL else platform.value
 								def _load_gateway_config() -> dict:
 								    """Load and parse ~/.hermes/config.yaml, returning {} on any error."""
 								    try:
 								        config_path = _hermes_home / 'config.yaml'
 								        if config_path.exists():
 								            import yaml
 								            with open(config_path, 'r', encoding='utf-8') as f:
 								                return yaml.safe_load(f) or {}
 								    except Exception:
 								        logger.debug("Could not load gateway config from %s", _hermes_home / 'config.yaml')
 								    return {}
 								def _resolve_gateway_model(config: dict | None = None) -> str:
-												fix(gateway): pass model to temporary AIAgent instances

Memory flush, /compress, and session hygiene create AIAgent without
model=, falling back to the hardcoded default "anthropic/claude-opus-4.6".
This fails with a 400 error when the active provider is openai-codex
(Codex only accepts its own model names like gpt-5.1-codex-mini).

Add _resolve_gateway_model() that mirrors the env/config resolution
already used by _run_agent_sync, and wire it into all three temporary
agent creation sites.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 00:09:37 +01:00
+								    """Read model from env/config — mirrors the resolution in _run_agent_sync.
 								    Without this, temporary AIAgent instances (memory flush, /compress) fall
 								    back to the hardcoded default ("anthropic/claude-opus-4.6") which fails
 								    when the active provider is openai-codex.
 								    """
 								    model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
-												fix: MCP toolset resolution for runtime and config (#3252)

Gateway sessions had their own inline toolset resolution that only read
platform_toolsets from config, which never includes MCP server names.
MCP tools were discovered and registered but invisible to the model.

- Replace duplicated gateway toolset resolution in _run_agent() and
  _run_background_task() with calls to the shared _get_platform_tools()
- Extend _get_platform_tools() to include globally enabled MCP servers
  at runtime (include_default_mcp_servers=True), while config-editing
  flows use include_default_mcp_servers=False to avoid persisting
  implicit MCP defaults into platform_toolsets
- Add homeassistant to PLATFORMS dict (was missing, caused KeyError)
- Fix CLI entry point to use _get_platform_tools() as well, so MCP
  tools are visible in CLI mode too
- Remove redundant platform_key reassignment in _run_background_task

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-26 13:39:41 -07:00
+								    cfg = config if config is not None else _load_gateway_config()
 								    model_cfg = cfg.get("model", {})
 								    if isinstance(model_cfg, str):
 								        model = model_cfg
 								    elif isinstance(model_cfg, dict):
 								        model = model_cfg.get("default", model)
-												fix(gateway): pass model to temporary AIAgent instances

Memory flush, /compress, and session hygiene create AIAgent without
model=, falling back to the hardcoded default "anthropic/claude-opus-4.6".
This fails with a 400 error when the active provider is openai-codex
(Codex only accepts its own model names like gpt-5.1-codex-mini).

Add _resolve_gateway_model() that mirrors the env/config resolution
already used by _run_agent_sync, and wire it into all three temporary
agent creation sites.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 00:09:37 +01:00
+								    return model
-												fix(gateway): fall back to sys.executable -m hermes_cli.main when hermes not on PATH

When shutil.which('hermes') returns None, _resolve_hermes_bin() now tries
sys.executable -m hermes_cli.main as a fallback. This handles setups where
Hermes is launched via a venv or module invocation and the hermes symlink is
not on PATH for the gateway process.

Fixes #1049

											
										
										
											2026-03-12 18:02:21 +03:00
+								def _resolve_hermes_bin() -> Optional[list[str]]:
 								    """Resolve the Hermes update command as argv parts.
 								    Tries in order:
 . ``shutil.which("hermes")`` — standard PATH lookup
 . ``sys.executable -m hermes_cli.main`` — fallback when Hermes is running
 								       from a venv/module invocation and the ``hermes`` shim is not on PATH
 								    Returns argv parts ready for quoting/joining, or ``None`` if neither works.
 								    """
 								    import shutil
 								    hermes_bin = shutil.which("hermes")
 								    if hermes_bin:
 								        return [hermes_bin]
 								    try:
 								        import importlib.util
 								        if importlib.util.find_spec("hermes_cli") is not None:
 								            return [sys.executable, "-m", "hermes_cli.main"]
 								    except Exception:
 								        pass
 								    return None
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								class GatewayRunner:
 								    """
 								    Main gateway controller.
 								    Manages the lifecycle of all platform adapters and routes
 								    messages to/from the agent.
 								    """
 								    def __init__(self, config: Optional[GatewayConfig] = None):
 								        self.config = config or load_gateway_config()
 								        self.adapters: Dict[Platform, BasePlatformAdapter] = {}
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								        # Load ephemeral config from config.yaml / env vars.
 								        # Both are injected at API-call time only and never persisted.
 								        self._prefill_messages = self._load_prefill_messages()
 								        self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
-												feat: add reasoning effort configuration for agent

- Introduced a new configuration option for reasoning effort in the CLI, allowing users to specify the level of reasoning the agent should perform before responding.
- Updated the CLI and agent initialization to incorporate the reasoning configuration, enhancing the agent's responsiveness and adaptability.
- Implemented logic to load reasoning effort from environment variables and configuration files, providing flexibility in agent behavior.
- Enhanced the documentation in the example configuration file to clarify the new reasoning effort options available.

											
										
										
											2026-02-24 03:30:19 -08:00
+								        self._reasoning_config = self._load_reasoning_config()
-												fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
											
										
										
											2026-03-12 05:38:19 -07:00
+								        self._show_reasoning = self._load_show_reasoning()
-												feat(provider-routing): add OpenRouter provider routing configuration

Introduced a new `provider_routing` section in the CLI configuration to control how requests are routed across providers when using OpenRouter. This includes options for sorting providers by throughput, latency, or price, as well as allowing or ignoring specific providers, setting the order of provider attempts, and managing data collection policies. Updated relevant classes and documentation to support these features, enhancing flexibility in provider selection.

											
										
										
											2026-03-01 18:24:27 -08:00
+								        self._provider_routing = self._load_provider_routing()
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								        self._fallback_model = self._load_fallback_model()
-												fix: hermes update causes dual gateways on macOS (launchd) (#1567)

* feat: add optional smart model routing

Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.

* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s

* fix(gateway): avoid recursive ExecStop in user systemd unit

* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit

The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.

---------

Co-authored-by: Ninja <ninja@local>

* feat(skills): add blender-mcp optional skill for 3D modeling

Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.

Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.

* feat(acp): support slash commands in ACP adapter (#1532)

Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.

Unrecognized /commands fall through to the LLM as normal messages.

/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.

Fixes #1402

* fix(logging): improve error logging in session search tool (#1533)

* fix(gateway): restart on retryable startup failures (#1517)

* feat(email): add skip_attachments option via config.yaml

* feat(email): add skip_attachments option via config.yaml

Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.

Configure in config.yaml:
  platforms:
    email:
      skip_attachments: true

Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.

* docs: document skip_attachments option for email adapter

* fix(telegram): retry on transient TLS failures during connect and send

Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.

Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.

Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.

Based on PR #1527 by cmd8. Closes #1526.

* feat: permissive block_anchor thresholds and unicode normalization (#1539)

Salvaged from PR #1528 by an420eth. Closes #517.

Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
  non-breaking spaces → ASCII) so LLM-produced unicode artifacts
  don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
  multiple candidates — if first/last lines match exactly, the
  block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
  preserve correct character positions

Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.

Co-authored-by: an420eth <an420eth@users.noreply.github.com>

* feat(cli): add file path autocomplete in the input prompt (#1545)

When typing a path-like token (./  ../  ~/  /  or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.

Triggered by tokens like:
  edit ./src/ma     → shows ./src/main.py, ./src/manifest.json, ...
  check ~/doc       → shows ~/docs/, ~/documents/, ...
  read /etc/hos     → shows /etc/hosts, /etc/hostname, ...
  open tools/reg    → shows tools/registry.py

Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.

Inspired by OpenCode PR #145 (file path completion menu).

Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
  tokens, _path_completions() yields filesystem Completions with
  size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
  path extraction, prefix filtering, directory markers, home
  expansion, case-insensitivity, integration with slash commands

* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)

Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.

Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.

* feat: smart approvals + /stop command (inspired by OpenAI Codex)

* feat: smart approvals — LLM-based risk assessment for dangerous commands

Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.

Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).

Config (config.yaml):
  approvals:
    mode: manual   # manual (default), smart, off

Modes:
- manual — current behavior, always prompt the user
- smart  — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
           or ESCALATE (fall through to manual prompt)
- off    — skip all approval prompts (equivalent to --yolo)

When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.

The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.

* feat: make smart approval model configurable via config.yaml

Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).

Config:
  auxiliary:
    approval:
      provider: auto
      model: ''        # fast/cheap model recommended
      base_url: ''
      api_key: ''

Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.

* feat: add /stop command to kill all background processes

Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.

Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.

Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.

* feat: first-class plugin architecture + hide status bar cost by default (#1544)

The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:

  display:
    show_cost: true

in config.yaml, or: hermes config set display.show_cost true

The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.

Status bar without cost:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m

Status bar with show_cost: true:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m

* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)

* feat: improve memory prioritization — user preferences over procedural knowledge

Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.

Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'

Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
  and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
  corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
  corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
  preferences and corrections over task-specific details

* feat: more aggressive skill creation and update prompting

Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.

Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
  to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
  if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
  now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers

* feat: first-class plugin architecture (#1555)

Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.

Core system (hermes_cli/plugins.py):
  - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
    pip entry_points (hermes_agent.plugins group)
  - PluginContext with register_tool() and register_hook()
  - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
    on_session_start/end
  - Namespace package handling for relative imports in plugins
  - Graceful error isolation — broken plugins never crash the agent

Integration (model_tools.py):
  - Plugin discovery runs after built-in + MCP tools
  - Plugin tools bypass toolset filter via get_plugin_tool_names()
  - Pre/post tool call hooks fire in handle_function_call()

CLI:
  - /plugins command shows loaded plugins, tool counts, status
  - Added to COMMANDS dict for autocomplete

Docs:
  - Getting started guide (build-a-hermes-plugin.md) — full tutorial
    building a calculator plugin step by step
  - Reference page (features/plugins.md) — quick overview + tables
  - Covers: file structure, schemas, handlers, hooks, data files,
    bundled skills, env var gating, pip distribution, common mistakes

Tests: 16 tests covering discovery, loading, hooks, tool visibility.

* fix: hermes update causes dual gateways on macOS (launchd)

Three bugs worked together to create the dual-gateway problem:

1. cmd_update only checked systemd for gateway restart, completely
   ignoring launchd on macOS. After killing the PID it would print
   'Restart it with: hermes gateway run' even when launchd was about
   to auto-respawn the process.

2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
   after SIGTERM (non-zero exit), so the user's manual restart
   created a second instance.

3. The launchd plist lacked --replace (systemd had it), so the
   respawned gateway didn't kill stale instances on startup.

Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint

* fix: add launchd plist auto-refresh + explicit restart in cmd_update

Two integration issues with the initial fix:

1. Existing macOS users with old plist (no --replace) would never
   get the fix until manual uninstall/reinstall. Added
   refresh_launchd_plist_if_needed() — mirrors the existing
   refresh_systemd_unit_if_needed(). Called from launchd_start(),
   launchd_restart(), and cmd_update.

2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
   explicit launchctl stop/start. This caused races: launchd would
   respawn the old process before the PID file was cleaned up.
   Now does explicit stop+start (matching how systemd gets an
   explicit systemctl restart), with plist refresh first so the
   new --replace flag is picked up.

---------

Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
											
										
										
											2026-03-16 12:36:29 -07:00
+								        self._smart_model_routing = self._load_smart_model_routing()
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
+								        # Wire process registry into session store for reset protection
 								        from tools.process_registry import process_registry
 								        self.session_store = SessionStore(
 								            self.config.sessions_dir, self.config,
 								            has_active_processes_fn=lambda key: process_registry.has_active_for_session(key),
 								        )
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        self.delivery_router = DeliveryRouter(self.config)
 								        self._running = False
 								        self._shutdown_event = asyncio.Event()
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								        self._exit_cleanly = False
-												fix(gateway): restart on whatsapp bridge child exit (#2334)

Co-authored-by: Frederico Ribeiro <fr@tecompanytea.com>
											
										
										
											2026-03-21 09:38:52 -07:00
+								        self._exit_with_failure = False
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								        self._exit_reason: Optional[str] = None
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
 								        # Track running agents per session for interrupt support
 								        # Key: session_key, Value: AIAgent instance
 								        self._running_agents: Dict[str, Any] = {}
 								        self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
-												fix(gateway): /model shows active fallback model instead of config default (#1615)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)

Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.

- Apply truncate_message() chunking in _send_to_platform() before
  dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
  in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement

Cherry-picked from PR #1557 by llbn.

* fix(approval): show full command in dangerous command approval (#1553)

Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:

- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
  in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests

Cherry-picked from PR #1566 by crazywriter1.

* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)

The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.

Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.

* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)

When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.

Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.

Cherry-picked from PR #1593 by ygd58.

* fix(security): block sandbox backend creds from subprocess env (#1264)

Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.

Cherry-picked from PR #1571 by ygd58.

* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.

Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.

The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).

* fix(gateway): /model shows active fallback model instead of config default (#1615)

When the agent falls back to a different model (e.g. due to rate
limiting), /model still showed the config default. Now tracks the
effective model/provider after each agent run and displays it.

Cleared when the primary model succeeds again or the user explicitly
switches via /model.

Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for
test compatibility.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
											
										
										
											2026-03-17 02:26:51 -07:00
-												feat(gateway): cache AIAgent per session for prompt caching

The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.

Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
  prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents

Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.

This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.

											
										
										
											2026-03-21 13:07:08 -07:00
+								        # Cache AIAgent instances per session to preserve prompt caching.
 								        # Without this, a new AIAgent is created per message, rebuilding the
 								        # system prompt (including memory) every turn — breaking prefix cache
 								        # and costing ~10x more on providers with prompt caching (Anthropic).
 								        # Key: session_key, Value: (AIAgent, config_signature_str)
 								        import threading as _threading
 								        self._agent_cache: Dict[str, tuple] = {}
 								        self._agent_cache_lock = _threading.Lock()
-												fix(gateway): /model shows active fallback model instead of config default (#1615)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)

Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.

- Apply truncate_message() chunking in _send_to_platform() before
  dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
  in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement

Cherry-picked from PR #1557 by llbn.

* fix(approval): show full command in dangerous command approval (#1553)

Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:

- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
  in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests

Cherry-picked from PR #1566 by crazywriter1.

* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)

The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.

Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.

* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)

When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.

Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.

Cherry-picked from PR #1593 by ygd58.

* fix(security): block sandbox backend creds from subprocess env (#1264)

Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.

Cherry-picked from PR #1571 by ygd58.

* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.

Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.

The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).

* fix(gateway): /model shows active fallback model instead of config default (#1615)

When the agent falls back to a different model (e.g. due to rate
limiting), /model still showed the config default. Now tracks the
effective model/provider after each agent run and displays it.

Cleared when the primary model succeeds again or the user explicitly
switches via /model.

Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for
test compatibility.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
											
										
										
											2026-03-17 02:26:51 -07:00
+								        # Track active fallback model/provider when primary is rate-limited.
 								        # Set after an agent run where fallback was activated; cleared when
 								        # the primary model succeeds again or the user switches via /model.
 								        self._effective_model: Optional[str] = None
 								        self._effective_provider: Optional[str] = None
-												Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews

Add AGENTS and SOUL.md support

Add Exec Approval

											
										
										
											2026-02-12 10:05:08 -08:00
+								        # Track pending exec approvals per session
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        # Key: session_key, Value: {"command": str, "pattern_key": str, ...}
 								        self._pending_approvals: Dict[str, Dict[str, Any]] = {}
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
-												feat: auto-reconnect failed gateway platforms with exponential backoff (#2584)

When a messaging platform fails to connect at startup (e.g. transient DNS
failure) or disconnects at runtime with a retryable error, the gateway now
queues it for background reconnection instead of giving up permanently.

- New _platform_reconnect_watcher background task runs alongside the
  existing session expiry watcher
- Exponential backoff: 30s, 60s, 120s, 240s, 300s cap
- Max 20 retry attempts before giving up on a platform
- Non-retryable errors (bad auth token, etc.) are not retried
- Runtime disconnections via _handle_adapter_fatal_error now queue
  retryable failures instead of triggering gateway shutdown
- On successful reconnect, adapter is wired up and channel directory
  is rebuilt automatically

Fixes the case where a DNS blip during gateway startup caused Telegram
and Discord to be permanently unavailable until manual restart.
											
										
										
											2026-03-22 23:48:24 -07:00
+								        # Track platforms that failed to connect for background reconnection.
 								        # Key: Platform enum, Value: {"config": platform_config, "attempts": int, "next_retry": float}
 								        self._failed_platforms: Dict[Platform, Dict[str, Any]] = {}
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
+								        # Persistent Honcho managers keyed by gateway session key.
 								        # This preserves write_frequency="session" semantics across short-lived
 								        # per-message AIAgent instances.
 								        self._honcho_managers: Dict[str, Any] = {}
 								        self._honcho_configs: Dict[str, Any] = {}
-												feat(security): add tirith pre-exec command scanning

Integrate tirith as a pre-execution security scanner that detects
homograph URLs, pipe-to-interpreter patterns, terminal injection,
zero-width Unicode, and environment variable manipulation — threats
the existing 50-pattern dangerous command detector doesn't cover.

Architecture: gather-then-decide — both tirith and the dangerous
command detector run before any approval prompt, preventing gateway
force=True replay from bypassing one check when only the other was
shown to the user.

New files:
- tools/tirith_security.py: subprocess wrapper with auto-installer,
  mandatory cosign provenance verification, non-blocking background
  download, disk-persistent failure markers with retryable-cause
  tracking (cosign_missing auto-clears when cosign appears on PATH)
- tests/tools/test_tirith_security.py: 62 tests covering exit code
  mapping, fail_open, cosign verification, background install,
  HERMES_HOME isolation, and failure recovery
- tests/tools/test_command_guards.py: 21 integration tests for the
  combined guard orchestration

Modified files:
- tools/approval.py: add check_all_command_guards() orchestrator,
  add allow_permanent parameter to prompt_dangerous_approval()
- tools/terminal_tool.py: replace _check_dangerous_command with
  consolidated check_all_command_guards
- cli.py: update _approval_callback for allow_permanent kwarg,
  call ensure_installed() at startup
- gateway/run.py: iterate pattern_keys list on replay approval,
  call ensure_installed() at startup
- hermes_cli/config.py: add security config defaults, split
  commented sections for independent fallback
- cli-config.yaml.example: document tirith security config

											
										
										
											2026-03-11 14:20:32 +05:30
 								        # Ensure tirith security scanner is available (downloads if needed)
 								        try:
 								            from tools.tirith_security import ensure_installed
-												fix(cli): silence tirith prefetch install warnings at startup (#1452)
											
										
										
											2026-03-15 18:07:03 -07:00
+								            ensure_installed(log_failures=False)
-												feat(security): add tirith pre-exec command scanning

Integrate tirith as a pre-execution security scanner that detects
homograph URLs, pipe-to-interpreter patterns, terminal injection,
zero-width Unicode, and environment variable manipulation — threats
the existing 50-pattern dangerous command detector doesn't cover.

Architecture: gather-then-decide — both tirith and the dangerous
command detector run before any approval prompt, preventing gateway
force=True replay from bypassing one check when only the other was
shown to the user.

New files:
- tools/tirith_security.py: subprocess wrapper with auto-installer,
  mandatory cosign provenance verification, non-blocking background
  download, disk-persistent failure markers with retryable-cause
  tracking (cosign_missing auto-clears when cosign appears on PATH)
- tests/tools/test_tirith_security.py: 62 tests covering exit code
  mapping, fail_open, cosign verification, background install,
  HERMES_HOME isolation, and failure recovery
- tests/tools/test_command_guards.py: 21 integration tests for the
  combined guard orchestration

Modified files:
- tools/approval.py: add check_all_command_guards() orchestrator,
  add allow_permanent parameter to prompt_dangerous_approval()
- tools/terminal_tool.py: replace _check_dangerous_command with
  consolidated check_all_command_guards
- cli.py: update _approval_callback for allow_permanent kwarg,
  call ensure_installed() at startup
- gateway/run.py: iterate pattern_keys list on replay approval,
  call ensure_installed() at startup
- hermes_cli/config.py: add security config defaults, split
  commented sections for independent fallback
- cli-config.yaml.example: document tirith security config

											
										
										
											2026-03-11 14:20:32 +05:30
+								        except Exception:
 								            pass  # Non-fatal — fail-open at scan time if unavailable
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
-												fix(gateway): Pass session_db to AIAgent, fixing session_search error

When running via the gateway (e.g. Telegram), the session_search tool
returned: {"error": "session_search must be handled by the agent loop"}

Root cause:
- gateway/run.py creates AIAgent without passing session_db=
- self._session_db is None in the agent instance
- The dispatch condition "elif function_name == 'session_search' and self._session_db"
  skips when _session_db is None, falling through to the generic error

This fix:
1. Initializes self._session_db in GatewayRunner.__init__()
2. Passes session_db to all AIAgent instantiations in gateway/run.py
3. Adds defensive fallback in run_agent.py to return a clear error when
   session_db is unavailable, instead of falling through

Fixes #105

											
										
										
											2026-02-27 00:32:17 -05:00
+								        # Initialize session database for session_search tool support
 								        self._session_db = None
 								        try:
 								            from hermes_state import SessionDB
 								            self._session_db = SessionDB()
 								        except Exception as e:
 								            logger.debug("SQLite session store not available: %s", e)
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        # DM pairing store for code-based user authorization
 								        from gateway.pairing import PairingStore
 								        self.pairing_store = PairingStore()
 								        # Event hook system
 								        from gateway.hooks import HookRegistry
 								        self.hooks = HookRegistry()
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								        # Per-chat voice reply mode: "off" | "voice_only" | "all"
 								        self._voice_mode: Dict[str, str] = self._load_voice_modes()
-												fix(gateway): track background task references in GatewayRunner (#3254)

Asyncio tasks created with create_task() but never stored can be
garbage collected mid-execution. Add self._background_tasks set to
hold references, with add_done_callback cleanup. Tracks:
- /background command task
- session-reset memory flush task
- session-resume memory flush task
Cancel all pending tasks in stop().

Update test fixtures that construct GatewayRunner via object.__new__()
to include the new _background_tasks attribute.

Cherry-picked from PR #3167 by memosr. The original PR also deleted
the DM topic auto-skill loading code — that deletion was excluded
from this salvage as it removes a shipped feature (#2598).

Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-26 14:33:48 -07:00
+								        # Track background tasks to prevent garbage collection mid-execution
 								        self._background_tasks: set = set()
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
+								    def _get_or_create_gateway_honcho(self, session_key: str):
 								        """Return a persistent Honcho manager/config pair for this gateway session."""
 								        if not hasattr(self, "_honcho_managers"):
 								            self._honcho_managers = {}
 								        if not hasattr(self, "_honcho_configs"):
 								            self._honcho_configs = {}
 								        if session_key in self._honcho_managers:
 								            return self._honcho_managers[session_key], self._honcho_configs.get(session_key)
 								        try:
 								            from honcho_integration.client import HonchoClientConfig, get_honcho_client
 								            from honcho_integration.session import HonchoSessionManager
 								            hcfg = HonchoClientConfig.from_global_config()
-												refactor(honcho): remove local memory mode

The "local" memoryMode was redundant with enabled: false. Simplifies
the mode system to hybrid and honcho only.

											
										
										
											2026-03-12 16:23:34 -04:00
+								            if not hcfg.enabled or not hcfg.api_key:
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
+								                return None, hcfg
 								            client = get_honcho_client(hcfg)
 								            manager = HonchoSessionManager(
 								                honcho=client,
 								                config=hcfg,
 								                context_tokens=hcfg.context_tokens,
 								            )
 								            self._honcho_managers[session_key] = manager
 								            self._honcho_configs[session_key] = hcfg
 								            return manager, hcfg
 								        except Exception as e:
 								            logger.debug("Gateway Honcho init failed for %s: %s", session_key, e)
 								            return None, None
 								    def _shutdown_gateway_honcho(self, session_key: str) -> None:
 								        """Flush and close the persistent Honcho manager for a gateway session."""
 								        managers = getattr(self, "_honcho_managers", None)
 								        configs = getattr(self, "_honcho_configs", None)
 								        if managers is None or configs is None:
 								            return
 								        manager = managers.pop(session_key, None)
 								        configs.pop(session_key, None)
 								        if not manager:
 								            return
 								        try:
 								            manager.shutdown()
 								        except Exception as e:
 								            logger.debug("Gateway Honcho shutdown failed for %s: %s", session_key, e)
 								    def _shutdown_all_gateway_honcho(self) -> None:
 								        """Flush and close all persistent Honcho managers."""
 								        managers = getattr(self, "_honcho_managers", None)
 								        if not managers:
 								            return
 								        for session_key in list(managers.keys()):
 								            self._shutdown_gateway_honcho(session_key)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												fix: check skill availability before hinting at hermes-agent-setup

Only mention the hermes-agent-setup skill in STT failure notes (both
the direct user message and the agent context note) when the skill is
actually installed. Uses _find_skill() from skill_manager_tool.

Also confirmed: STT is the only user-facing failure case where the
setup skill hint helps. Vision failures are transient API issues,
runtime transcription errors indicate a configured-but-broken provider,
and platform startup warnings are server logs.

											
										
										
											2026-03-18 03:17:23 -07:00
+								    # -- Setup skill availability ----------------------------------------
 								    def _has_setup_skill(self) -> bool:
 								        """Check if the hermes-agent-setup skill is installed."""
 								        try:
 								            from tools.skill_manager_tool import _find_skill
 								            return _find_skill("hermes-agent-setup") is not None
 								        except Exception:
 								            return False
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								    # -- Voice mode persistence ------------------------------------------
 								    _VOICE_MODE_PATH = _hermes_home / "gateway_voice_mode.json"
 								    def _load_voice_modes(self) -> Dict[str, str]:
 								        try:
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								            data = json.loads(self._VOICE_MODE_PATH.read_text())
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								        except (FileNotFoundError, json.JSONDecodeError, OSError):
 								            return {}
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								        if not isinstance(data, dict):
 								            return {}
 								        valid_modes = {"off", "voice_only", "all"}
 								        return {
 								            str(chat_id): mode
 								            for chat_id, mode in data.items()
 								            if mode in valid_modes
 								        }
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								    def _save_voice_modes(self) -> None:
 								        try:
 								            self._VOICE_MODE_PATH.parent.mkdir(parents=True, exist_ok=True)
 								            self._VOICE_MODE_PATH.write_text(
 								                json.dumps(self._voice_mode, indent=2)
 								            )
 								        except OSError as e:
 								            logger.warning("Failed to save voice modes: %s", e)
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								    def _set_adapter_auto_tts_disabled(self, adapter, chat_id: str, disabled: bool) -> None:
 								        """Update an adapter's in-memory auto-TTS suppression set if present."""
 								        disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
 								        if not isinstance(disabled_chats, set):
 								            return
 								        if disabled:
 								            disabled_chats.add(chat_id)
 								        else:
 								            disabled_chats.discard(chat_id)
 								    def _sync_voice_mode_state_to_adapter(self, adapter) -> None:
 								        """Restore persisted /voice off state into a live platform adapter."""
 								        disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
 								        if not isinstance(disabled_chats, set):
 								            return
 								        disabled_chats.clear()
 								        disabled_chats.update(
 								            chat_id for chat_id, mode in self._voice_mode.items() if mode == "off"
 								        )
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								    # -----------------------------------------------------------------
-												fix(honcho): isolate session routing for multi-user gateway (#1500)

Salvaged from PR #1470 by adavyas.

Core fix: Honcho tool calls in a multi-session gateway could route to
the wrong session because honcho_tools.py relied on process-global
state. Now threads session context through the call chain:
  AIAgent._invoke_tool() → handle_function_call() → registry.dispatch()
  → handler **kw → _resolve_session_context()

Changes:
- Add _resolve_session_context() to prefer per-call context over globals
- Plumb honcho_manager + honcho_session_key through handle_function_call
- Add sync_honcho=False to run_conversation() for synthetic flush turns
- Pass honcho_session_key through gateway memory flush lifecycle
- Harden gateway PID detection when /proc cmdline is unreadable
- Make interrupt test scripts import-safe for pytest-xdist
- Wrap BibTeX examples in Jekyll raw blocks for docs build
- Fix thread-order-dependent assertion in client lifecycle test
- Expand Honcho docs: session isolation, lifecycle, routing internals

Dropped from original PR:
- Indentation change in _create_request_openai_client that would move
  client creation inside the lock (causes unnecessary contention)

Co-authored-by: adavyas <adavyas@users.noreply.github.com>
											
										
										
											2026-03-16 00:23:47 -07:00
+								    def _flush_memories_for_session(
 								        self,
 								        old_session_id: str,
 								        honcho_session_key: Optional[str] = None,
 								    ):
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								        """Prompt the agent to save memories/skills before context is lost.
 								        Synchronous worker — meant to be called via run_in_executor from
 								        an async context so it doesn't block the event loop.
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								        """
-												fix(gateway): prevent stale memory overwrites by flush agent (#2670)

The gateway memory flush agent reviews old conversation history on session
reset/expiry and writes to memory. It had no awareness of memory changes
made after that conversation ended (by the live agent, cron jobs, or other
sessions), causing silent overwrites of newer entries.

Two fixes:

1. Skip memory flush entirely for cron sessions (session IDs starting with
   'cron_'). Cron sessions are headless with no meaningful user conversation
   to extract memories from.

2. Inject the current live memory state (MEMORY.md + USER.md) directly into
   the flush prompt. The flush agent can now see what's already saved and
   make informed decisions — only adding genuinely new information rather
   than blindly overwriting entries that may have been updated since the
   conversation ended.

Addresses the root cause identified in #2670: the flush agent was making
memory decisions blind to the current state of memory, causing stale
context to overwrite newer entries on gateway restarts and session resets.

Co-authored-by: devorun <devorun@users.noreply.github.com>
Co-authored-by: dlkakbs <dlkakbs@users.noreply.github.com>
											
										
										
											2026-03-23 16:08:38 -07:00
+								        # Skip cron sessions — they run headless with no meaningful user
 								        # conversation to extract memories from.
 								        if old_session_id and old_session_id.startswith("cron_"):
 								            logger.debug("Skipping memory flush for cron session: %s", old_session_id)
 								            return
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								        try:
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								            history = self.session_store.load_transcript(old_session_id)
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								            if not history or len(history) < 4:
 								                return
 								            from run_agent import AIAgent
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								            runtime_kwargs = _resolve_runtime_agent_kwargs()
 								            if not runtime_kwargs.get("api_key"):
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								                return
-												fix(gateway): pass model to temporary AIAgent instances

Memory flush, /compress, and session hygiene create AIAgent without
model=, falling back to the hardcoded default "anthropic/claude-opus-4.6".
This fails with a 400 error when the active provider is openai-codex
(Codex only accepts its own model names like gpt-5.1-codex-mini).

Add _resolve_gateway_model() that mirrors the env/config resolution
already used by _run_agent_sync, and wire it into all three temporary
agent creation sites.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 00:09:37 +01:00
+								            # Resolve model from config — AIAgent's default is OpenRouter-
 								            # formatted ("anthropic/claude-opus-4.6") which fails when the
 								            # active provider is openai-codex.
 								            model = _resolve_gateway_model()
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								            tmp_agent = AIAgent(
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								                **runtime_kwargs,
-												fix(gateway): pass model to temporary AIAgent instances

Memory flush, /compress, and session hygiene create AIAgent without
model=, falling back to the hardcoded default "anthropic/claude-opus-4.6".
This fails with a 400 error when the active provider is openai-codex
(Codex only accepts its own model names like gpt-5.1-codex-mini).

Add _resolve_gateway_model() that mirrors the env/config resolution
already used by _run_agent_sync, and wire it into all three temporary
agent creation sites.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 00:09:37 +01:00
+								                model=model,
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								                max_iterations=8,
 								                quiet_mode=True,
 								                enabled_toolsets=["memory", "skills"],
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								                session_id=old_session_id,
-												fix(honcho): isolate session routing for multi-user gateway (#1500)

Salvaged from PR #1470 by adavyas.

Core fix: Honcho tool calls in a multi-session gateway could route to
the wrong session because honcho_tools.py relied on process-global
state. Now threads session context through the call chain:
  AIAgent._invoke_tool() → handle_function_call() → registry.dispatch()
  → handler **kw → _resolve_session_context()

Changes:
- Add _resolve_session_context() to prefer per-call context over globals
- Plumb honcho_manager + honcho_session_key through handle_function_call
- Add sync_honcho=False to run_conversation() for synthetic flush turns
- Pass honcho_session_key through gateway memory flush lifecycle
- Harden gateway PID detection when /proc cmdline is unreadable
- Make interrupt test scripts import-safe for pytest-xdist
- Wrap BibTeX examples in Jekyll raw blocks for docs build
- Fix thread-order-dependent assertion in client lifecycle test
- Expand Honcho docs: session isolation, lifecycle, routing internals

Dropped from original PR:
- Indentation change in _create_request_openai_client that would move
  client creation inside the lock (causes unnecessary contention)

Co-authored-by: adavyas <adavyas@users.noreply.github.com>
											
										
										
											2026-03-16 00:23:47 -07:00
+								                honcho_session_key=honcho_session_key,
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								            )
-												fix(gateway): silence background agent terminal output (#3297)

* fix(gateway): silence flush agent terminal output

quiet_mode=True only suppresses AIAgent init messages.
Tool call output still leaks to the terminal through
_safe_print → _print_fn during session reset/expiry.

Since #2670 injected live memory state into the flush prompt,
the flush agent now reliably calls memory tools — making the
output leak noticeable for the first time.

Set _print_fn to a no-op so the background flush is fully silent.

* test(gateway): add test for flush agent terminal silence + fix dotenv mock

- Add TestFlushAgentSilenced: verifies _print_fn is set to a no-op on
  the flush agent so tool output never leaks to the terminal
- Fix pre-existing test failures: replace patch('run_agent.AIAgent')
  with sys.modules mock to avoid importing run_agent (requires openai)
- Add autouse _mock_dotenv fixture so all tests in this file run
  without the dotenv package installed

* fix(display): route KawaiiSpinner output through print_fn to fully silence flush agent

The previous fix set tmp_agent._print_fn = no-op on the flush agent but
spinner output and quiet-mode cute messages bypassed _print_fn entirely:
- KawaiiSpinner captured sys.stdout at __init__ and wrote directly to it
- quiet-mode tool results used builtin print() instead of _safe_print()

Add optional print_fn parameter to KawaiiSpinner.__init__; _write routes
through it when set. Pass self._print_fn to all spinner construction sites
in run_agent.py and change the quiet-mode cute message print to _safe_print.
The existing gateway fix (tmp_agent._print_fn = lambda) now propagates
correctly through both paths.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(gateway): silence hygiene and compression background agents

Two more background AIAgent instances in the gateway were created with
quiet_mode=True but without _print_fn = no-op, causing tool output to
leak to the terminal:
- _hyg_agent (in-turn hygiene memory agent)
- tmp_agent (_compress_context path)

Apply the same _print_fn no-op pattern used for the flush agent.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* chore(display): remove unused _last_flush_time from KawaiiSpinner

Attribute was set but never read; upstream already removed it.
Leftover from conflict resolution during rebase onto upstream/main.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Dilee <uzmpsk.dilekakbas@gmail.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
											
										
										
											2026-03-26 17:40:31 -07:00
+								            # Fully silence the flush agent — quiet_mode only suppresses init
 								            # messages; tool call output still leaks to the terminal through
 								            # _safe_print → _print_fn.  Set a no-op to prevent that.
 								            tmp_agent._print_fn = lambda *a, **kw: None
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
 								            # Build conversation history from transcript
 								            msgs = [
 								                {"role": m.get("role"), "content": m.get("content")}
 								                for m in history
 								                if m.get("role") in ("user", "assistant") and m.get("content")
 								            ]
-												fix(gateway): prevent stale memory overwrites by flush agent (#2670)

The gateway memory flush agent reviews old conversation history on session
reset/expiry and writes to memory. It had no awareness of memory changes
made after that conversation ended (by the live agent, cron jobs, or other
sessions), causing silent overwrites of newer entries.

Two fixes:

1. Skip memory flush entirely for cron sessions (session IDs starting with
   'cron_'). Cron sessions are headless with no meaningful user conversation
   to extract memories from.

2. Inject the current live memory state (MEMORY.md + USER.md) directly into
   the flush prompt. The flush agent can now see what's already saved and
   make informed decisions — only adding genuinely new information rather
   than blindly overwriting entries that may have been updated since the
   conversation ended.

Addresses the root cause identified in #2670: the flush agent was making
memory decisions blind to the current state of memory, causing stale
context to overwrite newer entries on gateway restarts and session resets.

Co-authored-by: devorun <devorun@users.noreply.github.com>
Co-authored-by: dlkakbs <dlkakbs@users.noreply.github.com>
											
										
										
											2026-03-23 16:08:38 -07:00
+								            # Read live memory state from disk so the flush agent can see
 								            # what's already saved and avoid overwriting newer entries.
 								            _current_memory = ""
 								            try:
 								                from tools.memory_tool import MEMORY_DIR
 								                for fname, label in [
 								                    ("MEMORY.md", "MEMORY (your personal notes)"),
 								                    ("USER.md", "USER PROFILE (who the user is)"),
 								                ]:
 								                    fpath = MEMORY_DIR / fname
 								                    if fpath.exists():
 								                        content = fpath.read_text(encoding="utf-8").strip()
 								                        if content:
 								                            _current_memory += f"\n\n## Current {label}:\n{content}"
 								            except Exception:
 								                pass  # Non-fatal — flush still works, just without the guard
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								            # Give the agent a real turn to think about what to save
 								            flush_prompt = (
 								                "[System: This session is about to be automatically reset due to "
 								                "inactivity or a scheduled daily reset. The conversation context "
 								                "will be cleared after this turn.\n\n"
 								                "Review the conversation above and:\n"
 								                "1. Save any important facts, preferences, or decisions to memory "
 								                "(user profile or your notes) that would be useful in future sessions.\n"
 								                "2. If you discovered a reusable workflow or solved a non-trivial "
 								                "problem, consider saving it as a skill.\n"
 								                "3. If nothing is worth saving, that's fine — just skip.\n\n"
-												fix(gateway): prevent stale memory overwrites by flush agent (#2670)

The gateway memory flush agent reviews old conversation history on session
reset/expiry and writes to memory. It had no awareness of memory changes
made after that conversation ended (by the live agent, cron jobs, or other
sessions), causing silent overwrites of newer entries.

Two fixes:

1. Skip memory flush entirely for cron sessions (session IDs starting with
   'cron_'). Cron sessions are headless with no meaningful user conversation
   to extract memories from.

2. Inject the current live memory state (MEMORY.md + USER.md) directly into
   the flush prompt. The flush agent can now see what's already saved and
   make informed decisions — only adding genuinely new information rather
   than blindly overwriting entries that may have been updated since the
   conversation ended.

Addresses the root cause identified in #2670: the flush agent was making
memory decisions blind to the current state of memory, causing stale
context to overwrite newer entries on gateway restarts and session resets.

Co-authored-by: devorun <devorun@users.noreply.github.com>
Co-authored-by: dlkakbs <dlkakbs@users.noreply.github.com>
											
										
										
											2026-03-23 16:08:38 -07:00
+								            )
 								            if _current_memory:
 								                flush_prompt += (
 								                    "IMPORTANT — here is the current live state of memory. Other "
 								                    "sessions, cron jobs, or the user may have updated it since this "
 								                    "conversation ended. Do NOT overwrite or remove entries unless "
 								                    "the conversation above reveals something that genuinely "
 								                    "supersedes them. Only add new information that is not already "
 								                    "captured below."
 								                    f"{_current_memory}\n\n"
 								                )
 								            flush_prompt += (
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								                "Do NOT respond to the user. Just use the memory and skill_manage "
 								                "tools if needed, then stop.]"
 								            )
 								            tmp_agent.run_conversation(
 								                user_message=flush_prompt,
 								                conversation_history=msgs,
-												fix(honcho): isolate session routing for multi-user gateway (#1500)

Salvaged from PR #1470 by adavyas.

Core fix: Honcho tool calls in a multi-session gateway could route to
the wrong session because honcho_tools.py relied on process-global
state. Now threads session context through the call chain:
  AIAgent._invoke_tool() → handle_function_call() → registry.dispatch()
  → handler **kw → _resolve_session_context()

Changes:
- Add _resolve_session_context() to prefer per-call context over globals
- Plumb honcho_manager + honcho_session_key through handle_function_call
- Add sync_honcho=False to run_conversation() for synthetic flush turns
- Pass honcho_session_key through gateway memory flush lifecycle
- Harden gateway PID detection when /proc cmdline is unreadable
- Make interrupt test scripts import-safe for pytest-xdist
- Wrap BibTeX examples in Jekyll raw blocks for docs build
- Fix thread-order-dependent assertion in client lifecycle test
- Expand Honcho docs: session isolation, lifecycle, routing internals

Dropped from original PR:
- Indentation change in _create_request_openai_client that would move
  client creation inside the lock (causes unnecessary contention)

Co-authored-by: adavyas <adavyas@users.noreply.github.com>
											
										
										
											2026-03-16 00:23:47 -07:00
+								                sync_honcho=False,
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								            )
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								            logger.info("Pre-reset memory flush completed for session %s", old_session_id)
-												feat(honcho): async memory integration with prefetch pipeline and recallMode

Adds full Honcho memory integration to Hermes:

- Session manager with async background writes, memory modes (honcho/hybrid/local),
  and dialectic prefetch for first-turn context warming
- Agent integration: prefetch pipeline, tool surface gated by recallMode,
  system prompt context injection, SIGTERM/SIGINT flush handlers
- CLI commands: setup, status, mode, tokens, peer, identity, migrate
- recallMode setting (auto | context | tools) for A/B testing retrieval strategies
- Session strategies: per-session, per-repo (git tree root), per-directory, global
- Polymorphic memoryMode config: string shorthand or per-peer object overrides
- 97 tests covering async writes, client config, session resolution, and memory modes

											
										
										
											2026-03-09 15:58:22 -04:00
+								            # Flush any queued Honcho writes before the session is dropped
 								            if getattr(tmp_agent, '_honcho', None):
 								                try:
 								                    tmp_agent._honcho.shutdown()
 								                except Exception:
 								                    pass
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								        except Exception as e:
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								            logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)
-												fix(honcho): isolate session routing for multi-user gateway (#1500)

Salvaged from PR #1470 by adavyas.

Core fix: Honcho tool calls in a multi-session gateway could route to
the wrong session because honcho_tools.py relied on process-global
state. Now threads session context through the call chain:
  AIAgent._invoke_tool() → handle_function_call() → registry.dispatch()
  → handler **kw → _resolve_session_context()

Changes:
- Add _resolve_session_context() to prefer per-call context over globals
- Plumb honcho_manager + honcho_session_key through handle_function_call
- Add sync_honcho=False to run_conversation() for synthetic flush turns
- Pass honcho_session_key through gateway memory flush lifecycle
- Harden gateway PID detection when /proc cmdline is unreadable
- Make interrupt test scripts import-safe for pytest-xdist
- Wrap BibTeX examples in Jekyll raw blocks for docs build
- Fix thread-order-dependent assertion in client lifecycle test
- Expand Honcho docs: session isolation, lifecycle, routing internals

Dropped from original PR:
- Indentation change in _create_request_openai_client that would move
  client creation inside the lock (causes unnecessary contention)

Co-authored-by: adavyas <adavyas@users.noreply.github.com>
											
										
										
											2026-03-16 00:23:47 -07:00
+								    async def _async_flush_memories(
 								        self,
 								        old_session_id: str,
 								        honcho_session_key: Optional[str] = None,
 								    ):
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								        """Run the sync memory flush in a thread pool so it won't block the event loop."""
 								        loop = asyncio.get_event_loop()
-												fix(honcho): isolate session routing for multi-user gateway (#1500)

Salvaged from PR #1470 by adavyas.

Core fix: Honcho tool calls in a multi-session gateway could route to
the wrong session because honcho_tools.py relied on process-global
state. Now threads session context through the call chain:
  AIAgent._invoke_tool() → handle_function_call() → registry.dispatch()
  → handler **kw → _resolve_session_context()

Changes:
- Add _resolve_session_context() to prefer per-call context over globals
- Plumb honcho_manager + honcho_session_key through handle_function_call
- Add sync_honcho=False to run_conversation() for synthetic flush turns
- Pass honcho_session_key through gateway memory flush lifecycle
- Harden gateway PID detection when /proc cmdline is unreadable
- Make interrupt test scripts import-safe for pytest-xdist
- Wrap BibTeX examples in Jekyll raw blocks for docs build
- Fix thread-order-dependent assertion in client lifecycle test
- Expand Honcho docs: session isolation, lifecycle, routing internals

Dropped from original PR:
- Indentation change in _create_request_openai_client that would move
  client creation inside the lock (causes unnecessary contention)

Co-authored-by: adavyas <adavyas@users.noreply.github.com>
											
										
										
											2026-03-16 00:23:47 -07:00
+								        await loop.run_in_executor(
 								            None,
 								            self._flush_memories_for_session,
 								            old_session_id,
 								            honcho_session_key,
 								        )
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
 								    @property
 								    def should_exit_cleanly(self) -> bool:
 								        return self._exit_cleanly
-												fix(gateway): restart on whatsapp bridge child exit (#2334)

Co-authored-by: Frederico Ribeiro <fr@tecompanytea.com>
											
										
										
											2026-03-21 09:38:52 -07:00
+								    @property
 								    def should_exit_with_failure(self) -> bool:
 								        return self._exit_with_failure
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								    @property
 								    def exit_reason(self) -> Optional[str]:
 								        return self._exit_reason
-												fix(gateway): make group session isolation configurable

default group and channel sessions to per-user isolation, allow opting back into shared room sessions via config.yaml, and document Discord gateway routing and session behavior.

											
										
										
											2026-03-16 00:22:23 -07:00
+								    def _session_key_for_source(self, source: SessionSource) -> str:
 								        """Resolve the current session key for a source, honoring gateway config when available."""
 								        if hasattr(self, "session_store") and self.session_store is not None:
 								            try:
 								                session_key = self.session_store._generate_session_key(source)
 								                if isinstance(session_key, str) and session_key:
 								                    return session_key
 								            except Exception:
 								                pass
 								        config = getattr(self, "config", None)
 								        return build_session_key(
 								            source,
 								            group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
 								        )
-												fix: hermes update causes dual gateways on macOS (launchd) (#1567)

* feat: add optional smart model routing

Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.

* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s

* fix(gateway): avoid recursive ExecStop in user systemd unit

* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit

The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.

---------

Co-authored-by: Ninja <ninja@local>

* feat(skills): add blender-mcp optional skill for 3D modeling

Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.

Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.

* feat(acp): support slash commands in ACP adapter (#1532)

Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.

Unrecognized /commands fall through to the LLM as normal messages.

/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.

Fixes #1402

* fix(logging): improve error logging in session search tool (#1533)

* fix(gateway): restart on retryable startup failures (#1517)

* feat(email): add skip_attachments option via config.yaml

* feat(email): add skip_attachments option via config.yaml

Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.

Configure in config.yaml:
  platforms:
    email:
      skip_attachments: true

Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.

* docs: document skip_attachments option for email adapter

* fix(telegram): retry on transient TLS failures during connect and send

Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.

Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.

Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.

Based on PR #1527 by cmd8. Closes #1526.

* feat: permissive block_anchor thresholds and unicode normalization (#1539)

Salvaged from PR #1528 by an420eth. Closes #517.

Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
  non-breaking spaces → ASCII) so LLM-produced unicode artifacts
  don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
  multiple candidates — if first/last lines match exactly, the
  block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
  preserve correct character positions

Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.

Co-authored-by: an420eth <an420eth@users.noreply.github.com>

* feat(cli): add file path autocomplete in the input prompt (#1545)

When typing a path-like token (./  ../  ~/  /  or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.

Triggered by tokens like:
  edit ./src/ma     → shows ./src/main.py, ./src/manifest.json, ...
  check ~/doc       → shows ~/docs/, ~/documents/, ...
  read /etc/hos     → shows /etc/hosts, /etc/hostname, ...
  open tools/reg    → shows tools/registry.py

Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.

Inspired by OpenCode PR #145 (file path completion menu).

Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
  tokens, _path_completions() yields filesystem Completions with
  size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
  path extraction, prefix filtering, directory markers, home
  expansion, case-insensitivity, integration with slash commands

* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)

Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.

Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.

* feat: smart approvals + /stop command (inspired by OpenAI Codex)

* feat: smart approvals — LLM-based risk assessment for dangerous commands

Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.

Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).

Config (config.yaml):
  approvals:
    mode: manual   # manual (default), smart, off

Modes:
- manual — current behavior, always prompt the user
- smart  — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
           or ESCALATE (fall through to manual prompt)
- off    — skip all approval prompts (equivalent to --yolo)

When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.

The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.

* feat: make smart approval model configurable via config.yaml

Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).

Config:
  auxiliary:
    approval:
      provider: auto
      model: ''        # fast/cheap model recommended
      base_url: ''
      api_key: ''

Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.

* feat: add /stop command to kill all background processes

Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.

Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.

Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.

* feat: first-class plugin architecture + hide status bar cost by default (#1544)

The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:

  display:
    show_cost: true

in config.yaml, or: hermes config set display.show_cost true

The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.

Status bar without cost:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m

Status bar with show_cost: true:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m

* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)

* feat: improve memory prioritization — user preferences over procedural knowledge

Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.

Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'

Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
  and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
  corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
  corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
  preferences and corrections over task-specific details

* feat: more aggressive skill creation and update prompting

Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.

Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
  to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
  if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
  now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers

* feat: first-class plugin architecture (#1555)

Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.

Core system (hermes_cli/plugins.py):
  - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
    pip entry_points (hermes_agent.plugins group)
  - PluginContext with register_tool() and register_hook()
  - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
    on_session_start/end
  - Namespace package handling for relative imports in plugins
  - Graceful error isolation — broken plugins never crash the agent

Integration (model_tools.py):
  - Plugin discovery runs after built-in + MCP tools
  - Plugin tools bypass toolset filter via get_plugin_tool_names()
  - Pre/post tool call hooks fire in handle_function_call()

CLI:
  - /plugins command shows loaded plugins, tool counts, status
  - Added to COMMANDS dict for autocomplete

Docs:
  - Getting started guide (build-a-hermes-plugin.md) — full tutorial
    building a calculator plugin step by step
  - Reference page (features/plugins.md) — quick overview + tables
  - Covers: file structure, schemas, handlers, hooks, data files,
    bundled skills, env var gating, pip distribution, common mistakes

Tests: 16 tests covering discovery, loading, hooks, tool visibility.

* fix: hermes update causes dual gateways on macOS (launchd)

Three bugs worked together to create the dual-gateway problem:

1. cmd_update only checked systemd for gateway restart, completely
   ignoring launchd on macOS. After killing the PID it would print
   'Restart it with: hermes gateway run' even when launchd was about
   to auto-respawn the process.

2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
   after SIGTERM (non-zero exit), so the user's manual restart
   created a second instance.

3. The launchd plist lacked --replace (systemd had it), so the
   respawned gateway didn't kill stale instances on startup.

Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint

* fix: add launchd plist auto-refresh + explicit restart in cmd_update

Two integration issues with the initial fix:

1. Existing macOS users with old plist (no --replace) would never
   get the fix until manual uninstall/reinstall. Added
   refresh_launchd_plist_if_needed() — mirrors the existing
   refresh_systemd_unit_if_needed(). Called from launchd_start(),
   launchd_restart(), and cmd_update.

2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
   explicit launchctl stop/start. This caused races: launchd would
   respawn the old process before the PID file was cleaned up.
   Now does explicit stop+start (matching how systemd gets an
   explicit systemctl restart), with plist refresh first so the
   new --replace flag is picked up.

---------

Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
											
										
										
											2026-03-16 12:36:29 -07:00
+								    def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
 								        from agent.smart_model_routing import resolve_turn_route
 								        primary = {
 								            "model": model,
 								            "api_key": runtime_kwargs.get("api_key"),
 								            "base_url": runtime_kwargs.get("base_url"),
 								            "provider": runtime_kwargs.get("provider"),
 								            "api_mode": runtime_kwargs.get("api_mode"),
-												feat: integrate GitHub Copilot providers across Hermes

Add first-class GitHub Copilot and Copilot ACP provider support across
model selection, runtime provider resolution, CLI sessions, delegated
subagents, cron jobs, and the Telegram gateway.

This also normalizes Copilot model catalogs and API modes, introduces a
Copilot ACP OpenAI-compatible shim, and fixes service-mode auth by
resolving Homebrew-installed gh binaries under launchd.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-03-17 23:40:22 -07:00
+								            "command": runtime_kwargs.get("command"),
 								            "args": list(runtime_kwargs.get("args") or []),
-												fix: hermes update causes dual gateways on macOS (launchd) (#1567)

* feat: add optional smart model routing

Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.

* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s

* fix(gateway): avoid recursive ExecStop in user systemd unit

* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit

The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.

---------

Co-authored-by: Ninja <ninja@local>

* feat(skills): add blender-mcp optional skill for 3D modeling

Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.

Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.

* feat(acp): support slash commands in ACP adapter (#1532)

Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.

Unrecognized /commands fall through to the LLM as normal messages.

/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.

Fixes #1402

* fix(logging): improve error logging in session search tool (#1533)

* fix(gateway): restart on retryable startup failures (#1517)

* feat(email): add skip_attachments option via config.yaml

* feat(email): add skip_attachments option via config.yaml

Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.

Configure in config.yaml:
  platforms:
    email:
      skip_attachments: true

Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.

* docs: document skip_attachments option for email adapter

* fix(telegram): retry on transient TLS failures during connect and send

Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.

Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.

Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.

Based on PR #1527 by cmd8. Closes #1526.

* feat: permissive block_anchor thresholds and unicode normalization (#1539)

Salvaged from PR #1528 by an420eth. Closes #517.

Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
  non-breaking spaces → ASCII) so LLM-produced unicode artifacts
  don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
  multiple candidates — if first/last lines match exactly, the
  block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
  preserve correct character positions

Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.

Co-authored-by: an420eth <an420eth@users.noreply.github.com>

* feat(cli): add file path autocomplete in the input prompt (#1545)

When typing a path-like token (./  ../  ~/  /  or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.

Triggered by tokens like:
  edit ./src/ma     → shows ./src/main.py, ./src/manifest.json, ...
  check ~/doc       → shows ~/docs/, ~/documents/, ...
  read /etc/hos     → shows /etc/hosts, /etc/hostname, ...
  open tools/reg    → shows tools/registry.py

Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.

Inspired by OpenCode PR #145 (file path completion menu).

Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
  tokens, _path_completions() yields filesystem Completions with
  size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
  path extraction, prefix filtering, directory markers, home
  expansion, case-insensitivity, integration with slash commands

* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)

Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.

Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.

* feat: smart approvals + /stop command (inspired by OpenAI Codex)

* feat: smart approvals — LLM-based risk assessment for dangerous commands

Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.

Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).

Config (config.yaml):
  approvals:
    mode: manual   # manual (default), smart, off

Modes:
- manual — current behavior, always prompt the user
- smart  — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
           or ESCALATE (fall through to manual prompt)
- off    — skip all approval prompts (equivalent to --yolo)

When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.

The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.

* feat: make smart approval model configurable via config.yaml

Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).

Config:
  auxiliary:
    approval:
      provider: auto
      model: ''        # fast/cheap model recommended
      base_url: ''
      api_key: ''

Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.

* feat: add /stop command to kill all background processes

Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.

Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.

Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.

* feat: first-class plugin architecture + hide status bar cost by default (#1544)

The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:

  display:
    show_cost: true

in config.yaml, or: hermes config set display.show_cost true

The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.

Status bar without cost:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m

Status bar with show_cost: true:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m

* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)

* feat: improve memory prioritization — user preferences over procedural knowledge

Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.

Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'

Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
  and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
  corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
  corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
  preferences and corrections over task-specific details

* feat: more aggressive skill creation and update prompting

Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.

Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
  to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
  if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
  now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers

* feat: first-class plugin architecture (#1555)

Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.

Core system (hermes_cli/plugins.py):
  - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
    pip entry_points (hermes_agent.plugins group)
  - PluginContext with register_tool() and register_hook()
  - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
    on_session_start/end
  - Namespace package handling for relative imports in plugins
  - Graceful error isolation — broken plugins never crash the agent

Integration (model_tools.py):
  - Plugin discovery runs after built-in + MCP tools
  - Plugin tools bypass toolset filter via get_plugin_tool_names()
  - Pre/post tool call hooks fire in handle_function_call()

CLI:
  - /plugins command shows loaded plugins, tool counts, status
  - Added to COMMANDS dict for autocomplete

Docs:
  - Getting started guide (build-a-hermes-plugin.md) — full tutorial
    building a calculator plugin step by step
  - Reference page (features/plugins.md) — quick overview + tables
  - Covers: file structure, schemas, handlers, hooks, data files,
    bundled skills, env var gating, pip distribution, common mistakes

Tests: 16 tests covering discovery, loading, hooks, tool visibility.

* fix: hermes update causes dual gateways on macOS (launchd)

Three bugs worked together to create the dual-gateway problem:

1. cmd_update only checked systemd for gateway restart, completely
   ignoring launchd on macOS. After killing the PID it would print
   'Restart it with: hermes gateway run' even when launchd was about
   to auto-respawn the process.

2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
   after SIGTERM (non-zero exit), so the user's manual restart
   created a second instance.

3. The launchd plist lacked --replace (systemd had it), so the
   respawned gateway didn't kill stale instances on startup.

Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint

* fix: add launchd plist auto-refresh + explicit restart in cmd_update

Two integration issues with the initial fix:

1. Existing macOS users with old plist (no --replace) would never
   get the fix until manual uninstall/reinstall. Added
   refresh_launchd_plist_if_needed() — mirrors the existing
   refresh_systemd_unit_if_needed(). Called from launchd_start(),
   launchd_restart(), and cmd_update.

2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
   explicit launchctl stop/start. This caused races: launchd would
   respawn the old process before the PID file was cleaned up.
   Now does explicit stop+start (matching how systemd gets an
   explicit systemctl restart), with plist refresh first so the
   new --replace flag is picked up.

---------

Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
											
										
										
											2026-03-16 12:36:29 -07:00
+								        }
 								        return resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary)
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								    async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None:
-												feat: auto-reconnect failed gateway platforms with exponential backoff (#2584)

When a messaging platform fails to connect at startup (e.g. transient DNS
failure) or disconnects at runtime with a retryable error, the gateway now
queues it for background reconnection instead of giving up permanently.

- New _platform_reconnect_watcher background task runs alongside the
  existing session expiry watcher
- Exponential backoff: 30s, 60s, 120s, 240s, 300s cap
- Max 20 retry attempts before giving up on a platform
- Non-retryable errors (bad auth token, etc.) are not retried
- Runtime disconnections via _handle_adapter_fatal_error now queue
  retryable failures instead of triggering gateway shutdown
- On successful reconnect, adapter is wired up and channel directory
  is rebuilt automatically

Fixes the case where a DNS blip during gateway startup caused Telegram
and Discord to be permanently unavailable until manual restart.
											
										
										
											2026-03-22 23:48:24 -07:00
+								        """React to an adapter failure after startup.
 								        If the error is retryable (e.g. network blip, DNS failure), queue the
 								        platform for background reconnection instead of giving up permanently.
 								        """
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								        logger.error(
 								            "Fatal %s adapter error (%s): %s",
 								            adapter.platform.value,
 								            adapter.fatal_error_code or "unknown",
 								            adapter.fatal_error_message or "unknown error",
 								        )
 								        existing = self.adapters.get(adapter.platform)
 								        if existing is adapter:
 								            try:
 								                await adapter.disconnect()
 								            finally:
 								                self.adapters.pop(adapter.platform, None)
 								                self.delivery_router.adapters = self.adapters
-												feat: auto-reconnect failed gateway platforms with exponential backoff (#2584)

When a messaging platform fails to connect at startup (e.g. transient DNS
failure) or disconnects at runtime with a retryable error, the gateway now
queues it for background reconnection instead of giving up permanently.

- New _platform_reconnect_watcher background task runs alongside the
  existing session expiry watcher
- Exponential backoff: 30s, 60s, 120s, 240s, 300s cap
- Max 20 retry attempts before giving up on a platform
- Non-retryable errors (bad auth token, etc.) are not retried
- Runtime disconnections via _handle_adapter_fatal_error now queue
  retryable failures instead of triggering gateway shutdown
- On successful reconnect, adapter is wired up and channel directory
  is rebuilt automatically

Fixes the case where a DNS blip during gateway startup caused Telegram
and Discord to be permanently unavailable until manual restart.
											
										
										
											2026-03-22 23:48:24 -07:00
+								        # Queue retryable failures for background reconnection
 								        if adapter.fatal_error_retryable:
 								            platform_config = self.config.platforms.get(adapter.platform)
 								            if platform_config and adapter.platform not in self._failed_platforms:
 								                self._failed_platforms[adapter.platform] = {
 								                    "config": platform_config,
 								                    "attempts": 0,
 								                    "next_retry": time.monotonic() + 30,
 								                }
 								                logger.info(
 								                    "%s queued for background reconnection",
 								                    adapter.platform.value,
 								                )
 								        if not self.adapters and not self._failed_platforms:
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								            self._exit_reason = adapter.fatal_error_message or "All messaging adapters disconnected"
-												fix(gateway): restart on whatsapp bridge child exit (#2334)

Co-authored-by: Frederico Ribeiro <fr@tecompanytea.com>
											
										
										
											2026-03-21 09:38:52 -07:00
+								            if adapter.fatal_error_retryable:
 								                self._exit_with_failure = True
 								                logger.error("No connected messaging platforms remain. Shutting down gateway for service restart.")
 								            else:
 								                logger.error("No connected messaging platforms remain. Shutting down gateway cleanly.")
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								            await self.stop()
-												feat: auto-reconnect failed gateway platforms with exponential backoff (#2584)

When a messaging platform fails to connect at startup (e.g. transient DNS
failure) or disconnects at runtime with a retryable error, the gateway now
queues it for background reconnection instead of giving up permanently.

- New _platform_reconnect_watcher background task runs alongside the
  existing session expiry watcher
- Exponential backoff: 30s, 60s, 120s, 240s, 300s cap
- Max 20 retry attempts before giving up on a platform
- Non-retryable errors (bad auth token, etc.) are not retried
- Runtime disconnections via _handle_adapter_fatal_error now queue
  retryable failures instead of triggering gateway shutdown
- On successful reconnect, adapter is wired up and channel directory
  is rebuilt automatically

Fixes the case where a DNS blip during gateway startup caused Telegram
and Discord to be permanently unavailable until manual restart.
											
										
										
											2026-03-22 23:48:24 -07:00
+								        elif not self.adapters and self._failed_platforms:
-												fix(gateway): exit with failure when all platforms fail with retryable errors (#3592)

When all messaging platforms exhaust retries and get queued for background
reconnection, exit with code 1 so systemd Restart=on-failure can restart
the process. Previously the gateway stayed alive as a zombie with no
connected platforms and exit code 0.

Salvaged from PR #3567 by kelsia14. Test updates added.

Co-authored-by: kelsia14 <kelsia14@users.noreply.github.com>
											
										
										
											2026-03-28 14:25:12 -07:00
+								            # All platforms are down and queued for background reconnection.
 								            # If the error is retryable, exit with failure so systemd Restart=on-failure
 								            # can restart the process. Otherwise stay alive and keep retrying in background.
 								            if adapter.fatal_error_retryable:
 								                self._exit_reason = adapter.fatal_error_message or "All messaging platforms failed with retryable errors"
 								                self._exit_with_failure = True
 								                logger.error(
 								                    "All messaging platforms failed with retryable errors. "
 								                    "Shutting down gateway for service restart (systemd will retry)."
 								                )
 								                await self.stop()
 								            else:
 								                logger.warning(
 								                    "No connected messaging platforms remain, but %d platform(s) queued for reconnection",
 								                    len(self._failed_platforms),
 								                )
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
 								    def _request_clean_exit(self, reason: str) -> None:
 								        self._exit_cleanly = True
 								        self._exit_reason = reason
 								        self._shutdown_event.set()
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								    @staticmethod
 								    def _load_prefill_messages() -> List[Dict[str, Any]]:
 								        """Load ephemeral prefill messages from config or env var.
 								        Checks HERMES_PREFILL_MESSAGES_FILE env var first, then falls back to
 								        the prefill_messages_file key in ~/.hermes/config.yaml.
 								        Relative paths are resolved from ~/.hermes/.
 								        """
 								        import json as _json
 								        file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
 								        if not file_path:
 								            try:
 								                import yaml as _y
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								                cfg_path = _hermes_home / "config.yaml"
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								                if cfg_path.exists():
-												Add explicit encoding="utf-8" to all config/data file open() calls

On Windows, open() defaults to the system locale encoding (cp1252,
cp1254, etc.) rather than UTF-8. This breaks any file containing
non-ASCII characters, and also causes crashes when writing JSON with
ensure_ascii=False.

This adds encoding="utf-8" to open() calls in:
- gateway/run.py (config.yaml reads/writes throughout)
- gateway/config.py (gateway.json and config.yaml)
- hermes_cli/config.py (config.yaml load/save)
- hermes_cli/main.py (session export with ensure_ascii=False)
- hermes_cli/status.py (jobs.json and sessions.json)

											
										
										
											2026-03-05 17:04:33 -05:00
+								                    with open(cfg_path, encoding="utf-8") as _f:
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								                        cfg = _y.safe_load(_f) or {}
 								                    file_path = cfg.get("prefill_messages_file", "")
 								            except Exception:
 								                pass
 								        if not file_path:
 								            return []
 								        path = Path(file_path).expanduser()
 								        if not path.is_absolute():
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								            path = _hermes_home / path
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								        if not path.exists():
 								            logger.warning("Prefill messages file not found: %s", path)
 								            return []
 								        try:
 								            with open(path, "r", encoding="utf-8") as f:
 								                data = _json.load(f)
 								            if not isinstance(data, list):
 								                logger.warning("Prefill messages file must contain a JSON array: %s", path)
 								                return []
 								            return data
 								        except Exception as e:
 								            logger.warning("Failed to load prefill messages from %s: %s", path, e)
 								            return []
 								    @staticmethod
 								    def _load_ephemeral_system_prompt() -> str:
 								        """Load ephemeral system prompt from config or env var.
 								        Checks HERMES_EPHEMERAL_SYSTEM_PROMPT env var first, then falls back to
 								        agent.system_prompt in ~/.hermes/config.yaml.
 								        """
 								        prompt = os.getenv("HERMES_EPHEMERAL_SYSTEM_PROMPT", "")
 								        if prompt:
 								            return prompt
 								        try:
 								            import yaml as _y
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								            cfg_path = _hermes_home / "config.yaml"
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								            if cfg_path.exists():
-												Add explicit encoding="utf-8" to all config/data file open() calls

On Windows, open() defaults to the system locale encoding (cp1252,
cp1254, etc.) rather than UTF-8. This breaks any file containing
non-ASCII characters, and also causes crashes when writing JSON with
ensure_ascii=False.

This adds encoding="utf-8" to open() calls in:
- gateway/run.py (config.yaml reads/writes throughout)
- gateway/config.py (gateway.json and config.yaml)
- hermes_cli/config.py (config.yaml load/save)
- hermes_cli/main.py (session export with ensure_ascii=False)
- hermes_cli/status.py (jobs.json and sessions.json)

											
										
										
											2026-03-05 17:04:33 -05:00
+								                with open(cfg_path, encoding="utf-8") as _f:
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								                    cfg = _y.safe_load(_f) or {}
 								                return (cfg.get("agent", {}).get("system_prompt", "") or "").strip()
 								        except Exception:
 								            pass
 								        return ""
-												feat: add reasoning effort configuration for agent

- Introduced a new configuration option for reasoning effort in the CLI, allowing users to specify the level of reasoning the agent should perform before responding.
- Updated the CLI and agent initialization to incorporate the reasoning configuration, enhancing the agent's responsiveness and adaptability.
- Implemented logic to load reasoning effort from environment variables and configuration files, providing flexibility in agent behavior.
- Enhanced the documentation in the example configuration file to clarify the new reasoning effort options available.

											
										
										
											2026-02-24 03:30:19 -08:00
+								    @staticmethod
 								    def _load_reasoning_config() -> dict | None:
-												feat(gateway): add reasoning hot reload

Add a /reasoning command across gateway adapters so users can
inspect or change reasoning effort without editing config by hand.

Reload reasoning settings from config.yaml before each agent run,
including background tasks, so the next message picks up the new
value consistently.

											
										
										
											2026-03-11 22:12:11 +08:00
+								        """Load reasoning effort from config with env fallback.
 								        Checks agent.reasoning_effort in config.yaml first, then
 								        HERMES_REASONING_EFFORT as a fallback. Valid: "xhigh", "high",
 								        "medium", "low", "minimal", "none". Returns None to use default
 								        (medium).
-												feat: add reasoning effort configuration for agent

- Introduced a new configuration option for reasoning effort in the CLI, allowing users to specify the level of reasoning the agent should perform before responding.
- Updated the CLI and agent initialization to incorporate the reasoning configuration, enhancing the agent's responsiveness and adaptability.
- Implemented logic to load reasoning effort from environment variables and configuration files, providing flexibility in agent behavior.
- Enhanced the documentation in the example configuration file to clarify the new reasoning effort options available.

											
										
										
											2026-02-24 03:30:19 -08:00
+								        """
-												refactor: consolidate get_hermes_home() and parse_reasoning_effort() (#3062)

Centralizes two widely-duplicated patterns into hermes_constants.py:

1. get_hermes_home() — Path resolution for ~/.hermes (HERMES_HOME env var)
   - Was copy-pasted inline across 30+ files as:
     Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
   - Now defined once in hermes_constants.py (zero-dependency module)
   - hermes_cli/config.py re-exports it for backward compatibility
   - Removed local wrapper functions in honcho_integration/client.py,
     tools/website_policy.py, tools/tirith_security.py, hermes_cli/uninstall.py

2. parse_reasoning_effort() — Reasoning effort string validation
   - Was copy-pasted in cli.py, gateway/run.py, cron/scheduler.py
   - Same validation logic: check against (xhigh, high, medium, low, minimal, none)
   - Now defined once in hermes_constants.py, called from all 3 locations
   - Warning log for unknown values kept at call sites (context-specific)

31 files changed, net +31 lines (125 insertions, 94 deletions)
Full test suite: 6179 passed, 0 failed
											
										
										
											2026-03-25 15:54:28 -07:00
+								        from hermes_constants import parse_reasoning_effort
-												feat(gateway): add reasoning hot reload

Add a /reasoning command across gateway adapters so users can
inspect or change reasoning effort without editing config by hand.

Reload reasoning settings from config.yaml before each agent run,
including background tasks, so the next message picks up the new
value consistently.

											
										
										
											2026-03-11 22:12:11 +08:00
+								        effort = ""
 								        try:
 								            import yaml as _y
 								            cfg_path = _hermes_home / "config.yaml"
 								            if cfg_path.exists():
 								                with open(cfg_path, encoding="utf-8") as _f:
 								                    cfg = _y.safe_load(_f) or {}
 								                effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
 								        except Exception:
 								            pass
-												feat: add reasoning effort configuration for agent

- Introduced a new configuration option for reasoning effort in the CLI, allowing users to specify the level of reasoning the agent should perform before responding.
- Updated the CLI and agent initialization to incorporate the reasoning configuration, enhancing the agent's responsiveness and adaptability.
- Implemented logic to load reasoning effort from environment variables and configuration files, providing flexibility in agent behavior.
- Enhanced the documentation in the example configuration file to clarify the new reasoning effort options available.

											
										
										
											2026-02-24 03:30:19 -08:00
+								        if not effort:
-												feat(gateway): add reasoning hot reload

Add a /reasoning command across gateway adapters so users can
inspect or change reasoning effort without editing config by hand.

Reload reasoning settings from config.yaml before each agent run,
including background tasks, so the next message picks up the new
value consistently.

											
										
										
											2026-03-11 22:12:11 +08:00
+								            effort = os.getenv("HERMES_REASONING_EFFORT", "")
-												refactor: consolidate get_hermes_home() and parse_reasoning_effort() (#3062)

Centralizes two widely-duplicated patterns into hermes_constants.py:

1. get_hermes_home() — Path resolution for ~/.hermes (HERMES_HOME env var)
   - Was copy-pasted inline across 30+ files as:
     Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
   - Now defined once in hermes_constants.py (zero-dependency module)
   - hermes_cli/config.py re-exports it for backward compatibility
   - Removed local wrapper functions in honcho_integration/client.py,
     tools/website_policy.py, tools/tirith_security.py, hermes_cli/uninstall.py

2. parse_reasoning_effort() — Reasoning effort string validation
   - Was copy-pasted in cli.py, gateway/run.py, cron/scheduler.py
   - Same validation logic: check against (xhigh, high, medium, low, minimal, none)
   - Now defined once in hermes_constants.py, called from all 3 locations
   - Warning log for unknown values kept at call sites (context-specific)

31 files changed, net +31 lines (125 insertions, 94 deletions)
Full test suite: 6179 passed, 0 failed
											
										
										
											2026-03-25 15:54:28 -07:00
+								        result = parse_reasoning_effort(effort)
 								        if effort and effort.strip() and result is None:
 								            logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
 								        return result
-												feat: add reasoning effort configuration for agent

- Introduced a new configuration option for reasoning effort in the CLI, allowing users to specify the level of reasoning the agent should perform before responding.
- Updated the CLI and agent initialization to incorporate the reasoning configuration, enhancing the agent's responsiveness and adaptability.
- Implemented logic to load reasoning effort from environment variables and configuration files, providing flexibility in agent behavior.
- Enhanced the documentation in the example configuration file to clarify the new reasoning effort options available.

											
										
										
											2026-02-24 03:30:19 -08:00
-												fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
											
										
										
											2026-03-12 05:38:19 -07:00
+								    @staticmethod
 								    def _load_show_reasoning() -> bool:
 								        """Load show_reasoning toggle from config.yaml display section."""
 								        try:
 								            import yaml as _y
 								            cfg_path = _hermes_home / "config.yaml"
 								            if cfg_path.exists():
 								                with open(cfg_path, encoding="utf-8") as _f:
 								                    cfg = _y.safe_load(_f) or {}
 								                return bool(cfg.get("display", {}).get("show_reasoning", False))
 								        except Exception:
 								            pass
 								        return False
-												feat(gateway): configurable background process watcher notifications

Add display.background_process_notifications config option to control
how chatty the gateway process watcher is when using
terminal(background=true, check_interval=...) from messaging platforms.

Modes:
  - all:    running-output updates + final message (default, current behavior)
  - result: only the final completion message
  - error:  only the final message when exit code != 0
  - off:    no watcher messages at all

Also supports HERMES_BACKGROUND_NOTIFICATIONS env var override.

Includes 12 tests (5 config loading + 7 watcher behavior).

Inspired by @PeterFile's PR #593. Closes #592.

											
										
										
											2026-03-10 04:12:39 -07:00
+								    @staticmethod
 								    def _load_background_notifications_mode() -> str:
 								        """Load background process notification mode from config or env var.
 								        Modes:
 								          - ``all``    — push running-output updates *and* the final message (default)
 								          - ``result`` — only the final completion message (regardless of exit code)
 								          - ``error``  — only the final message when exit code is non-zero
 								          - ``off``    — no watcher messages at all
 								        """
 								        mode = os.getenv("HERMES_BACKGROUND_NOTIFICATIONS", "")
 								        if not mode:
 								            try:
 								                import yaml as _y
 								                cfg_path = _hermes_home / "config.yaml"
 								                if cfg_path.exists():
 								                    with open(cfg_path, encoding="utf-8") as _f:
 								                        cfg = _y.safe_load(_f) or {}
 								                    raw = cfg.get("display", {}).get("background_process_notifications")
 								                    if raw is False:
 								                        mode = "off"
 								                    elif raw not in (None, ""):
 								                        mode = str(raw)
 								            except Exception:
 								                pass
 								        mode = (mode or "all").strip().lower()
 								        valid = {"all", "result", "error", "off"}
 								        if mode not in valid:
 								            logger.warning(
 								                "Unknown background_process_notifications '%s', defaulting to 'all'",
 								                mode,
 								            )
 								            return "all"
 								        return mode
-												feat(provider-routing): add OpenRouter provider routing configuration

Introduced a new `provider_routing` section in the CLI configuration to control how requests are routed across providers when using OpenRouter. This includes options for sorting providers by throughput, latency, or price, as well as allowing or ignoring specific providers, setting the order of provider attempts, and managing data collection policies. Updated relevant classes and documentation to support these features, enhancing flexibility in provider selection.

											
										
										
											2026-03-01 18:24:27 -08:00
+								    @staticmethod
 								    def _load_provider_routing() -> dict:
 								        """Load OpenRouter provider routing preferences from config.yaml."""
 								        try:
 								            import yaml as _y
 								            cfg_path = _hermes_home / "config.yaml"
 								            if cfg_path.exists():
-												Add explicit encoding="utf-8" to all config/data file open() calls

On Windows, open() defaults to the system locale encoding (cp1252,
cp1254, etc.) rather than UTF-8. This breaks any file containing
non-ASCII characters, and also causes crashes when writing JSON with
ensure_ascii=False.

This adds encoding="utf-8" to open() calls in:
- gateway/run.py (config.yaml reads/writes throughout)
- gateway/config.py (gateway.json and config.yaml)
- hermes_cli/config.py (config.yaml load/save)
- hermes_cli/main.py (session export with ensure_ascii=False)
- hermes_cli/status.py (jobs.json and sessions.json)

											
										
										
											2026-03-05 17:04:33 -05:00
+								                with open(cfg_path, encoding="utf-8") as _f:
-												feat(provider-routing): add OpenRouter provider routing configuration

Introduced a new `provider_routing` section in the CLI configuration to control how requests are routed across providers when using OpenRouter. This includes options for sorting providers by throughput, latency, or price, as well as allowing or ignoring specific providers, setting the order of provider attempts, and managing data collection policies. Updated relevant classes and documentation to support these features, enhancing flexibility in provider selection.

											
										
										
											2026-03-01 18:24:27 -08:00
+								                    cfg = _y.safe_load(_f) or {}
 								                return cfg.get("provider_routing", {}) or {}
 								        except Exception:
 								            pass
 								        return {}
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								    @staticmethod
 								    def _load_fallback_model() -> dict | None:
 								        """Load fallback model config from config.yaml.
 								        Returns a dict with 'provider' and 'model' keys, or None if
 								        not configured / both fields empty.
 								        """
 								        try:
 								            import yaml as _y
 								            cfg_path = _hermes_home / "config.yaml"
 								            if cfg_path.exists():
-												Merge PR #458: Add explicit UTF-8 encoding to config/data file I/O

Authored by shitcoinsherpa. Adds encoding='utf-8' to all text-mode
open() calls in gateway/run.py, gateway/config.py, hermes_cli/config.py,
hermes_cli/main.py, and hermes_cli/status.py. Prevents encoding errors
on Windows where the default locale is not UTF-8.

Also fixed 4 additional open() calls in gateway/run.py that were added
after the PR branch was created.

											
										
										
											2026-03-09 21:19:20 -07:00
+								                with open(cfg_path, encoding="utf-8") as _f:
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								                    cfg = _y.safe_load(_f) or {}
 								                fb = cfg.get("fallback_model", {}) or {}
 								                if fb.get("provider") and fb.get("model"):
 								                    return fb
 								        except Exception:
 								            pass
 								        return None
-												fix: hermes update causes dual gateways on macOS (launchd) (#1567)

* feat: add optional smart model routing

Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.

* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s

* fix(gateway): avoid recursive ExecStop in user systemd unit

* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit

The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.

---------

Co-authored-by: Ninja <ninja@local>

* feat(skills): add blender-mcp optional skill for 3D modeling

Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.

Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.

* feat(acp): support slash commands in ACP adapter (#1532)

Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.

Unrecognized /commands fall through to the LLM as normal messages.

/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.

Fixes #1402

* fix(logging): improve error logging in session search tool (#1533)

* fix(gateway): restart on retryable startup failures (#1517)

* feat(email): add skip_attachments option via config.yaml

* feat(email): add skip_attachments option via config.yaml

Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.

Configure in config.yaml:
  platforms:
    email:
      skip_attachments: true

Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.

* docs: document skip_attachments option for email adapter

* fix(telegram): retry on transient TLS failures during connect and send

Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.

Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.

Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.

Based on PR #1527 by cmd8. Closes #1526.

* feat: permissive block_anchor thresholds and unicode normalization (#1539)

Salvaged from PR #1528 by an420eth. Closes #517.

Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
  non-breaking spaces → ASCII) so LLM-produced unicode artifacts
  don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
  multiple candidates — if first/last lines match exactly, the
  block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
  preserve correct character positions

Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.

Co-authored-by: an420eth <an420eth@users.noreply.github.com>

* feat(cli): add file path autocomplete in the input prompt (#1545)

When typing a path-like token (./  ../  ~/  /  or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.

Triggered by tokens like:
  edit ./src/ma     → shows ./src/main.py, ./src/manifest.json, ...
  check ~/doc       → shows ~/docs/, ~/documents/, ...
  read /etc/hos     → shows /etc/hosts, /etc/hostname, ...
  open tools/reg    → shows tools/registry.py

Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.

Inspired by OpenCode PR #145 (file path completion menu).

Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
  tokens, _path_completions() yields filesystem Completions with
  size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
  path extraction, prefix filtering, directory markers, home
  expansion, case-insensitivity, integration with slash commands

* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)

Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.

Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.

* feat: smart approvals + /stop command (inspired by OpenAI Codex)

* feat: smart approvals — LLM-based risk assessment for dangerous commands

Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.

Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).

Config (config.yaml):
  approvals:
    mode: manual   # manual (default), smart, off

Modes:
- manual — current behavior, always prompt the user
- smart  — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
           or ESCALATE (fall through to manual prompt)
- off    — skip all approval prompts (equivalent to --yolo)

When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.

The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.

* feat: make smart approval model configurable via config.yaml

Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).

Config:
  auxiliary:
    approval:
      provider: auto
      model: ''        # fast/cheap model recommended
      base_url: ''
      api_key: ''

Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.

* feat: add /stop command to kill all background processes

Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.

Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.

Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.

* feat: first-class plugin architecture + hide status bar cost by default (#1544)

The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:

  display:
    show_cost: true

in config.yaml, or: hermes config set display.show_cost true

The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.

Status bar without cost:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m

Status bar with show_cost: true:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m

* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)

* feat: improve memory prioritization — user preferences over procedural knowledge

Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.

Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'

Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
  and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
  corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
  corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
  preferences and corrections over task-specific details

* feat: more aggressive skill creation and update prompting

Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.

Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
  to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
  if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
  now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers

* feat: first-class plugin architecture (#1555)

Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.

Core system (hermes_cli/plugins.py):
  - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
    pip entry_points (hermes_agent.plugins group)
  - PluginContext with register_tool() and register_hook()
  - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
    on_session_start/end
  - Namespace package handling for relative imports in plugins
  - Graceful error isolation — broken plugins never crash the agent

Integration (model_tools.py):
  - Plugin discovery runs after built-in + MCP tools
  - Plugin tools bypass toolset filter via get_plugin_tool_names()
  - Pre/post tool call hooks fire in handle_function_call()

CLI:
  - /plugins command shows loaded plugins, tool counts, status
  - Added to COMMANDS dict for autocomplete

Docs:
  - Getting started guide (build-a-hermes-plugin.md) — full tutorial
    building a calculator plugin step by step
  - Reference page (features/plugins.md) — quick overview + tables
  - Covers: file structure, schemas, handlers, hooks, data files,
    bundled skills, env var gating, pip distribution, common mistakes

Tests: 16 tests covering discovery, loading, hooks, tool visibility.

* fix: hermes update causes dual gateways on macOS (launchd)

Three bugs worked together to create the dual-gateway problem:

1. cmd_update only checked systemd for gateway restart, completely
   ignoring launchd on macOS. After killing the PID it would print
   'Restart it with: hermes gateway run' even when launchd was about
   to auto-respawn the process.

2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
   after SIGTERM (non-zero exit), so the user's manual restart
   created a second instance.

3. The launchd plist lacked --replace (systemd had it), so the
   respawned gateway didn't kill stale instances on startup.

Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint

* fix: add launchd plist auto-refresh + explicit restart in cmd_update

Two integration issues with the initial fix:

1. Existing macOS users with old plist (no --replace) would never
   get the fix until manual uninstall/reinstall. Added
   refresh_launchd_plist_if_needed() — mirrors the existing
   refresh_systemd_unit_if_needed(). Called from launchd_start(),
   launchd_restart(), and cmd_update.

2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
   explicit launchctl stop/start. This caused races: launchd would
   respawn the old process before the PID file was cleaned up.
   Now does explicit stop+start (matching how systemd gets an
   explicit systemctl restart), with plist refresh first so the
   new --replace flag is picked up.

---------

Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
											
										
										
											2026-03-16 12:36:29 -07:00
+								    @staticmethod
 								    def _load_smart_model_routing() -> dict:
 								        """Load optional smart cheap-vs-strong model routing config."""
 								        try:
 								            import yaml as _y
 								            cfg_path = _hermes_home / "config.yaml"
 								            if cfg_path.exists():
 								                with open(cfg_path, encoding="utf-8") as _f:
 								                    cfg = _y.safe_load(_f) or {}
 								                return cfg.get("smart_model_routing", {}) or {}
 								        except Exception:
 								            pass
 								        return {}
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    async def start(self) -> bool:
 								        """
 								        Start the gateway and all configured platform adapters.
 								        Returns True if at least one adapter connected successfully.
 								        """
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								        logger.info("Starting Hermes Gateway...")
 								        logger.info("Session storage: %s", self.config.sessions_dir)
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								        try:
 								            from gateway.status import write_runtime_status
 								            write_runtime_status(gateway_state="starting", exit_reason=None)
 								        except Exception:
 								            pass
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								        # Warn if no user allowlists are configured and open access is not opted in
 								        _any_allowlist = any(
 								            os.getenv(v)
 								            for v in ("TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS",
 								                       "WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS",
-												fix(gateway): include per-platform ALLOW_ALL and SIGNAL_GROUP in startup allowlist check (#3313)

The startup warning 'No user allowlists configured' only checked
GATEWAY_ALLOW_ALL_USERS and per-platform _ALLOWED_USERS vars. It
missed SIGNAL_GROUP_ALLOWED_USERS and per-platform _ALLOW_ALL_USERS
vars (e.g. TELEGRAM_ALLOW_ALL_USERS), causing a false warning even
when users had these configured. The actual auth check in
_is_user_authorized already recognized these vars.

Cherry-picked from PR #3202 by binhnt92.

Co-authored-by: binhnt92 <binhnt.ht.92@gmail.com>
											
										
										
											2026-03-26 18:23:49 -07:00
+								                       "SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
 								                       "EMAIL_ALLOWED_USERS",
-												fix(gateway): add all missing platform allowlist env vars to startup warning check (#2628)

* fix(gateway): added MATRIX_ALLOWED_USERS to list of env vars checked by gateway

* fix(gateway): add all missing platform allowlist env vars to startup check

The startup warning for 'No user allowlists configured' was only checking
TELEGRAM, DISCORD, WHATSAPP, SLACK, and SMS — missing SIGNAL, EMAIL,
MATTERMOST, and DINGTALK. Users of those platforms would see a spurious
warning even with their platform-specific allowlist configured.

Now matches the canonical platform_env_map in _is_user_authorized().

---------

Co-authored-by: SteelPh0enix <wojciech_olech@hotmail.com>
											
										
										
											2026-03-23 07:19:14 -07:00
+								                       "SMS_ALLOWED_USERS", "MATTERMOST_ALLOWED_USERS",
 								                       "MATRIX_ALLOWED_USERS", "DINGTALK_ALLOWED_USERS",
-												revert: revert SMS (Telnyx) platform adapter for review

This reverts commit ef67037f8ee538ed6995655374ed994b560d4342.
											
										
										
											2026-03-17 02:53:30 -07:00
+								                       "GATEWAY_ALLOWED_USERS")
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								        )
-												fix(gateway): include per-platform ALLOW_ALL and SIGNAL_GROUP in startup allowlist check (#3313)

The startup warning 'No user allowlists configured' only checked
GATEWAY_ALLOW_ALL_USERS and per-platform _ALLOWED_USERS vars. It
missed SIGNAL_GROUP_ALLOWED_USERS and per-platform _ALLOW_ALL_USERS
vars (e.g. TELEGRAM_ALLOW_ALL_USERS), causing a false warning even
when users had these configured. The actual auth check in
_is_user_authorized already recognized these vars.

Cherry-picked from PR #3202 by binhnt92.

Co-authored-by: binhnt92 <binhnt.ht.92@gmail.com>
											
										
										
											2026-03-26 18:23:49 -07:00
+								        _allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes") or any(
 								            os.getenv(v, "").lower() in ("true", "1", "yes")
 								            for v in ("TELEGRAM_ALLOW_ALL_USERS", "DISCORD_ALLOW_ALL_USERS",
 								                       "WHATSAPP_ALLOW_ALL_USERS", "SLACK_ALLOW_ALL_USERS",
 								                       "SIGNAL_ALLOW_ALL_USERS", "EMAIL_ALLOW_ALL_USERS",
 								                       "SMS_ALLOW_ALL_USERS", "MATTERMOST_ALLOW_ALL_USERS",
 								                       "MATRIX_ALLOW_ALL_USERS", "DINGTALK_ALLOW_ALL_USERS")
 								        )
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								        if not _any_allowlist and not _allow_all:
 								            logger.warning(
 								                "No user allowlists configured. All unauthorized users will be denied. "
 								                "Set GATEWAY_ALLOW_ALL_USERS=true in ~/.hermes/.env to allow open access, "
 								                "or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)."
 								            )
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        # Discover and load event hooks
 								        self.hooks.discover_and_load()
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
+								        # Recover background processes from checkpoint (crash recovery)
 								        try:
 								            from tools.process_registry import process_registry
 								            recovered = process_registry.recover_from_checkpoint()
 								            if recovered:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.info("Recovered %s background process(es) from previous run", recovered)
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
+								        except Exception as e:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								            logger.warning("Process checkpoint recovery: %s", e)
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        connected_count = 0
-												fix(gateway): restart on retryable startup failures (#1517)
											
										
										
											2026-03-16 17:56:31 +05:30
+								        enabled_platform_count = 0
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								        startup_nonretryable_errors: list[str] = []
-												fix(gateway): restart on retryable startup failures (#1517)
											
										
										
											2026-03-16 17:56:31 +05:30
+								        startup_retryable_errors: list[str] = []
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								        # Initialize and connect each configured platform
 								        for platform, platform_config in self.config.platforms.items():
 								            if not platform_config.enabled:
 								                continue
-												fix(gateway): restart on retryable startup failures (#1517)
											
										
										
											2026-03-16 17:56:31 +05:30
+								            enabled_platform_count += 1
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								            adapter = self._create_adapter(platform, platform_config)
 								            if not adapter:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.warning("No adapter available for %s", platform.value)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                continue
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								            # Set up message + fatal error handlers
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            adapter.set_message_handler(self._handle_message)
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								            adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								            # Try to connect
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								            logger.info("Connecting to %s...", platform.value)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            try:
 								                success = await adapter.connect()
 								                if success:
 								                    self.adapters[platform] = adapter
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								                    self._sync_voice_mode_state_to_adapter(adapter)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                    connected_count += 1
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                    logger.info("✓ %s connected", platform.value)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                else:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                    logger.warning("✗ %s failed to connect", platform.value)
-												fix(gateway): restart on retryable startup failures (#1517)
											
										
										
											2026-03-16 17:56:31 +05:30
+								                    if adapter.has_fatal_error:
 								                        target = (
 								                            startup_retryable_errors
 								                            if adapter.fatal_error_retryable
 								                            else startup_nonretryable_errors
 								                        )
 								                        target.append(
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								                            f"{platform.value}: {adapter.fatal_error_message}"
 								                        )
-												feat: auto-reconnect failed gateway platforms with exponential backoff (#2584)

When a messaging platform fails to connect at startup (e.g. transient DNS
failure) or disconnects at runtime with a retryable error, the gateway now
queues it for background reconnection instead of giving up permanently.

- New _platform_reconnect_watcher background task runs alongside the
  existing session expiry watcher
- Exponential backoff: 30s, 60s, 120s, 240s, 300s cap
- Max 20 retry attempts before giving up on a platform
- Non-retryable errors (bad auth token, etc.) are not retried
- Runtime disconnections via _handle_adapter_fatal_error now queue
  retryable failures instead of triggering gateway shutdown
- On successful reconnect, adapter is wired up and channel directory
  is rebuilt automatically

Fixes the case where a DNS blip during gateway startup caused Telegram
and Discord to be permanently unavailable until manual restart.
											
										
										
											2026-03-22 23:48:24 -07:00
+								                        # Queue for reconnection if the error is retryable
 								                        if adapter.fatal_error_retryable:
 								                            self._failed_platforms[platform] = {
 								                                "config": platform_config,
 								                                "attempts": 1,
 								                                "next_retry": time.monotonic() + 30,
 								                            }
-												fix(gateway): restart on retryable startup failures (#1517)
											
										
										
											2026-03-16 17:56:31 +05:30
+								                    else:
 								                        startup_retryable_errors.append(
 								                            f"{platform.value}: failed to connect"
 								                        )
-												feat: auto-reconnect failed gateway platforms with exponential backoff (#2584)

When a messaging platform fails to connect at startup (e.g. transient DNS
failure) or disconnects at runtime with a retryable error, the gateway now
queues it for background reconnection instead of giving up permanently.

- New _platform_reconnect_watcher background task runs alongside the
  existing session expiry watcher
- Exponential backoff: 30s, 60s, 120s, 240s, 300s cap
- Max 20 retry attempts before giving up on a platform
- Non-retryable errors (bad auth token, etc.) are not retried
- Runtime disconnections via _handle_adapter_fatal_error now queue
  retryable failures instead of triggering gateway shutdown
- On successful reconnect, adapter is wired up and channel directory
  is rebuilt automatically

Fixes the case where a DNS blip during gateway startup caused Telegram
and Discord to be permanently unavailable until manual restart.
											
										
										
											2026-03-22 23:48:24 -07:00
+								                        # No fatal error info means likely a transient issue — queue for retry
 								                        self._failed_platforms[platform] = {
 								                            "config": platform_config,
 								                            "attempts": 1,
 								                            "next_retry": time.monotonic() + 30,
 								                        }
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            except Exception as e:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.error("✗ %s error: %s", platform.value, e)
-												fix(gateway): restart on retryable startup failures (#1517)
											
										
										
											2026-03-16 17:56:31 +05:30
+								                startup_retryable_errors.append(f"{platform.value}: {e}")
-												feat: auto-reconnect failed gateway platforms with exponential backoff (#2584)

When a messaging platform fails to connect at startup (e.g. transient DNS
failure) or disconnects at runtime with a retryable error, the gateway now
queues it for background reconnection instead of giving up permanently.

- New _platform_reconnect_watcher background task runs alongside the
  existing session expiry watcher
- Exponential backoff: 30s, 60s, 120s, 240s, 300s cap
- Max 20 retry attempts before giving up on a platform
- Non-retryable errors (bad auth token, etc.) are not retried
- Runtime disconnections via _handle_adapter_fatal_error now queue
  retryable failures instead of triggering gateway shutdown
- On successful reconnect, adapter is wired up and channel directory
  is rebuilt automatically

Fixes the case where a DNS blip during gateway startup caused Telegram
and Discord to be permanently unavailable until manual restart.
											
										
										
											2026-03-22 23:48:24 -07:00
+								                # Unexpected exceptions are typically transient — queue for retry
 								                self._failed_platforms[platform] = {
 								                    "config": platform_config,
 								                    "attempts": 1,
 								                    "next_retry": time.monotonic() + 30,
 								                }
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								        if connected_count == 0:
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								            if startup_nonretryable_errors:
 								                reason = "; ".join(startup_nonretryable_errors)
 								                logger.error("Gateway hit a non-retryable startup conflict: %s", reason)
 								                try:
 								                    from gateway.status import write_runtime_status
 								                    write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
 								                except Exception:
 								                    pass
 								                self._request_clean_exit(reason)
 								                return True
-												fix(gateway): restart on retryable startup failures (#1517)
											
										
										
											2026-03-16 17:56:31 +05:30
+								            if enabled_platform_count > 0:
 								                reason = "; ".join(startup_retryable_errors) or "all configured messaging platforms failed to connect"
 								                logger.error("Gateway failed to connect any configured messaging platform: %s", reason)
 								                try:
 								                    from gateway.status import write_runtime_status
 								                    write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
 								                except Exception:
 								                    pass
 								                return False
 								            logger.warning("No messaging platforms enabled.")
-												refactor: streamline cron job handling and update CLI commands

- Removed legacy cron daemon functionality, integrating cron job execution directly into the gateway process for improved efficiency.
- Updated CLI commands to reflect changes, replacing `hermes cron daemon` with `hermes cron status` and enhancing documentation for cron job management.
- Clarified messaging in the README and other documentation regarding the gateway's role in managing cron jobs.
- Removed obsolete terminal_hecate tool and related configurations to simplify the codebase.

											
										
										
											2026-02-21 16:21:19 -08:00
+								            logger.info("Gateway will continue running for cron job execution.")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								        # Update delivery router with adapters
 								        self.delivery_router.adapters = self.adapters
 								        self._running = True
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								        try:
 								            from gateway.status import write_runtime_status
 								            write_runtime_status(gateway_state="running", exit_reason=None)
 								        except Exception:
 								            pass
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
 								        # Emit gateway:startup hook
 								        hook_count = len(self.hooks.loaded_hooks)
 								        if hook_count:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								            logger.info("%s hook(s) loaded", hook_count)
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        await self.hooks.emit("gateway:startup", {
 								            "platforms": [p.value for p in self.adapters.keys()],
 								        })
-												refactor: streamline cron job handling and update CLI commands

- Removed legacy cron daemon functionality, integrating cron job execution directly into the gateway process for improved efficiency.
- Updated CLI commands to reflect changes, replacing `hermes cron daemon` with `hermes cron status` and enhancing documentation for cron job management.
- Clarified messaging in the README and other documentation regarding the gateway's role in managing cron jobs.
- Removed obsolete terminal_hecate tool and related configurations to simplify the codebase.

											
										
										
											2026-02-21 16:21:19 -08:00
+								        if connected_count > 0:
 								            logger.info("Gateway running with %s platform(s)", connected_count)
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
 								        # Build initial channel directory for send_message name resolution
 								        try:
 								            from gateway.channel_directory import build_channel_directory
 								            directory = build_channel_directory(self.adapters)
 								            ch_count = sum(len(chs) for chs in directory.get("platforms", {}).values())
 								            logger.info("Channel directory built: %d target(s)", ch_count)
 								        except Exception as e:
 								            logger.warning("Channel directory build failed: %s", e)
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								        # Check if we're restarting after a /update command. If the update is
 								        # still running, keep watching so we notify once it actually finishes.
 								        notified = await self._send_update_notification()
 								        if not notified and any(
 								            path.exists()
 								            for path in (
 								                _hermes_home / ".update_pending.json",
 								                _hermes_home / ".update_pending.claimed.json",
 								            )
 								        ):
 								            self._schedule_update_notification_watch()
-												feat: add /update slash command for gateway platforms

Adds a /update command to Telegram, Discord, and other gateway platforms
that runs `hermes update` to pull the latest code, update dependencies,
sync skills, and restart the gateway.

Implementation:
- Spawns `hermes update` in a separate systemd scope (systemd-run --user
  --scope) so the process survives the gateway restart that hermes update
  triggers at the end. Falls back to nohup if systemd-run is unavailable.
- Writes a marker file (.update_pending.json) with the originating
  platform and chat_id before spawning the update.
- On gateway startup, _send_update_notification() checks for the marker,
  reads the captured update output, sends the results back to the user,
  and cleans up.

Also:
- Registers /update as a Discord slash command
- Updates README.md, docs/messaging.md, docs/slash-commands.md
- Adds 18 tests covering handler, notification, and edge cases

											
										
										
											2026-03-05 01:20:58 -08:00
-												fix(gateway): persist watcher metadata in checkpoint for crash recovery (#1706)

Salvaged from PR #1573 by @eren-karakus0. Cherry-picked with authorship preserved.

Fixes #1143 — background process notifications resume after gateway restart.

Co-authored-by: Muhammet Eren Karakuş <erenkar950@gmail.com>
											
										
										
											2026-03-17 03:52:15 -07:00
+								        # Drain any recovered process watchers (from crash recovery checkpoint)
 								        try:
 								            from tools.process_registry import process_registry
 								            while process_registry.pending_watchers:
 								                watcher = process_registry.pending_watchers.pop(0)
 								                asyncio.create_task(self._run_process_watcher(watcher))
 								                logger.info("Resumed watcher for recovered process %s", watcher.get("session_id"))
 								        except Exception as e:
 								            logger.error("Recovered watcher setup error: %s", e)
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								        # Start background session expiry watcher for proactive memory flushing
 								        asyncio.create_task(self._session_expiry_watcher())
-												feat: auto-reconnect failed gateway platforms with exponential backoff (#2584)

When a messaging platform fails to connect at startup (e.g. transient DNS
failure) or disconnects at runtime with a retryable error, the gateway now
queues it for background reconnection instead of giving up permanently.

- New _platform_reconnect_watcher background task runs alongside the
  existing session expiry watcher
- Exponential backoff: 30s, 60s, 120s, 240s, 300s cap
- Max 20 retry attempts before giving up on a platform
- Non-retryable errors (bad auth token, etc.) are not retried
- Runtime disconnections via _handle_adapter_fatal_error now queue
  retryable failures instead of triggering gateway shutdown
- On successful reconnect, adapter is wired up and channel directory
  is rebuilt automatically

Fixes the case where a DNS blip during gateway startup caused Telegram
and Discord to be permanently unavailable until manual restart.
											
										
										
											2026-03-22 23:48:24 -07:00
+								        # Start background reconnection watcher for platforms that failed at startup
 								        if self._failed_platforms:
 								            logger.info(
 								                "Starting reconnection watcher for %d failed platform(s): %s",
 								                len(self._failed_platforms),
 								                ", ".join(p.value for p in self._failed_platforms),
 								            )
 								        asyncio.create_task(self._platform_reconnect_watcher())
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								        logger.info("Press Ctrl+C to stop")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								        return True
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								    async def _session_expiry_watcher(self, interval: int = 300):
 								        """Background task that proactively flushes memories for expired sessions.
 								        Runs every `interval` seconds (default 5 min).  For each session that
 								        has expired according to its reset policy, flushes memories in a thread
 								        pool and marks the session so it won't be flushed again.
 								        This means memories are already saved by the time the user sends their
 								        next message, so there's no blocking delay.
 								        """
 								        await asyncio.sleep(60)  # initial delay — let the gateway fully start
 								        while self._running:
 								            try:
 								                self.session_store._ensure_loaded()
 								                for key, entry in list(self.session_store._entries.items()):
 								                    if entry.session_id in self.session_store._pre_flushed_sessions:
 								                        continue  # already flushed this session
 								                    if not self.session_store._is_session_expired(entry):
 								                        continue  # session still active
 								                    # Session has expired — flush memories in the background
 								                    logger.info(
 								                        "Session %s expired (key=%s), flushing memories proactively",
 								                        entry.session_id, key,
 								                    )
 								                    try:
-												fix(honcho): isolate session routing for multi-user gateway (#1500)

Salvaged from PR #1470 by adavyas.

Core fix: Honcho tool calls in a multi-session gateway could route to
the wrong session because honcho_tools.py relied on process-global
state. Now threads session context through the call chain:
  AIAgent._invoke_tool() → handle_function_call() → registry.dispatch()
  → handler **kw → _resolve_session_context()

Changes:
- Add _resolve_session_context() to prefer per-call context over globals
- Plumb honcho_manager + honcho_session_key through handle_function_call
- Add sync_honcho=False to run_conversation() for synthetic flush turns
- Pass honcho_session_key through gateway memory flush lifecycle
- Harden gateway PID detection when /proc cmdline is unreadable
- Make interrupt test scripts import-safe for pytest-xdist
- Wrap BibTeX examples in Jekyll raw blocks for docs build
- Fix thread-order-dependent assertion in client lifecycle test
- Expand Honcho docs: session isolation, lifecycle, routing internals

Dropped from original PR:
- Indentation change in _create_request_openai_client that would move
  client creation inside the lock (causes unnecessary contention)

Co-authored-by: adavyas <adavyas@users.noreply.github.com>
											
										
										
											2026-03-16 00:23:47 -07:00
+								                        await self._async_flush_memories(entry.session_id, key)
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
+								                        self._shutdown_gateway_honcho(key)
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								                        self.session_store._pre_flushed_sessions.add(entry.session_id)
 								                    except Exception as e:
 								                        logger.debug("Proactive memory flush failed for %s: %s", entry.session_id, e)
 								            except Exception as e:
 								                logger.debug("Session expiry watcher error: %s", e)
 								            # Sleep in small increments so we can stop quickly
 								            for _ in range(interval):
 								                if not self._running:
 								                    break
 								                await asyncio.sleep(1)
-												feat: auto-reconnect failed gateway platforms with exponential backoff (#2584)

When a messaging platform fails to connect at startup (e.g. transient DNS
failure) or disconnects at runtime with a retryable error, the gateway now
queues it for background reconnection instead of giving up permanently.

- New _platform_reconnect_watcher background task runs alongside the
  existing session expiry watcher
- Exponential backoff: 30s, 60s, 120s, 240s, 300s cap
- Max 20 retry attempts before giving up on a platform
- Non-retryable errors (bad auth token, etc.) are not retried
- Runtime disconnections via _handle_adapter_fatal_error now queue
  retryable failures instead of triggering gateway shutdown
- On successful reconnect, adapter is wired up and channel directory
  is rebuilt automatically

Fixes the case where a DNS blip during gateway startup caused Telegram
and Discord to be permanently unavailable until manual restart.
											
										
										
											2026-03-22 23:48:24 -07:00
+								    async def _platform_reconnect_watcher(self) -> None:
 								        """Background task that periodically retries connecting failed platforms.
 								        Uses exponential backoff: 30s → 60s → 120s → 240s → 300s (cap).
 								        Stops retrying a platform after 20 failed attempts or if the error
 								        is non-retryable (e.g. bad auth token).
 								        """
 								        _MAX_ATTEMPTS = 20
 								        _BACKOFF_CAP = 300  # 5 minutes max between retries
 								        await asyncio.sleep(10)  # initial delay — let startup finish
 								        while self._running:
 								            if not self._failed_platforms:
 								                # Nothing to reconnect — sleep and check again
 								                for _ in range(30):
 								                    if not self._running:
 								                        return
 								                    await asyncio.sleep(1)
 								                continue
 								            now = time.monotonic()
 								            for platform in list(self._failed_platforms.keys()):
 								                if not self._running:
 								                    return
 								                info = self._failed_platforms[platform]
 								                if now < info["next_retry"]:
 								                    continue  # not time yet
 								                if info["attempts"] >= _MAX_ATTEMPTS:
 								                    logger.warning(
 								                        "Giving up reconnecting %s after %d attempts",
 								                        platform.value, info["attempts"],
 								                    )
 								                    del self._failed_platforms[platform]
 								                    continue
 								                platform_config = info["config"]
 								                attempt = info["attempts"] + 1
 								                logger.info(
 								                    "Reconnecting %s (attempt %d/%d)...",
 								                    platform.value, attempt, _MAX_ATTEMPTS,
 								                )
 								                try:
 								                    adapter = self._create_adapter(platform, platform_config)
 								                    if not adapter:
 								                        logger.warning(
 								                            "Reconnect %s: adapter creation returned None, removing from retry queue",
 								                            platform.value,
 								                        )
 								                        del self._failed_platforms[platform]
 								                        continue
 								                    adapter.set_message_handler(self._handle_message)
 								                    adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
 								                    success = await adapter.connect()
 								                    if success:
 								                        self.adapters[platform] = adapter
 								                        self._sync_voice_mode_state_to_adapter(adapter)
 								                        self.delivery_router.adapters = self.adapters
 								                        del self._failed_platforms[platform]
 								                        logger.info("✓ %s reconnected successfully", platform.value)
 								                        # Rebuild channel directory with the new adapter
 								                        try:
 								                            from gateway.channel_directory import build_channel_directory
 								                            build_channel_directory(self.adapters)
 								                        except Exception:
 								                            pass
 								                    else:
 								                        # Check if the failure is non-retryable
 								                        if adapter.has_fatal_error and not adapter.fatal_error_retryable:
 								                            logger.warning(
 								                                "Reconnect %s: non-retryable error (%s), removing from retry queue",
 								                                platform.value, adapter.fatal_error_message,
 								                            )
 								                            del self._failed_platforms[platform]
 								                        else:
 								                            backoff = min(30 * (2 ** (attempt - 1)), _BACKOFF_CAP)
 								                            info["attempts"] = attempt
 								                            info["next_retry"] = time.monotonic() + backoff
 								                            logger.info(
 								                                "Reconnect %s failed, next retry in %ds",
 								                                platform.value, backoff,
 								                            )
 								                except Exception as e:
 								                    backoff = min(30 * (2 ** (attempt - 1)), _BACKOFF_CAP)
 								                    info["attempts"] = attempt
 								                    info["next_retry"] = time.monotonic() + backoff
 								                    logger.warning(
 								                        "Reconnect %s error: %s, next retry in %ds",
 								                        platform.value, e, backoff,
 								                    )
 								            # Check every 10 seconds for platforms that need reconnection
 								            for _ in range(10):
 								                if not self._running:
 								                    return
 								                await asyncio.sleep(1)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    async def stop(self) -> None:
 								        """Stop the gateway and disconnect all adapters."""
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								        logger.info("Stopping gateway...")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        self._running = False
-												fix(gateway): cancel active runs during shutdown

Track adapter background message-processing tasks, cancel them during gateway shutdown, and interrupt running agents before disconnecting adapters. This prevents old gateway instances from continuing in-flight work after stop/replace, which was contributing to the restart-time task continuation/flicker behavior reported in #1414. Adds regression coverage for adapter task cancellation and shutdown interrupts.

											
										
										
											2026-03-15 04:21:50 -07:00
 								        for session_key, agent in list(self._running_agents.items()):
-												fix: harden sentinel guard for /stop during setup and shutdown

- /stop during sentinel returns helpful message instead of queuing
- Shutdown loop skips sentinel entries instead of catching AttributeError
- _handle_stop_command guards against sentinel (defensive)
- Added tests for both edge cases (7 total race guard tests)

											
										
										
											2026-03-19 18:26:09 -07:00
+								            if agent is _AGENT_PENDING_SENTINEL:
 								                continue
-												fix(gateway): cancel active runs during shutdown

Track adapter background message-processing tasks, cancel them during gateway shutdown, and interrupt running agents before disconnecting adapters. This prevents old gateway instances from continuing in-flight work after stop/replace, which was contributing to the restart-time task continuation/flicker behavior reported in #1414. Adds regression coverage for adapter task cancellation and shutdown interrupts.

											
										
										
											2026-03-15 04:21:50 -07:00
+								            try:
 								                agent.interrupt("Gateway shutting down")
 								                logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20])
 								            except Exception as e:
 								                logger.debug("Failed interrupting agent during shutdown: %s", e)
-												fix: add macOS Homebrew Opus fallback and fix shutdown dict iteration

- Add Homebrew library path fallback when ctypes.util.find_library fails
  on macOS (Apple Silicon + Intel paths, guarded by platform check)
- Fix RuntimeError in gateway stop() by iterating over dict copy
- Update Opus tests to verify find_library-first + conditional fallback

											
										
										
											2026-03-13 16:59:03 +03:00
+								        for platform, adapter in list(self.adapters.items()):
-												fix(gateway): cancel active runs during shutdown

Track adapter background message-processing tasks, cancel them during gateway shutdown, and interrupt running agents before disconnecting adapters. This prevents old gateway instances from continuing in-flight work after stop/replace, which was contributing to the restart-time task continuation/flicker behavior reported in #1414. Adds regression coverage for adapter task cancellation and shutdown interrupts.

											
										
										
											2026-03-15 04:21:50 -07:00
+								            try:
 								                await adapter.cancel_background_tasks()
 								            except Exception as e:
 								                logger.debug("✗ %s background-task cancel error: %s", platform.value, e)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            try:
 								                await adapter.disconnect()
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.info("✓ %s disconnected", platform.value)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            except Exception as e:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.error("✗ %s disconnect error: %s", platform.value, e)
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
-												fix(gateway): track background task references in GatewayRunner (#3254)

Asyncio tasks created with create_task() but never stored can be
garbage collected mid-execution. Add self._background_tasks set to
hold references, with add_done_callback cleanup. Tracks:
- /background command task
- session-reset memory flush task
- session-resume memory flush task
Cancel all pending tasks in stop().

Update test fixtures that construct GatewayRunner via object.__new__()
to include the new _background_tasks attribute.

Cherry-picked from PR #3167 by memosr. The original PR also deleted
the DM topic auto-skill loading code — that deletion was excluded
from this salvage as it removes a shipped feature (#2598).

Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-26 14:33:48 -07:00
+								        # Cancel any pending background tasks
 								        for _task in list(self._background_tasks):
 								            _task.cancel()
 								        self._background_tasks.clear()
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        self.adapters.clear()
-												fix(gateway): cancel active runs during shutdown

Track adapter background message-processing tasks, cancel them during gateway shutdown, and interrupt running agents before disconnecting adapters. This prevents old gateway instances from continuing in-flight work after stop/replace, which was contributing to the restart-time task continuation/flicker behavior reported in #1414. Adds regression coverage for adapter task cancellation and shutdown interrupts.

											
										
										
											2026-03-15 04:21:50 -07:00
+								        self._running_agents.clear()
 								        self._pending_messages.clear()
 								        self._pending_approvals.clear()
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
+								        self._shutdown_all_gateway_honcho()
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        self._shutdown_event.set()
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								        from gateway.status import remove_pid_file, write_runtime_status
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
+								        remove_pid_file()
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								        try:
 								            write_runtime_status(gateway_state="stopped", exit_reason=self._exit_reason)
 								        except Exception:
 								            pass
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								        logger.info("Gateway stopped")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								    async def wait_for_shutdown(self) -> None:
 								        """Wait for shutdown signal."""
 								        await self._shutdown_event.wait()
 								    def _create_adapter(
 								        self,
 								        platform: Platform,
 								        config: Any
 								    ) -> Optional[BasePlatformAdapter]:
 								        """Create the appropriate adapter for a platform."""
-												fix(gateway): make group session isolation configurable

default group and channel sessions to per-user isolation, allow opting back into shared room sessions via config.yaml, and document Discord gateway routing and session behavior.

											
										
										
											2026-03-16 00:22:23 -07:00
+								        if hasattr(config, "extra") and isinstance(config.extra, dict):
 								            config.extra.setdefault(
 								                "group_sessions_per_user",
 								                self.config.group_sessions_per_user,
 								            )
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        if platform == Platform.TELEGRAM:
 								            from gateway.platforms.telegram import TelegramAdapter, check_telegram_requirements
 								            if not check_telegram_requirements():
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.warning("Telegram: python-telegram-bot not installed")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                return None
 								            return TelegramAdapter(config)
 								        elif platform == Platform.DISCORD:
 								            from gateway.platforms.discord import DiscordAdapter, check_discord_requirements
 								            if not check_discord_requirements():
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.warning("Discord: discord.py not installed")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                return None
 								            return DiscordAdapter(config)
 								        elif platform == Platform.WHATSAPP:
 								            from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
 								            if not check_whatsapp_requirements():
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.warning("WhatsApp: Node.js not installed or bridge not configured")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                return None
 								            return WhatsAppAdapter(config)
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        elif platform == Platform.SLACK:
 								            from gateway.platforms.slack import SlackAdapter, check_slack_requirements
 								            if not check_slack_requirements():
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.warning("Slack: slack-bolt not installed. Run: pip install 'hermes-agent[slack]'")
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								                return None
 								            return SlackAdapter(config)
-												feat: add Home Assistant integration (REST tools + WebSocket gateway)

- Add ha_list_entities, ha_get_state, ha_call_service tools via REST API
- Add WebSocket gateway adapter for real-time state_changed event monitoring
- Support domain/entity filtering, cooldown, and auto-reconnect with backoff
- Use REST API for outbound notifications to avoid WS race condition
- Gate tool availability on HASS_TOKEN env var
- Add 82 unit tests covering real logic (filtering, payload building, event pipeline)

											
										
										
											2026-02-28 13:32:48 +03:00
-												feat: add Signal messenger gateway platform (#405)

Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.

Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py

Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support

Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance

Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)

No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).

Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.

Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>

											
										
										
											2026-03-08 20:20:35 -07:00
+								        elif platform == Platform.SIGNAL:
 								            from gateway.platforms.signal import SignalAdapter, check_signal_requirements
 								            if not check_signal_requirements():
 								                logger.warning("Signal: SIGNAL_HTTP_URL or SIGNAL_ACCOUNT not configured")
 								                return None
 								            return SignalAdapter(config)
-												feat: add Home Assistant integration (REST tools + WebSocket gateway)

- Add ha_list_entities, ha_get_state, ha_call_service tools via REST API
- Add WebSocket gateway adapter for real-time state_changed event monitoring
- Support domain/entity filtering, cooldown, and auto-reconnect with backoff
- Use REST API for outbound notifications to avoid WS race condition
- Gate tool availability on HASS_TOKEN env var
- Add 82 unit tests covering real logic (filtering, payload building, event pipeline)

											
										
										
											2026-02-28 13:32:48 +03:00
+								        elif platform == Platform.HOMEASSISTANT:
 								            from gateway.platforms.homeassistant import HomeAssistantAdapter, check_ha_requirements
 								            if not check_ha_requirements():
 								                logger.warning("HomeAssistant: aiohttp not installed or HASS_TOKEN not set")
 								                return None
 								            return HomeAssistantAdapter(config)
-												feat: add email gateway platform (IMAP/SMTP)

Allow users to interact with Hermes by sending and receiving emails.
Uses IMAP polling for incoming messages and SMTP for replies with
proper threading (In-Reply-To, References headers).

Integrates with all 14 gateway extension points: config, adapter
factory, authorization, send_message tool, cron delivery, toolsets,
prompt hints, channel directory, setup wizard, status display, and
env example.

65 tests covering config, parsing, dispatch, threading, IMAP fetch,
SMTP send, attachments, and all integration points.

											
										
										
											2026-03-10 03:15:38 +03:00
+								        elif platform == Platform.EMAIL:
 								            from gateway.platforms.email import EmailAdapter, check_email_requirements
 								            if not check_email_requirements():
 								                logger.warning("Email: EMAIL_ADDRESS, EMAIL_PASSWORD, EMAIL_IMAP_HOST, or EMAIL_SMTP_HOST not set")
 								                return None
 								            return EmailAdapter(config)
-												feat: add SMS (Telnyx) platform adapter

Implement SMS as a first-class messaging platform following
ADDING_A_PLATFORM.md checklist. All 16 integration points covered:

- gateway/platforms/sms.py: Core adapter with aiohttp webhook server,
  Telnyx REST API send, markdown stripping, 1600-char chunking,
  echo loop prevention, multi-number reply-from tracking
- gateway/config.py: Platform.SMS enum + env override block
- gateway/run.py: Adapter factory + auth maps (SMS_ALLOWED_USERS,
  SMS_ALLOW_ALL_USERS)
- toolsets.py: hermes-sms toolset + included in hermes-gateway
- cron/scheduler.py: SMS in platform_map for cron delivery
- tools/send_message_tool.py: SMS routing + _send_sms() standalone sender
- tools/cronjob_tools.py: 'sms' in deliver description
- gateway/channel_directory.py: SMS in session-based discovery
- agent/prompt_builder.py: SMS platform hint (plain text, concise)
- hermes_cli/status.py: SMS in platforms status display
- hermes_cli/gateway.py: SMS in setup wizard with Telnyx instructions
- pyproject.toml: sms optional dependency group (aiohttp>=3.9.0)
- tests/gateway/test_sms.py: Unit tests for config, format, truncate,
  echo prevention, requirements, toolset integration

Co-authored-by: sunsakis <teo@sunsakis.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
											
										
										
											2026-03-17 02:52:34 -07:00
+								        elif platform == Platform.SMS:
 								            from gateway.platforms.sms import SmsAdapter, check_sms_requirements
 								            if not check_sms_requirements():
-												feat: add SMS (Twilio) platform adapter

Add SMS as a first-class messaging platform via the Twilio API.
Shares credentials with the existing telephony skill — same
TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN, TWILIO_PHONE_NUMBER env vars.

Adapter (gateway/platforms/sms.py):
- aiohttp webhook server for inbound (Twilio form-encoded POSTs)
- Twilio REST API with Basic auth for outbound
- Markdown stripping, smart chunking at 1600 chars
- Echo loop prevention, phone number redaction in logs

Integration (13 files):
- gateway config, run, channel_directory
- agent prompt_builder (SMS platform hint)
- cron scheduler, cronjob tools
- send_message_tool (_send_sms via Twilio API)
- toolsets (hermes-sms + hermes-gateway)
- gateway setup wizard, status display
- pyproject.toml (sms optional extra)
- 21 tests

Docs:
- website/docs/user-guide/messaging/sms.md (full setup guide)
- Updated messaging index (architecture, toolsets, security, links)
- Updated environment-variables.md reference

Inspired by PR #1575 (@sunsakis), rewritten for Twilio.
											
										
										
											2026-03-17 03:14:53 -07:00
+								                logger.warning("SMS: aiohttp not installed or TWILIO_ACCOUNT_SID/TWILIO_AUTH_TOKEN not set")
-												feat: add SMS (Telnyx) platform adapter

Implement SMS as a first-class messaging platform following
ADDING_A_PLATFORM.md checklist. All 16 integration points covered:

- gateway/platforms/sms.py: Core adapter with aiohttp webhook server,
  Telnyx REST API send, markdown stripping, 1600-char chunking,
  echo loop prevention, multi-number reply-from tracking
- gateway/config.py: Platform.SMS enum + env override block
- gateway/run.py: Adapter factory + auth maps (SMS_ALLOWED_USERS,
  SMS_ALLOW_ALL_USERS)
- toolsets.py: hermes-sms toolset + included in hermes-gateway
- cron/scheduler.py: SMS in platform_map for cron delivery
- tools/send_message_tool.py: SMS routing + _send_sms() standalone sender
- tools/cronjob_tools.py: 'sms' in deliver description
- gateway/channel_directory.py: SMS in session-based discovery
- agent/prompt_builder.py: SMS platform hint (plain text, concise)
- hermes_cli/status.py: SMS in platforms status display
- hermes_cli/gateway.py: SMS in setup wizard with Telnyx instructions
- pyproject.toml: sms optional dependency group (aiohttp>=3.9.0)
- tests/gateway/test_sms.py: Unit tests for config, format, truncate,
  echo prevention, requirements, toolset integration

Co-authored-by: sunsakis <teo@sunsakis.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
											
										
										
											2026-03-17 02:52:34 -07:00
+								                return None
 								            return SmsAdapter(config)
-												feat(gateway): add DingTalk platform adapter (#1685)

Add DingTalk as a messaging platform using the dingtalk-stream SDK
for real-time message reception via Stream Mode (no webhook needed).
Replies are sent via session webhook using markdown format.

Features:
- Stream Mode connection (long-lived WebSocket, no public URL needed)
- Text and rich text message support
- DM and group chat support
- Message deduplication with 5-minute window
- Auto-reconnection with exponential backoff
- Session webhook caching for reply routing

Configuration:
  export DINGTALK_CLIENT_ID=your-app-key
  export DINGTALK_CLIENT_SECRET=your-app-secret

  # or in config.yaml:
  platforms:
    dingtalk:
      enabled: true
      extra:
        client_id: your-app-key
        client_secret: your-app-secret

Files:
- gateway/platforms/dingtalk.py (340 lines) — adapter implementation
- gateway/config.py — add DINGTALK to Platform enum
- gateway/run.py — add DingTalk to _create_adapter
- hermes_cli/config.py — add env vars to _EXTRA_ENV_KEYS
- hermes_cli/tools_config.py — add dingtalk to PLATFORMS
- tests/gateway/test_dingtalk.py — 21 tests
											
										
										
											2026-03-17 03:04:58 -07:00
+								        elif platform == Platform.DINGTALK:
 								            from gateway.platforms.dingtalk import DingTalkAdapter, check_dingtalk_requirements
 								            if not check_dingtalk_requirements():
 								                logger.warning("DingTalk: dingtalk-stream not installed or DINGTALK_CLIENT_ID/SECRET not set")
 								                return None
 								            return DingTalkAdapter(config)
-												feat: add Mattermost and Matrix gateway adapters

Add support for Mattermost (self-hosted Slack alternative) and Matrix
(federated messaging protocol) as messaging platforms.

Mattermost adapter:
- REST API v4 client for posts, files, channels, typing indicators
- WebSocket listener for real-time 'posted' events with reconnect backoff
- Thread support via root_id
- File upload/download with auth-aware caching
- Dedup cache (5min TTL, 2000 entries)
- Full self-hosted instance support

Matrix adapter:
- matrix-nio AsyncClient with sync loop
- Dual auth: access token or user_id + password
- Optional E2EE via matrix-nio[e2e] (libolm)
- Thread support via m.thread (MSC3440)
- Reply support via m.in_reply_to with fallback stripping
- Media upload/download via mxc:// URLs (authenticated v1.11+ endpoint)
- Auto-join on room invite
- DM detection via m.direct account data with sync fallback
- Markdown to HTML conversion

Fixes applied over original PR #1225 by @cyb0rgk1tty:
- Mattermost: add timeout to file downloads, wrap API helpers in
  try/except for network errors, download incoming files immediately
  with auth headers instead of passing auth-required URLs
- Matrix: use authenticated media endpoint (/_matrix/client/v1/media/),
  robust m.direct cache with sync fallback, prefer aiohttp over httpx

Install Matrix support: pip install 'hermes-agent[matrix]'
Mattermost needs no extra deps (uses aiohttp).

Salvaged from PR #1225 by @cyb0rgk1tty with fixes.

											
										
										
											2026-03-17 02:59:36 -07:00
+								        elif platform == Platform.MATTERMOST:
 								            from gateway.platforms.mattermost import MattermostAdapter, check_mattermost_requirements
 								            if not check_mattermost_requirements():
 								                logger.warning("Mattermost: MATTERMOST_TOKEN or MATTERMOST_URL not set, or aiohttp missing")
 								                return None
 								            return MattermostAdapter(config)
 								        elif platform == Platform.MATRIX:
 								            from gateway.platforms.matrix import MatrixAdapter, check_matrix_requirements
 								            if not check_matrix_requirements():
 								                logger.warning("Matrix: matrix-nio not installed or credentials not set. Run: pip install 'matrix-nio[e2e]'")
 								                return None
 								            return MatrixAdapter(config)
-												feat: OpenAI-compatible API server + WhatsApp configurable reply prefix (#1756)

* feat: OpenAI-compatible API server platform adapter

Salvaged from PR #956, updated for current main.

Adds an HTTP API server as a gateway platform adapter that exposes
hermes-agent via the OpenAI Chat Completions and Responses APIs.
Any OpenAI-compatible frontend (Open WebUI, LobeChat, LibreChat,
AnythingLLM, NextChat, ChatBox, etc.) can connect by pointing at
http://localhost:8642/v1.

Endpoints:
- POST /v1/chat/completions  — stateless Chat Completions API
- POST /v1/responses         — stateful Responses API with chaining
- GET  /v1/responses/{id}    — retrieve stored response
- DELETE /v1/responses/{id}  — delete stored response
- GET  /v1/models            — list hermes-agent as available model
- GET  /health               — health check

Features:
- Real SSE streaming via stream_delta_callback (uses main's streaming)
- In-memory LRU response store for Responses API conversation chaining
- Named conversations via 'conversation' parameter
- Bearer token auth (optional, via API_SERVER_KEY)
- CORS support for browser-based frontends
- System prompt layering (frontend system messages on top of core)
- Real token usage tracking in responses

Integration points:
- Platform.API_SERVER in gateway/config.py
- _create_adapter() branch in gateway/run.py
- API_SERVER_* env vars in hermes_cli/config.py
- Env var overrides in gateway/config.py _apply_env_overrides()

Changes vs original PR #956:
- Removed streaming infrastructure (already on main via stream_consumer.py)
- Removed Telegram reply_to_mode (separate feature, not included)
- Updated _resolve_model() -> _resolve_gateway_model()
- Updated stream_callback -> stream_delta_callback
- Updated connect()/disconnect() to use _mark_connected()/_mark_disconnected()
- Adapted to current Platform enum (includes MATTERMOST, MATRIX, DINGTALK)

Tests: 72 new tests, all passing
Docs: API server guide, Open WebUI integration guide, env var reference

* feat(whatsapp): make reply prefix configurable via config.yaml

Reworked from PR #1764 (ifrederico) to use config.yaml instead of .env.

The WhatsApp bridge prepends a header to every outgoing message.
This was hardcoded to '⚕ *Hermes Agent*'. Users can now customize
or disable it via config.yaml:

  whatsapp:
    reply_prefix: ''                     # disable header
    reply_prefix: '🤖 *My Bot*\n───\n'  # custom prefix

How it works:
- load_gateway_config() reads whatsapp.reply_prefix from config.yaml
  and stores it in PlatformConfig.extra['reply_prefix']
- WhatsAppAdapter reads it from config.extra at init
- When spawning bridge.js, the adapter passes it as
  WHATSAPP_REPLY_PREFIX in the subprocess environment
- bridge.js handles undefined (default), empty (no header),
  or custom values with \\n escape support
- Self-chat echo suppression uses the configured prefix

Also fixes _config_version: was 9 but ENV_VARS_BY_VERSION had a
key 10 (TAVILY_API_KEY), so existing users at v9 would never be
prompted for Tavily. Bumped to 10 to close the gap. Added a
regression test to prevent this from happening again.

Credit: ifrederico (PR #1764) for the bridge.js implementation
and the config version gap discovery.

---------

Co-authored-by: Test <test@test.com>
											
										
										
											2026-03-17 10:44:37 -07:00
+								        elif platform == Platform.API_SERVER:
 								            from gateway.platforms.api_server import APIServerAdapter, check_api_server_requirements
 								            if not check_api_server_requirements():
 								                logger.warning("API Server: aiohttp not installed")
 								                return None
 								            return APIServerAdapter(config)
-												feat(gateway): add webhook platform adapter for external event triggers

Add a generic webhook platform adapter that receives HTTP POSTs from
external services (GitHub, GitLab, JIRA, Stripe, etc.), validates HMAC
signatures, transforms payloads into agent prompts, and routes responses
back to the source or to another platform.

Features:
- Configurable routes with per-route HMAC secrets, event filters,
  prompt templates with dot-notation payload access, skill loading,
  and pluggable delivery (github_comment, telegram, discord, log)
- HMAC signature validation (GitHub SHA-256, GitLab token, generic)
- Rate limiting (30 req/min per route, configurable)
- Idempotency cache (1hr TTL, prevents duplicate runs on retries)
- Body size limits (1MB default, checked before reading payload)
- Setup wizard integration with security warnings and docs links
- 33 tests (29 unit + 4 integration), all passing

Security:
- HMAC secret required per route (startup validation)
- Setup wizard warns about internet exposure for webhook/SMS platforms
- Sandboxing (Docker/VM) recommended in docs for public-facing deployments

Files changed:
- gateway/config.py — Platform.WEBHOOK enum + env var overrides
- gateway/platforms/webhook.py — WebhookAdapter (~420 lines)
- gateway/run.py — factory wiring + auth bypass for webhook events
- hermes_cli/config.py — WEBHOOK_* env var definitions
- hermes_cli/setup.py — webhook section in setup_gateway()
- tests/gateway/test_webhook_adapter.py — 29 unit tests
- tests/gateway/test_webhook_integration.py — 4 integration tests
- website/docs/user-guide/messaging/webhooks.md — full user docs
- website/docs/reference/environment-variables.md — WEBHOOK_* vars
- website/sidebars.ts — nav entry

											
										
										
											2026-03-20 06:33:36 -07:00
+								        elif platform == Platform.WEBHOOK:
 								            from gateway.platforms.webhook import WebhookAdapter, check_webhook_requirements
 								            if not check_webhook_requirements():
 								                logger.warning("Webhook: aiohttp not installed")
 								                return None
 								            adapter = WebhookAdapter(config)
 								            adapter.gateway_runner = self  # For cross-platform delivery
 								            return adapter
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        return None
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								    def _is_user_authorized(self, source: SessionSource) -> bool:
 								        """
 								        Check if a user is authorized to use the bot.
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        Checks in order:
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+. Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
 . Environment variable allowlists (TELEGRAM_ALLOWED_USERS, etc.)
 . DM pairing approved list
 . Global allow-all (GATEWAY_ALLOW_ALL_USERS=true)
 . Default: deny
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        """
-												fix: resolve 4 bugs found in HA integration code review

- Auto-authorize HA events in gateway (system-generated, not user messages)
- Guard _read_events against None/closed WebSocket after failed reconnect
- Use UUID for send() message_id instead of polluting WS sequence counter
- entity_id parameter now takes precedence over data["entity_id"]

											
										
										
											2026-02-28 15:12:18 +03:00
+								        # Home Assistant events are system-generated (state changes), not
 								        # user-initiated messages.  The HASS_TOKEN already authenticates the
 								        # connection, so HA events are always authorized.
-												feat(gateway): add webhook platform adapter for external event triggers

Add a generic webhook platform adapter that receives HTTP POSTs from
external services (GitHub, GitLab, JIRA, Stripe, etc.), validates HMAC
signatures, transforms payloads into agent prompts, and routes responses
back to the source or to another platform.

Features:
- Configurable routes with per-route HMAC secrets, event filters,
  prompt templates with dot-notation payload access, skill loading,
  and pluggable delivery (github_comment, telegram, discord, log)
- HMAC signature validation (GitHub SHA-256, GitLab token, generic)
- Rate limiting (30 req/min per route, configurable)
- Idempotency cache (1hr TTL, prevents duplicate runs on retries)
- Body size limits (1MB default, checked before reading payload)
- Setup wizard integration with security warnings and docs links
- 33 tests (29 unit + 4 integration), all passing

Security:
- HMAC secret required per route (startup validation)
- Setup wizard warns about internet exposure for webhook/SMS platforms
- Sandboxing (Docker/VM) recommended in docs for public-facing deployments

Files changed:
- gateway/config.py — Platform.WEBHOOK enum + env var overrides
- gateway/platforms/webhook.py — WebhookAdapter (~420 lines)
- gateway/run.py — factory wiring + auth bypass for webhook events
- hermes_cli/config.py — WEBHOOK_* env var definitions
- hermes_cli/setup.py — webhook section in setup_gateway()
- tests/gateway/test_webhook_adapter.py — 29 unit tests
- tests/gateway/test_webhook_integration.py — 4 integration tests
- website/docs/user-guide/messaging/webhooks.md — full user docs
- website/docs/reference/environment-variables.md — WEBHOOK_* vars
- website/sidebars.ts — nav entry

											
										
										
											2026-03-20 06:33:36 -07:00
+								        # Webhook events are authenticated via HMAC signature validation in
 								        # the adapter itself — no user allowlist applies.
 								        if source.platform in (Platform.HOMEASSISTANT, Platform.WEBHOOK):
-												fix: resolve 4 bugs found in HA integration code review

- Auto-authorize HA events in gateway (system-generated, not user messages)
- Guard _read_events against None/closed WebSocket after failed reconnect
- Use UUID for send() message_id instead of polluting WS sequence counter
- entity_id parameter now takes precedence over data["entity_id"]

											
										
										
											2026-02-28 15:12:18 +03:00
+								            return True
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        user_id = source.user_id
 								        if not user_id:
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+								            return False
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        platform_env_map = {
 								            Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
 								            Platform.DISCORD: "DISCORD_ALLOWED_USERS",
 								            Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								            Platform.SLACK: "SLACK_ALLOWED_USERS",
-												feat: add Signal messenger gateway platform (#405)

Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.

Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py

Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support

Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance

Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)

No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).

Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.

Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>

											
										
										
											2026-03-08 20:20:35 -07:00
+								            Platform.SIGNAL: "SIGNAL_ALLOWED_USERS",
-												feat: add email gateway platform (IMAP/SMTP)

Allow users to interact with Hermes by sending and receiving emails.
Uses IMAP polling for incoming messages and SMTP for replies with
proper threading (In-Reply-To, References headers).

Integrates with all 14 gateway extension points: config, adapter
factory, authorization, send_message tool, cron delivery, toolsets,
prompt hints, channel directory, setup wizard, status display, and
env example.

65 tests covering config, parsing, dispatch, threading, IMAP fetch,
SMTP send, attachments, and all integration points.

											
										
										
											2026-03-10 03:15:38 +03:00
+								            Platform.EMAIL: "EMAIL_ALLOWED_USERS",
-												feat: add SMS (Telnyx) platform adapter

Implement SMS as a first-class messaging platform following
ADDING_A_PLATFORM.md checklist. All 16 integration points covered:

- gateway/platforms/sms.py: Core adapter with aiohttp webhook server,
  Telnyx REST API send, markdown stripping, 1600-char chunking,
  echo loop prevention, multi-number reply-from tracking
- gateway/config.py: Platform.SMS enum + env override block
- gateway/run.py: Adapter factory + auth maps (SMS_ALLOWED_USERS,
  SMS_ALLOW_ALL_USERS)
- toolsets.py: hermes-sms toolset + included in hermes-gateway
- cron/scheduler.py: SMS in platform_map for cron delivery
- tools/send_message_tool.py: SMS routing + _send_sms() standalone sender
- tools/cronjob_tools.py: 'sms' in deliver description
- gateway/channel_directory.py: SMS in session-based discovery
- agent/prompt_builder.py: SMS platform hint (plain text, concise)
- hermes_cli/status.py: SMS in platforms status display
- hermes_cli/gateway.py: SMS in setup wizard with Telnyx instructions
- pyproject.toml: sms optional dependency group (aiohttp>=3.9.0)
- tests/gateway/test_sms.py: Unit tests for config, format, truncate,
  echo prevention, requirements, toolset integration

Co-authored-by: sunsakis <teo@sunsakis.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
											
										
										
											2026-03-17 02:52:34 -07:00
+								            Platform.SMS: "SMS_ALLOWED_USERS",
-												feat: add Mattermost and Matrix gateway adapters

Add support for Mattermost (self-hosted Slack alternative) and Matrix
(federated messaging protocol) as messaging platforms.

Mattermost adapter:
- REST API v4 client for posts, files, channels, typing indicators
- WebSocket listener for real-time 'posted' events with reconnect backoff
- Thread support via root_id
- File upload/download with auth-aware caching
- Dedup cache (5min TTL, 2000 entries)
- Full self-hosted instance support

Matrix adapter:
- matrix-nio AsyncClient with sync loop
- Dual auth: access token or user_id + password
- Optional E2EE via matrix-nio[e2e] (libolm)
- Thread support via m.thread (MSC3440)
- Reply support via m.in_reply_to with fallback stripping
- Media upload/download via mxc:// URLs (authenticated v1.11+ endpoint)
- Auto-join on room invite
- DM detection via m.direct account data with sync fallback
- Markdown to HTML conversion

Fixes applied over original PR #1225 by @cyb0rgk1tty:
- Mattermost: add timeout to file downloads, wrap API helpers in
  try/except for network errors, download incoming files immediately
  with auth headers instead of passing auth-required URLs
- Matrix: use authenticated media endpoint (/_matrix/client/v1/media/),
  robust m.direct cache with sync fallback, prefer aiohttp over httpx

Install Matrix support: pip install 'hermes-agent[matrix]'
Mattermost needs no extra deps (uses aiohttp).

Salvaged from PR #1225 by @cyb0rgk1tty with fixes.

											
										
										
											2026-03-17 02:59:36 -07:00
+								            Platform.MATTERMOST: "MATTERMOST_ALLOWED_USERS",
 								            Platform.MATRIX: "MATRIX_ALLOWED_USERS",
-												feat(gateway): wire DingTalk into gateway setup and platform maps (#1690)

Add DingTalk to:
- hermes_cli/gateway.py: _PLATFORMS list with setup instructions,
  AppKey/AppSecret prompts, and Stream Mode setup guide
- gateway/run.py: all platform-to-config-key maps, allowed users
  map, allow-all-users map, and toolset resolution maps
											
										
										
											2026-03-17 03:19:45 -07:00
+								            Platform.DINGTALK: "DINGTALK_ALLOWED_USERS",
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        }
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+								        platform_allow_all_map = {
 								            Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
 								            Platform.DISCORD: "DISCORD_ALLOW_ALL_USERS",
 								            Platform.WHATSAPP: "WHATSAPP_ALLOW_ALL_USERS",
 								            Platform.SLACK: "SLACK_ALLOW_ALL_USERS",
-												feat: add Signal messenger gateway platform (#405)

Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.

Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py

Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support

Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance

Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)

No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).

Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.

Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>

											
										
										
											2026-03-08 20:20:35 -07:00
+								            Platform.SIGNAL: "SIGNAL_ALLOW_ALL_USERS",
-												feat: add email gateway platform (IMAP/SMTP)

Allow users to interact with Hermes by sending and receiving emails.
Uses IMAP polling for incoming messages and SMTP for replies with
proper threading (In-Reply-To, References headers).

Integrates with all 14 gateway extension points: config, adapter
factory, authorization, send_message tool, cron delivery, toolsets,
prompt hints, channel directory, setup wizard, status display, and
env example.

65 tests covering config, parsing, dispatch, threading, IMAP fetch,
SMTP send, attachments, and all integration points.

											
										
										
											2026-03-10 03:15:38 +03:00
+								            Platform.EMAIL: "EMAIL_ALLOW_ALL_USERS",
-												feat: add SMS (Telnyx) platform adapter

Implement SMS as a first-class messaging platform following
ADDING_A_PLATFORM.md checklist. All 16 integration points covered:

- gateway/platforms/sms.py: Core adapter with aiohttp webhook server,
  Telnyx REST API send, markdown stripping, 1600-char chunking,
  echo loop prevention, multi-number reply-from tracking
- gateway/config.py: Platform.SMS enum + env override block
- gateway/run.py: Adapter factory + auth maps (SMS_ALLOWED_USERS,
  SMS_ALLOW_ALL_USERS)
- toolsets.py: hermes-sms toolset + included in hermes-gateway
- cron/scheduler.py: SMS in platform_map for cron delivery
- tools/send_message_tool.py: SMS routing + _send_sms() standalone sender
- tools/cronjob_tools.py: 'sms' in deliver description
- gateway/channel_directory.py: SMS in session-based discovery
- agent/prompt_builder.py: SMS platform hint (plain text, concise)
- hermes_cli/status.py: SMS in platforms status display
- hermes_cli/gateway.py: SMS in setup wizard with Telnyx instructions
- pyproject.toml: sms optional dependency group (aiohttp>=3.9.0)
- tests/gateway/test_sms.py: Unit tests for config, format, truncate,
  echo prevention, requirements, toolset integration

Co-authored-by: sunsakis <teo@sunsakis.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
											
										
										
											2026-03-17 02:52:34 -07:00
+								            Platform.SMS: "SMS_ALLOW_ALL_USERS",
-												feat: add Mattermost and Matrix gateway adapters

Add support for Mattermost (self-hosted Slack alternative) and Matrix
(federated messaging protocol) as messaging platforms.

Mattermost adapter:
- REST API v4 client for posts, files, channels, typing indicators
- WebSocket listener for real-time 'posted' events with reconnect backoff
- Thread support via root_id
- File upload/download with auth-aware caching
- Dedup cache (5min TTL, 2000 entries)
- Full self-hosted instance support

Matrix adapter:
- matrix-nio AsyncClient with sync loop
- Dual auth: access token or user_id + password
- Optional E2EE via matrix-nio[e2e] (libolm)
- Thread support via m.thread (MSC3440)
- Reply support via m.in_reply_to with fallback stripping
- Media upload/download via mxc:// URLs (authenticated v1.11+ endpoint)
- Auto-join on room invite
- DM detection via m.direct account data with sync fallback
- Markdown to HTML conversion

Fixes applied over original PR #1225 by @cyb0rgk1tty:
- Mattermost: add timeout to file downloads, wrap API helpers in
  try/except for network errors, download incoming files immediately
  with auth headers instead of passing auth-required URLs
- Matrix: use authenticated media endpoint (/_matrix/client/v1/media/),
  robust m.direct cache with sync fallback, prefer aiohttp over httpx

Install Matrix support: pip install 'hermes-agent[matrix]'
Mattermost needs no extra deps (uses aiohttp).

Salvaged from PR #1225 by @cyb0rgk1tty with fixes.

											
										
										
											2026-03-17 02:59:36 -07:00
+								            Platform.MATTERMOST: "MATTERMOST_ALLOW_ALL_USERS",
 								            Platform.MATRIX: "MATRIX_ALLOW_ALL_USERS",
-												feat(gateway): wire DingTalk into gateway setup and platform maps (#1690)

Add DingTalk to:
- hermes_cli/gateway.py: _PLATFORMS list with setup instructions,
  AppKey/AppSecret prompts, and Stream Mode setup guide
- gateway/run.py: all platform-to-config-key maps, allowed users
  map, allow-all-users map, and toolset resolution maps
											
										
										
											2026-03-17 03:19:45 -07:00
+								            Platform.DINGTALK: "DINGTALK_ALLOW_ALL_USERS",
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+								        }
 								        # Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
 								        platform_allow_all_var = platform_allow_all_map.get(source.platform, "")
 								        if platform_allow_all_var and os.getenv(platform_allow_all_var, "").lower() in ("true", "1", "yes"):
 								            return True
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        # Check pairing store (always checked, regardless of allowlists)
 								        platform_name = source.platform.value if source.platform else ""
 								        if self.pairing_store.is_approved(platform_name, user_id):
 								            return True
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
 								        # Check platform-specific and global allowlists
 								        platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip()
 								        global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        if not platform_allowlist and not global_allowlist:
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+								            # No allowlists configured -- check global allow-all flag
 								            return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        # Check if user is in any allowlist
 								        allowed_ids = set()
 								        if platform_allowlist:
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+								            allowed_ids.update(uid.strip() for uid in platform_allowlist.split(",") if uid.strip())
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        if global_allowlist:
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+								            allowed_ids.update(uid.strip() for uid in global_allowlist.split(",") if uid.strip())
-												add full support for whatsapp

											
										
										
											2026-02-25 21:04:36 -08:00
+								        # WhatsApp JIDs have @s.whatsapp.net suffix — strip it for comparison
 								        check_ids = {user_id}
 								        if "@" in user_id:
 								            check_ids.add(user_id.split("@")[0])
 								        return bool(check_ids & allowed_ids)
-												feat: support ignoring unauthorized gateway DMs (#1919)

Add unauthorized_dm_behavior config (pair|ignore) with global default
and per-platform override. WhatsApp can silently drop unknown DMs
instead of sending pairing codes.

Adapted config bridging to work with gw_data dict (pre-construction)
rather than config object. Dropped implementation plan document.

Co-authored-by: Frederico Ribeiro <fr@tecompanytea.com>
											
										
										
											2026-03-18 04:06:08 -07:00
 								    def _get_unauthorized_dm_behavior(self, platform: Optional[Platform]) -> str:
 								        """Return how unauthorized DMs should be handled for a platform."""
 								        config = getattr(self, "config", None)
 								        if config and hasattr(config, "get_unauthorized_dm_behavior"):
 								            return config.get_unauthorized_dm_behavior(platform)
 								        return "pair"
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    async def _handle_message(self, event: MessageEvent) -> Optional[str]:
 								        """
 								        Handle an incoming message from any platform.
 								        This is the core message processing pipeline:
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+. Check user authorization
 . Check for commands (/new, /reset, etc.)
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+. Check for running agent and interrupt if needed
 . Get or create session
 . Build context for agent
 . Run agent conversation
 . Return response
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        """
 								        source = event.source
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        # Check if user is authorized
 								        if not self._is_user_authorized(source):
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								            logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value)
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								            # In DMs: offer pairing code. In groups: silently ignore.
-												feat: support ignoring unauthorized gateway DMs (#1919)

Add unauthorized_dm_behavior config (pair|ignore) with global default
and per-platform override. WhatsApp can silently drop unknown DMs
instead of sending pairing codes.

Adapted config bridging to work with gw_data dict (pre-construction)
rather than config object. Dropped implementation plan document.

Co-authored-by: Frederico Ribeiro <fr@tecompanytea.com>
											
										
										
											2026-03-18 04:06:08 -07:00
+								            if source.chat_type == "dm" and self._get_unauthorized_dm_behavior(source.platform) == "pair":
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								                platform_name = source.platform.value if source.platform else "unknown"
 								                code = self.pairing_store.generate_code(
 								                    platform_name, source.user_id, source.user_name or ""
 								                )
 								                if code:
 								                    adapter = self.adapters.get(source.platform)
 								                    if adapter:
 								                        await adapter.send(
 								                            source.chat_id,
 								                            f"Hi~ I don't recognize you yet!\n\n"
 								                            f"Here's your pairing code: `{code}`\n\n"
 								                            f"Ask the bot owner to run:\n"
 								                            f"`hermes pairing approve {platform_name} {code}`"
 								                        )
 								                else:
 								                    adapter = self.adapters.get(source.platform)
 								                    if adapter:
 								                        await adapter.send(
 								                            source.chat_id,
 								                            "Too many pairing requests right now~ "
 								                            "Please try again later!"
 								                        )
 								            return None
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
-												fix(gateway): prevent telegram photo burst interrupts

											
										
										
											2026-03-15 11:58:19 +05:30
+								        # PRIORITY handling when an agent is already running for this session.
 								        # Default behavior is to interrupt immediately so user text/stop messages
 								        # are handled with minimal latency.
 								        #
 								        # Special case: Telegram/photo bursts often arrive as multiple near-
 								        # simultaneous updates. Do NOT interrupt for photo-only follow-ups here;
 								        # let the adapter-level batching/queueing logic absorb them.
-												fix(gateway): make group session isolation configurable

default group and channel sessions to per-user isolation, allow opting back into shared room sessions via config.yaml, and document Discord gateway routing and session behavior.

											
										
										
											2026-03-16 00:22:23 -07:00
+								        _quick_key = self._session_key_for_source(source)
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								        if _quick_key in self._running_agents:
-												fix(gateway): make /status report live state and tokens (#1476)
											
										
										
											2026-03-15 19:18:58 -07:00
+								            if event.get_command() == "status":
 								                return await self._handle_status_command(event)
-												fix(gateway): recover from hung agents — /stop force-unlocks session (#3104)

When an agent thread hangs (truly blocked, never checks _interrupt_requested),
/stop now force-cleans _running_agents to unlock the session immediately.

Two changes:
- Early /stop intercept in the running-agent guard: bypasses normal command
  dispatch to force-interrupt and unlock the session. Follows the same pattern
  as the existing /new intercept.
- Sentinel /stop: force-cleans the sentinel instead of returning 'nothing to
  stop yet', so /stop during slow startup actually unlocks the session.

Follow-up improvements over original PR:
- Consolidated duplicate resolve_command imports into single early resolution
- Updated _handle_stop_command to also force-clean for consistency
- Removed 10-minute hard timeout on the executor (would kill legitimate
  long-running agent tasks; the /stop force-clean handles recovery)

Cherry-picked from Mibayy's PR #2498.

Co-authored-by: Mibayy <Mibayy@users.noreply.github.com>
											
										
										
											2026-03-25 18:46:50 -07:00
+								            # Resolve the command once for all early-intercept checks below.
 								            from hermes_cli.commands import resolve_command as _resolve_cmd_inner
 								            _evt_cmd = event.get_command()
 								            _cmd_def_inner = _resolve_cmd_inner(_evt_cmd) if _evt_cmd else None
 								            # /stop must hard-kill the session when an agent is running.
 								            # A soft interrupt (agent.interrupt()) doesn't help when the agent
 								            # is truly hung — the executor thread is blocked and never checks
 								            # _interrupt_requested.  Force-clean _running_agents so the session
 								            # is unlocked and subsequent messages are processed normally.
 								            if _cmd_def_inner and _cmd_def_inner.name == "stop":
 								                running_agent = self._running_agents.get(_quick_key)
 								                if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
 								                    running_agent.interrupt("Stop requested")
 								                # Force-clean: remove the session lock regardless of agent state
 								                adapter = self.adapters.get(source.platform)
 								                if adapter and hasattr(adapter, 'get_pending_message'):
 								                    adapter.get_pending_message(_quick_key)  # consume and discard
 								                self._pending_messages.pop(_quick_key, None)
 								                if _quick_key in self._running_agents:
 								                    del self._running_agents[_quick_key]
 								                logger.info("HARD STOP for session %s — session lock released", _quick_key[:20])
 								                return "⚡ Force-stopped. The session is unlocked — you can send a new message."
-												fix(gateway): strip orphaned tool_results + let /reset bypass running agent (#2180)

Two fixes for Telegram/gateway-specific bugs:

1. Anthropic adapter: strip orphaned tool_result blocks (mirror of
   existing tool_use stripping). Context compression or session
   truncation can remove an assistant message containing a tool_use
   while leaving the subsequent tool_result intact. Anthropic rejects
   these with a 400: 'unexpected tool_use_id found in tool_result
   blocks'. The adapter now collects all tool_use IDs and filters out
   any tool_result blocks referencing IDs not in that set.

2. Gateway: /reset and /new now bypass the running-agent guard (like
   /status already does). Previously, sending /reset while an agent
   was running caused the raw text to be queued and later fed back as
   a user message with the same broken history — replaying the
   corrupted session instead of resetting it. Now the running agent is
   interrupted, pending messages are cleared, and the reset command
   dispatches immediately.

Tests updated: existing tests now include proper tool_use→tool_result
pairs; two new tests cover orphaned tool_result stripping.

Co-authored-by: Test <test@test.com>
											
										
										
											2026-03-20 08:39:49 -07:00
+								            # /reset and /new must bypass the running-agent guard so they
 								            # actually dispatch as commands instead of being queued as user
 								            # text (which would be fed back to the agent with the same
 								            # broken history — #2170).  Interrupt the agent first, then
 								            # clear the adapter's pending queue so the stale "/reset" text
 								            # doesn't get re-processed as a user message after the
 								            # interrupt completes.
 								            if _cmd_def_inner and _cmd_def_inner.name == "new":
 								                running_agent = self._running_agents.get(_quick_key)
 								                if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
 								                    running_agent.interrupt("Session reset requested")
 								                # Clear any pending messages so the old text doesn't replay
 								                adapter = self.adapters.get(source.platform)
 								                if adapter and hasattr(adapter, 'get_pending_message'):
 								                    adapter.get_pending_message(_quick_key)  # consume and discard
 								                self._pending_messages.pop(_quick_key, None)
 								                # Clean up the running agent entry so the reset handler
 								                # doesn't think an agent is still active.
 								                if _quick_key in self._running_agents:
 								                    del self._running_agents[_quick_key]
 								                return await self._handle_reset_command(event)
-												feat: add /queue command to queue prompts without interrupting (#2191)

Adds /queue <prompt> (alias /q) that queues a message for the next
turn while the agent is busy, without interrupting the current run.

- CLI: /queue <prompt> puts it in _pending_input for the next turn
- Gateway: /queue <prompt> creates a pending MessageEvent on the
  adapter, picked up after the current agent run finishes
- Enter still interrupts as usual (no behavior change)
- /queue with no prompt shows usage
- /queue when agent is idle tells user to just type normally

Co-authored-by: Test <test@test.com>
											
										
										
											2026-03-20 09:44:27 -07:00
+								            # /queue <prompt> — queue without interrupting
 								            if event.get_command() in ("queue", "q"):
 								                queued_text = event.get_command_args().strip()
 								                if not queued_text:
 								                    return "Usage: /queue <prompt>"
 								                adapter = self.adapters.get(source.platform)
 								                if adapter:
 								                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
 								                    queued_event = _ME(
 								                        text=queued_text,
 								                        message_type=_MT.TEXT,
 								                        source=event.source,
 								                        message_id=event.message_id,
 								                    )
 								                    adapter._pending_messages[_quick_key] = queued_event
 								                return "Queued for the next turn."
-												fix(gateway): prevent telegram photo burst interrupts

											
										
										
											2026-03-15 11:58:19 +05:30
+								            if event.message_type == MessageType.PHOTO:
 								                logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
 								                adapter = self.adapters.get(source.platform)
 								                if adapter:
 								                    # Reuse adapter queue semantics so photo bursts merge cleanly.
 								                    if _quick_key in adapter._pending_messages:
 								                        existing = adapter._pending_messages[_quick_key]
 								                        if getattr(existing, "message_type", None) == MessageType.PHOTO:
 								                            existing.media_urls.extend(event.media_urls)
 								                            existing.media_types.extend(event.media_types)
 								                            if event.text:
 								                                if not existing.text:
 								                                    existing.text = event.text
 								                                elif event.text not in existing.text:
 								                                    existing.text = f"{existing.text}\n\n{event.text}".strip()
 								                        else:
 								                            adapter._pending_messages[_quick_key] = event
 								                    else:
 								                        adapter._pending_messages[_quick_key] = event
 								                return None
-												fix(gateway): prevent concurrent agent runs for the same session

Place a sentinel in _running_agents immediately after the "already
running" guard check passes — before any await.  Without this, the
numerous await points between the guard (line 1324) and agent
registration (track_agent at line 4790) create a window where a
second message for the same session can bypass the guard and start
a duplicate agent, corrupting the transcript.

The await gap includes: hook emissions, vision enrichment (external
API call), audio transcription (external API call), session hygiene
compression, and the run_in_executor call itself.  For messages with
media attachments the window can be several seconds wide.

The sentinel is wrapped in try/finally so it is always cleaned up —
even if the handler raises or takes an early-return path.  When the
real AIAgent is created, track_agent() overwrites the sentinel with
the actual instance (preserving interrupt support).

Also handles the edge case where a message arrives while the sentinel
is set but no real agent exists yet: the message is queued via the
adapter's pending-message mechanism instead of attempting to call
interrupt() on the sentinel object.

											
										
										
											2026-03-19 22:32:37 +03:00
+								            running_agent = self._running_agents.get(_quick_key)
 								            if running_agent is _AGENT_PENDING_SENTINEL:
-												fix: harden sentinel guard for /stop during setup and shutdown

- /stop during sentinel returns helpful message instead of queuing
- Shutdown loop skips sentinel entries instead of catching AttributeError
- _handle_stop_command guards against sentinel (defensive)
- Added tests for both edge cases (7 total race guard tests)

											
										
										
											2026-03-19 18:26:09 -07:00
+								                # Agent is being set up but not ready yet.
 								                if event.get_command() == "stop":
-												fix(gateway): recover from hung agents — /stop force-unlocks session (#3104)

When an agent thread hangs (truly blocked, never checks _interrupt_requested),
/stop now force-cleans _running_agents to unlock the session immediately.

Two changes:
- Early /stop intercept in the running-agent guard: bypasses normal command
  dispatch to force-interrupt and unlock the session. Follows the same pattern
  as the existing /new intercept.
- Sentinel /stop: force-cleans the sentinel instead of returning 'nothing to
  stop yet', so /stop during slow startup actually unlocks the session.

Follow-up improvements over original PR:
- Consolidated duplicate resolve_command imports into single early resolution
- Updated _handle_stop_command to also force-clean for consistency
- Removed 10-minute hard timeout on the executor (would kill legitimate
  long-running agent tasks; the /stop force-clean handles recovery)

Cherry-picked from Mibayy's PR #2498.

Co-authored-by: Mibayy <Mibayy@users.noreply.github.com>
											
										
										
											2026-03-25 18:46:50 -07:00
+								                    # Force-clean the sentinel so the session is unlocked.
 								                    if _quick_key in self._running_agents:
 								                        del self._running_agents[_quick_key]
 								                    logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20])
 								                    return "⚡ Force-stopped. The agent was still starting — session unlocked."
-												fix: harden sentinel guard for /stop during setup and shutdown

- /stop during sentinel returns helpful message instead of queuing
- Shutdown loop skips sentinel entries instead of catching AttributeError
- _handle_stop_command guards against sentinel (defensive)
- Added tests for both edge cases (7 total race guard tests)

											
										
										
											2026-03-19 18:26:09 -07:00
+								                # Queue the message so it will be picked up after the
 								                # agent starts.
-												fix(gateway): prevent concurrent agent runs for the same session

Place a sentinel in _running_agents immediately after the "already
running" guard check passes — before any await.  Without this, the
numerous await points between the guard (line 1324) and agent
registration (track_agent at line 4790) create a window where a
second message for the same session can bypass the guard and start
a duplicate agent, corrupting the transcript.

The await gap includes: hook emissions, vision enrichment (external
API call), audio transcription (external API call), session hygiene
compression, and the run_in_executor call itself.  For messages with
media attachments the window can be several seconds wide.

The sentinel is wrapped in try/finally so it is always cleaned up —
even if the handler raises or takes an early-return path.  When the
real AIAgent is created, track_agent() overwrites the sentinel with
the actual instance (preserving interrupt support).

Also handles the edge case where a message arrives while the sentinel
is set but no real agent exists yet: the message is queued via the
adapter's pending-message mechanism instead of attempting to call
interrupt() on the sentinel object.

											
										
										
											2026-03-19 22:32:37 +03:00
+								                adapter = self.adapters.get(source.platform)
 								                if adapter:
 								                    adapter._pending_messages[_quick_key] = event
 								                return None
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								            logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
 								            running_agent.interrupt(event.text)
 								            if _quick_key in self._pending_messages:
 								                self._pending_messages[_quick_key] += "\n" + event.text
 								            else:
 								                self._pending_messages[_quick_key] = event.text
 								            return None
-												fix(gateway): prevent concurrent agent runs for the same session

Place a sentinel in _running_agents immediately after the "already
running" guard check passes — before any await.  Without this, the
numerous await points between the guard (line 1324) and agent
registration (track_agent at line 4790) create a window where a
second message for the same session can bypass the guard and start
a duplicate agent, corrupting the transcript.

The await gap includes: hook emissions, vision enrichment (external
API call), audio transcription (external API call), session hygiene
compression, and the run_in_executor call itself.  For messages with
media attachments the window can be several seconds wide.

The sentinel is wrapped in try/finally so it is always cleaned up —
even if the handler raises or takes an early-return path.  When the
real AIAgent is created, track_agent() overwrites the sentinel with
the actual instance (preserving interrupt support).

Also handles the edge case where a message arrives while the sentinel
is set but no real agent exists yet: the message is queued via the
adapter's pending-message mechanism instead of attempting to call
interrupt() on the sentinel object.

											
										
										
											2026-03-19 22:32:37 +03:00
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        # Check for commands
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        command = event.get_command()
-												feat(hooks): introduce event hooks system for lifecycle management

Add a new hooks system allowing users to run custom code at key lifecycle points in the agent's operation. This includes support for events such as `gateway:startup`, `session:start`, `agent:step`, and more. Documentation for creating hooks and available events has been added to `README.md` and a new `hooks.md` file. Additionally, integrate step callbacks in the agent to facilitate hook execution during tool-calling iterations.

											
										
										
											2026-02-28 17:09:26 -08:00
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        # Emit command:* hook for any recognized slash command.
 								        # GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY
 								        # in hermes_cli/commands.py — no hardcoded set to maintain here.
 								        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS, resolve_command as _resolve_cmd
 								        if command and command in GATEWAY_KNOWN_COMMANDS:
-												feat(hooks): introduce event hooks system for lifecycle management

Add a new hooks system allowing users to run custom code at key lifecycle points in the agent's operation. This includes support for events such as `gateway:startup`, `session:start`, `agent:step`, and more. Documentation for creating hooks and available events has been added to `README.md` and a new `hooks.md` file. Additionally, integrate step callbacks in the agent to facilitate hook execution during tool-calling iterations.

											
										
										
											2026-02-28 17:09:26 -08:00
+								            await self.hooks.emit(f"command:{command}", {
 								                "platform": source.platform.value if source.platform else "",
 								                "user_id": source.user_id,
 								                "command": command,
 								                "args": event.get_command_args().strip(),
 								            })
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
 								        # Resolve aliases to canonical name so dispatch only checks canonicals.
 								        _cmd_def = _resolve_cmd(command) if command else None
 								        canonical = _cmd_def.name if _cmd_def else command
 								        if canonical == "new":
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            return await self._handle_reset_command(event)
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "help":
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								            return await self._handle_help_command(event)
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "status":
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            return await self._handle_status_command(event)
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "stop":
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								            return await self._handle_stop_command(event)
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "reasoning":
-												feat(gateway): add reasoning hot reload

Add a /reasoning command across gateway adapters so users can
inspect or change reasoning effort without editing config by hand.

Reload reasoning settings from config.yaml before each agent run,
including background tasks, so the next message picks up the new
value consistently.

											
										
										
											2026-03-11 22:12:11 +08:00
+								            return await self._handle_reasoning_command(event)
-												feat: config-gated /verbose command for messaging gateway (#3262)

* feat: config-gated /verbose command for messaging gateway

Add gateway_config_gate field to CommandDef, allowing cli_only commands
to be conditionally available in the gateway based on a config value.

- CommandDef gains gateway_config_gate: str | None — a config dotpath
  that, when truthy, overrides cli_only for gateway surfaces
- /verbose uses gateway_config_gate='display.tool_progress_command'
- Default is off (cli_only behavior preserved)
- When enabled, /verbose cycles tool_progress mode (off/new/all/verbose)
  in the gateway, saving to config.yaml — same cycle as the CLI
- Gateway helpers (help, telegram menus, slack mapping) dynamically
  check config to include/exclude config-gated commands
- GATEWAY_KNOWN_COMMANDS always includes config-gated commands so
  the gateway recognizes them and can respond appropriately
- Handles YAML 1.1 bool coercion (bare 'off' parses as False)
- 8 new tests for the config gate mechanism + gateway handler

* docs: document gateway_config_gate and /verbose messaging support

- AGENTS.md: add gateway_config_gate to CommandDef fields
- slash-commands.md: note /verbose can be enabled for messaging, update Notes
- configuration.md: add tool_progress_command to display section + usage note
- cli.md: cross-link to config docs for messaging enablement
- messaging/index.md: show tool_progress_command in config snippet
- plugins.md: add gateway_config_gate to register_command parameter table
											
										
										
											2026-03-26 14:41:04 -07:00
+								        if canonical == "verbose":
 								            return await self._handle_verbose_command(event)
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "provider":
-												feat: /provider command + fix gateway bugs + harden parse_model_input

/provider command (CLI + gateway):
  Shows all providers with auth status (✓/✗), aliases, and active marker.
  Users can now discover what provider names work with provider:model syntax.

Gateway bugs fixed:
  - Config was saved even when validation.persist=False (told user 'session
    only' but actually persisted the unvalidated model)
  - HERMES_INFERENCE_PROVIDER env var not set on provider switch, causing
    the switch to be silently overridden if that env var was already set

parse_model_input hardened:
  - Colon only treated as provider delimiter if left side is a recognized
    provider name or alias. 'anthropic/claude-3.5-sonnet:beta' now passes
    through as a model name instead of trying provider='anthropic/claude-3.5-sonnet'.
  - HTTP URLs, random colons no longer misinterpreted.

56 tests passing across model validation, CLI commands, and integration.

											
										
										
											2026-03-08 06:09:36 -07:00
+								            return await self._handle_provider_command(event)
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "personality":
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								            return await self._handle_personality_command(event)
-												feat: add /plan command (#1372)

* feat: add /plan command

* refactor: back /plan with bundled skill

* docs: document /plan skill
											
										
										
											2026-03-14 21:18:17 -07:00
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "plan":
-												feat: add /plan command (#1372)

* feat: add /plan command

* refactor: back /plan with bundled skill

* docs: document /plan skill
											
										
										
											2026-03-14 21:18:17 -07:00
+								            try:
 								                from agent.skill_commands import build_plan_path, build_skill_invocation_message
 								                user_instruction = event.get_command_args().strip()
 								                plan_path = build_plan_path(user_instruction)
 								                event.text = build_skill_invocation_message(
 								                    "/plan",
 								                    user_instruction,
 								                    task_id=_quick_key,
 								                    runtime_note=(
-												fix: save /plan output in workspace (#1381)
											
										
										
											2026-03-14 21:28:51 -07:00
+								                        "Save the markdown plan with write_file to this exact relative path "
 								                        f"inside the active workspace/backend cwd: {plan_path}"
-												feat: add /plan command (#1372)

* feat: add /plan command

* refactor: back /plan with bundled skill

* docs: document /plan skill
											
										
										
											2026-03-14 21:18:17 -07:00
+								                    ),
 								                )
 								                if not event.text:
 								                    return "Failed to load the bundled /plan skill."
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								                canonical = None
-												feat: add /plan command (#1372)

* feat: add /plan command

* refactor: back /plan with bundled skill

* docs: document /plan skill
											
										
										
											2026-03-14 21:18:17 -07:00
+								            except Exception as e:
 								                logger.exception("Failed to prepare /plan command")
 								                return f"Failed to enter plan mode: {e}"
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "retry":
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								            return await self._handle_retry_command(event)
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "undo":
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								            return await self._handle_undo_command(event)
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "sethome":
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
+								            return await self._handle_set_home_command(event)
-												feat(gateway): add /compress and /usage commands for conversation management

Implemented the /compress command to allow users to manually compress conversation context, ensuring sufficient history is available before execution. The /usage command was also added to display token usage statistics for the current session, including prompt and completion tokens. Updated command documentation to reflect these new features.

											
										
										
											2026-03-01 00:25:44 -08:00
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "compress":
-												feat(gateway): add /compress and /usage commands for conversation management

Implemented the /compress command to allow users to manually compress conversation context, ensuring sufficient history is available before execution. The /usage command was also added to display token usage statistics for the current session, including prompt and completion tokens. Updated command documentation to reflect these new features.

											
										
										
											2026-03-01 00:25:44 -08:00
+								            return await self._handle_compress_command(event)
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "usage":
-												feat(gateway): add /compress and /usage commands for conversation management

Implemented the /compress command to allow users to manually compress conversation context, ensuring sufficient history is available before execution. The /usage command was also added to display token usage statistics for the current session, including prompt and completion tokens. Updated command documentation to reflect these new features.

											
										
										
											2026-03-01 00:25:44 -08:00
+								            return await self._handle_usage_command(event)
-												feat(mcp): banner integration, /reload-mcp command, resources & prompts

Banner integration:
- MCP Servers section in CLI startup banner between Tools and Skills
- Shows each server with transport type, tool count, connection status
- Failed servers shown in red; section hidden when no MCP configured
- Summary line includes MCP server count
- Removed raw print() calls from discovery (banner handles display)

/reload-mcp command:
- New slash command in both CLI and gateway
- Disconnects all MCP servers, re-reads config.yaml, reconnects
- Reports what changed (added/removed/reconnected servers)
- Allows adding/removing MCP servers without restarting

Resources & Prompts support:
- 4 utility tools registered per server: list_resources, read_resource,
  list_prompts, get_prompt
- Exposes MCP Resources (data sources) and Prompts (templates) as tools
- Proper parameter schemas (uri for read_resource, name for get_prompt)
- Handles text and binary resource content
- 23 new tests covering schemas, handlers, and registration

Test coverage: 74 MCP tests total, 1186 tests pass overall.

											
										
										
											2026-03-02 19:15:59 -08:00
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "insights":
-												feat: add /insights command with usage analytics and cost estimation

Inspired by Claude Code's /insights, adapted for Hermes Agent's multi-platform
architecture. Analyzes session history from state.db to produce comprehensive
usage insights.

Features:
- Overview stats: sessions, messages, tokens, estimated cost, active time
- Model breakdown: per-model sessions, tokens, and cost estimation
- Platform breakdown: CLI vs Telegram vs Discord etc. (unique to Hermes)
- Tool usage ranking: most-used tools with percentages
- Activity patterns: day-of-week chart, peak hours, streaks
- Notable sessions: longest, most messages, most tokens, most tool calls
- Cost estimation: real pricing data for 25+ models (OpenAI, Anthropic,
  DeepSeek, Google, Meta) with fuzzy model name matching
- Configurable time window: --days flag (default 30)
- Source filtering: --source flag to filter by platform

Three entry points:
- /insights slash command in CLI (supports --days and --source flags)
- /insights slash command in gateway (compact markdown format)
- hermes insights CLI subcommand (standalone)

Includes 56 tests covering pricing helpers, format helpers, empty DB,
populated DB with multi-platform data, filtering, formatting, and edge cases.

											
										
										
											2026-03-06 14:04:59 -08:00
+								            return await self._handle_insights_command(event)
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "reload-mcp":
-												feat(mcp): banner integration, /reload-mcp command, resources & prompts

Banner integration:
- MCP Servers section in CLI startup banner between Tools and Skills
- Shows each server with transport type, tool count, connection status
- Failed servers shown in red; section hidden when no MCP configured
- Summary line includes MCP server count
- Removed raw print() calls from discovery (banner handles display)

/reload-mcp command:
- New slash command in both CLI and gateway
- Disconnects all MCP servers, re-reads config.yaml, reconnects
- Reports what changed (added/removed/reconnected servers)
- Allows adding/removing MCP servers without restarting

Resources & Prompts support:
- 4 utility tools registered per server: list_resources, read_resource,
  list_prompts, get_prompt
- Exposes MCP Resources (data sources) and Prompts (templates) as tools
- Proper parameter schemas (uri for read_resource, name for get_prompt)
- Handles text and binary resource content
- 23 new tests covering schemas, handlers, and registration

Test coverage: 74 MCP tests total, 1186 tests pass overall.

											
										
										
											2026-03-02 19:15:59 -08:00
+								            return await self._handle_reload_mcp_command(event)
-												feat: add /update slash command for gateway platforms

Adds a /update command to Telegram, Discord, and other gateway platforms
that runs `hermes update` to pull the latest code, update dependencies,
sync skills, and restart the gateway.

Implementation:
- Spawns `hermes update` in a separate systemd scope (systemd-run --user
  --scope) so the process survives the gateway restart that hermes update
  triggers at the end. Falls back to nohup if systemd-run is unavailable.
- Writes a marker file (.update_pending.json) with the originating
  platform and chat_id before spawning the update.
- On gateway startup, _send_update_notification() checks for the marker,
  reads the captured update output, sends the results back to the user,
  and cleans up.

Also:
- Registers /update as a Discord slash command
- Updates README.md, docs/messaging.md, docs/slash-commands.md
- Adds 18 tests covering handler, notification, and edge cases

											
										
										
											2026-03-05 01:20:58 -08:00
-												fix(gateway): replace bare text approval with /approve and /deny commands (#2002)

The gateway approval system previously intercepted bare 'yes'/'no' text
from the user's next message to approve/deny dangerous commands. This was
fragile and dangerous — if the agent asked a clarify question and the user
said 'yes' to answer it, the gateway would execute the pending dangerous
command instead. (Fixes #1888)

Changes:
- Remove bare text matching ('yes', 'y', 'approve', 'ok', etc.) from
  _handle_message approval check
- Add /approve and /deny as gateway-only slash commands in the command
  registry
- /approve supports scoping: /approve (one-time), /approve session,
  /approve always (permanent)
- Add 5-minute timeout for stale approvals
- Gateway appends structured instructions to the agent response when a
  dangerous command is pending, telling the user exactly how to respond
- 9 tests covering approve, deny, timeout, scoping, and verification
  that bare 'yes' no longer triggers execution

Credit to @solo386 and @FlyByNight69420 for identifying and reporting
this security issue in PR #1971 and issue #1888.

Co-authored-by: Test <test@test.com>
											
										
										
											2026-03-18 16:58:20 -07:00
+								        if canonical == "approve":
 								            return await self._handle_approve_command(event)
 								        if canonical == "deny":
 								            return await self._handle_deny_command(event)
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "update":
-												feat: add /update slash command for gateway platforms

Adds a /update command to Telegram, Discord, and other gateway platforms
that runs `hermes update` to pull the latest code, update dependencies,
sync skills, and restart the gateway.

Implementation:
- Spawns `hermes update` in a separate systemd scope (systemd-run --user
  --scope) so the process survives the gateway restart that hermes update
  triggers at the end. Falls back to nohup if systemd-run is unavailable.
- Writes a marker file (.update_pending.json) with the originating
  platform and chat_id before spawning the update.
- On gateway startup, _send_update_notification() checks for the marker,
  reads the captured update output, sends the results back to the user,
  and cleans up.

Also:
- Registers /update as a Discord slash command
- Updates README.md, docs/messaging.md, docs/slash-commands.md
- Adds 18 tests covering handler, notification, and edge cases

											
										
										
											2026-03-05 01:20:58 -08:00
+								            return await self._handle_update_command(event)
-												fix: harden session title system + add /title to gateway

- Empty string titles normalized to None (prevents uncaught IntegrityError
  when two sessions both get empty-string titles via the unique index)
- Escape SQL LIKE wildcards (%, _) in resolve_session_by_title and
  get_next_title_in_lineage to prevent false matches on titles like
  'test_project' matching 'testXproject #2'
- Optimize list_sessions_rich from N+2 queries to a single query with
  correlated subqueries (preview + last_active computed in SQL)
- Add /title slash command to gateway (Telegram, Discord, Slack, WhatsApp)
  with set and show modes, uniqueness conflict handling
- Add /title to gateway /help text and _known_commands
- 12 new tests: empty string normalization, multi-empty-title safety,
  SQL wildcard edge cases, gateway /title set/show/conflict/cross-platform

											
										
										
											2026-03-08 15:48:09 -07:00
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "title":
-												fix: harden session title system + add /title to gateway

- Empty string titles normalized to None (prevents uncaught IntegrityError
  when two sessions both get empty-string titles via the unique index)
- Escape SQL LIKE wildcards (%, _) in resolve_session_by_title and
  get_next_title_in_lineage to prevent false matches on titles like
  'test_project' matching 'testXproject #2'
- Optimize list_sessions_rich from N+2 queries to a single query with
  correlated subqueries (preview + last_active computed in SQL)
- Add /title slash command to gateway (Telegram, Discord, Slack, WhatsApp)
  with set and show modes, uniqueness conflict handling
- Add /title to gateway /help text and _known_commands
- 12 new tests: empty string normalization, multi-empty-title safety,
  SQL wildcard edge cases, gateway /title set/show/conflict/cross-platform

											
										
										
											2026-03-08 15:48:09 -07:00
+								            return await self._handle_title_command(event)
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "resume":
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
+								            return await self._handle_resume_command(event)
-												feat: filesystem checkpoints and /rollback command

Automatic filesystem snapshots before destructive file operations,
with user-facing rollback.  Inspired by PR #559 (by @alireza78a).

Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot

Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)

Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git

Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference

											
										
										
											2026-03-10 00:49:15 -07:00
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "rollback":
-												feat: filesystem checkpoints and /rollback command

Automatic filesystem snapshots before destructive file operations,
with user-facing rollback.  Inspired by PR #559 (by @alireza78a).

Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot

Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)

Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git

Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference

											
										
										
											2026-03-10 00:49:15 -07:00
+								            return await self._handle_rollback_command(event)
-												feat: add /background command to gateway and CLI commands registry

Add /background <prompt> to the gateway, allowing users on Telegram,
Discord, Slack, etc. to fire off a prompt in a separate agent session.
The result is delivered back to the same chat when done, without
modifying the active conversation history.

Implementation:
- _handle_background_command: validates input, spawns asyncio task
- _run_background_task: creates AIAgent in executor thread, delivers
  result (text, images, media files) back via the platform adapter
- Inherits model, toolsets, provider routing from gateway config
- Error handling with user-visible failure messages

Also adds /background to hermes_cli/commands.py registry so it
appears in /help and autocomplete.

Tests: 15 new tests covering usage, task creation, uniqueness,
multi-platform, error paths, and help/autocomplete integration.

											
										
										
											2026-03-11 02:41:36 -07:00
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "background":
-												feat: add /background command to gateway and CLI commands registry

Add /background <prompt> to the gateway, allowing users on Telegram,
Discord, Slack, etc. to fire off a prompt in a separate agent session.
The result is delivered back to the same chat when done, without
modifying the active conversation history.

Implementation:
- _handle_background_command: validates input, spawns asyncio task
- _run_background_task: creates AIAgent in executor thread, delivers
  result (text, images, media files) back via the platform adapter
- Inherits model, toolsets, provider routing from gateway config
- Error handling with user-visible failure messages

Also adds /background to hermes_cli/commands.py registry so it
appears in /help and autocomplete.

Tests: 15 new tests covering usage, task creation, uniqueness,
multi-platform, error paths, and help/autocomplete integration.

											
										
										
											2026-03-11 02:41:36 -07:00
+								            return await self._handle_background_command(event)
-												fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
											
										
										
											2026-03-12 05:38:19 -07:00
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        if canonical == "voice":
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								            return await self._handle_voice_command(event)
-												feat(cli,gateway): add user-defined quick commands that bypass agent loop

Implements config-driven quick commands for both CLI and gateway that
execute locally without invoking the LLM.

Config example (~/.hermes/config.yaml):
  quick_commands:
    limits:
      type: exec
      command: /home/user/.local/bin/hermes-limits
    dn:
      type: exec
      command: echo daily-note

Changes:
- hermes_cli/config.py: add quick_commands: {} default
- cli.py: check quick_commands before skill commands in process_command()
- gateway/run.py: check quick_commands before skill commands in _handle_message()
- tests/test_quick_commands.py: 11 tests covering exec, timeout, unsupported type, missing command, priority over skills

Closes #744

											
										
										
											2026-03-09 07:38:06 +03:00
+								        # User-defined quick commands (bypass agent loop, no LLM call)
 								        if command:
-												fix(gateway): support quick commands from GatewayConfig

											
										
										
											2026-03-11 12:46:56 -07:00
+								            if isinstance(self.config, dict):
 								                quick_commands = self.config.get("quick_commands", {}) or {}
 								            else:
 								                quick_commands = getattr(self.config, "quick_commands", {}) or {}
-												fix(gateway): bridge quick commands into GatewayConfig runtime

Follow-up on salvaged PR #975.

Bridge quick_commands from config.yaml into load_gateway_config(),
normalize non-dict quick command config at runtime, and add coverage
for GatewayConfig round-trips plus config.yaml bridging. This makes the
GatewayConfig quick-command fix complete for the real user-facing config
path implicated by issue #973.

											
										
										
											2026-03-14 03:57:25 -07:00
+								            if not isinstance(quick_commands, dict):
 								                quick_commands = {}
-												feat(cli,gateway): add user-defined quick commands that bypass agent loop

Implements config-driven quick commands for both CLI and gateway that
execute locally without invoking the LLM.

Config example (~/.hermes/config.yaml):
  quick_commands:
    limits:
      type: exec
      command: /home/user/.local/bin/hermes-limits
    dn:
      type: exec
      command: echo daily-note

Changes:
- hermes_cli/config.py: add quick_commands: {} default
- cli.py: check quick_commands before skill commands in process_command()
- gateway/run.py: check quick_commands before skill commands in _handle_message()
- tests/test_quick_commands.py: 11 tests covering exec, timeout, unsupported type, missing command, priority over skills

Closes #744

											
										
										
											2026-03-09 07:38:06 +03:00
+								            if command in quick_commands:
 								                qcmd = quick_commands[command]
 								                if qcmd.get("type") == "exec":
 								                    exec_cmd = qcmd.get("command", "")
 								                    if exec_cmd:
 								                        try:
 								                            proc = await asyncio.create_subprocess_shell(
 								                                exec_cmd,
 								                                stdout=asyncio.subprocess.PIPE,
 								                                stderr=asyncio.subprocess.PIPE,
 								                            )
 								                            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
 								                            output = (stdout or stderr).decode().strip()
 								                            return output if output else "Command returned no output."
 								                        except asyncio.TimeoutError:
 								                            return "Quick command timed out (30s)."
 								                        except Exception as e:
 								                            return f"Quick command error: {e}"
 								                    else:
 								                        return f"Quick command '/{command}' has no command defined."
-												fix: thread safety for concurrent subagent delegation (#1672)

* fix: thread safety for concurrent subagent delegation

Four thread-safety fixes that prevent crashes and data races when
running multiple subagents concurrently via delegate_task:

1. Remove redirect_stdout/stderr from delegate_tool — mutating global
   sys.stdout races with the spinner thread when multiple children start
   concurrently, causing segfaults. Children already run with
   quiet_mode=True so the redirect was redundant.

2. Split _run_single_child into _build_child_agent (main thread) +
   _run_single_child (worker thread). AIAgent construction creates
   httpx/SSL clients which are not thread-safe to initialize
   concurrently.

3. Add threading.Lock to SessionDB — subagents share the parent's
   SessionDB and call create_session/append_message from worker threads
   with no synchronization.

4. Add _active_children_lock to AIAgent — interrupt() iterates
   _active_children while worker threads append/remove children.

5. Add _client_cache_lock to auxiliary_client — multiple subagent
   threads may resolve clients concurrently via call_llm().

Based on PR #1471 by peteromallet.

* feat: Honcho base_url override via config.yaml + quick command alias type

Two features salvaged from PR #1576:

1. Honcho base_url override: allows pointing Hermes at a remote
   self-hosted Honcho deployment via config.yaml:

     honcho:
       base_url: "http://192.168.x.x:8000"

   When set, this overrides the Honcho SDK's environment mapping
   (production/local), enabling LAN/VPN Honcho deployments without
   requiring the server to live on localhost. Uses config.yaml instead
   of env var (HONCHO_URL) per project convention.

2. Quick command alias type: adds a new 'alias' quick command type
   that rewrites to another slash command before normal dispatch:

     quick_commands:
       sc:
         type: alias
         target: /context

   Supports both CLI and gateway. Arguments are forwarded to the
   target command.

Based on PR #1576 by redhelix.

---------

Co-authored-by: peteromallet <peteromallet@users.noreply.github.com>
Co-authored-by: redhelix <redhelix@users.noreply.github.com>
											
										
										
											2026-03-17 02:53:33 -07:00
+								                elif qcmd.get("type") == "alias":
 								                    target = qcmd.get("target", "").strip()
 								                    if target:
 								                        target = target if target.startswith("/") else f"/{target}"
 								                        target_command = target.lstrip("/")
 								                        user_args = event.get_command_args().strip()
 								                        event.text = f"{target} {user_args}".strip()
 								                        command = target_command
 								                        # Fall through to normal command dispatch below
 								                    else:
 								                        return f"Quick command '/{command}' has no target defined."
-												feat(cli,gateway): add user-defined quick commands that bypass agent loop

Implements config-driven quick commands for both CLI and gateway that
execute locally without invoking the LLM.

Config example (~/.hermes/config.yaml):
  quick_commands:
    limits:
      type: exec
      command: /home/user/.local/bin/hermes-limits
    dn:
      type: exec
      command: echo daily-note

Changes:
- hermes_cli/config.py: add quick_commands: {} default
- cli.py: check quick_commands before skill commands in process_command()
- gateway/run.py: check quick_commands before skill commands in _handle_message()
- tests/test_quick_commands.py: 11 tests covering exec, timeout, unsupported type, missing command, priority over skills

Closes #744

											
										
										
											2026-03-09 07:38:06 +03:00
+								                else:
-												fix: thread safety for concurrent subagent delegation (#1672)

* fix: thread safety for concurrent subagent delegation

Four thread-safety fixes that prevent crashes and data races when
running multiple subagents concurrently via delegate_task:

1. Remove redirect_stdout/stderr from delegate_tool — mutating global
   sys.stdout races with the spinner thread when multiple children start
   concurrently, causing segfaults. Children already run with
   quiet_mode=True so the redirect was redundant.

2. Split _run_single_child into _build_child_agent (main thread) +
   _run_single_child (worker thread). AIAgent construction creates
   httpx/SSL clients which are not thread-safe to initialize
   concurrently.

3. Add threading.Lock to SessionDB — subagents share the parent's
   SessionDB and call create_session/append_message from worker threads
   with no synchronization.

4. Add _active_children_lock to AIAgent — interrupt() iterates
   _active_children while worker threads append/remove children.

5. Add _client_cache_lock to auxiliary_client — multiple subagent
   threads may resolve clients concurrently via call_llm().

Based on PR #1471 by peteromallet.

* feat: Honcho base_url override via config.yaml + quick command alias type

Two features salvaged from PR #1576:

1. Honcho base_url override: allows pointing Hermes at a remote
   self-hosted Honcho deployment via config.yaml:

     honcho:
       base_url: "http://192.168.x.x:8000"

   When set, this overrides the Honcho SDK's environment mapping
   (production/local), enabling LAN/VPN Honcho deployments without
   requiring the server to live on localhost. Uses config.yaml instead
   of env var (HONCHO_URL) per project convention.

2. Quick command alias type: adds a new 'alias' quick command type
   that rewrites to another slash command before normal dispatch:

     quick_commands:
       sc:
         type: alias
         target: /context

   Supports both CLI and gateway. Arguments are forwarded to the
   target command.

Based on PR #1576 by redhelix.

---------

Co-authored-by: peteromallet <peteromallet@users.noreply.github.com>
Co-authored-by: redhelix <redhelix@users.noreply.github.com>
											
										
										
											2026-03-17 02:53:33 -07:00
+								                    return f"Quick command '/{command}' has unsupported type (supported: 'exec', 'alias')."
-												feat(cli,gateway): add user-defined quick commands that bypass agent loop

Implements config-driven quick commands for both CLI and gateway that
execute locally without invoking the LLM.

Config example (~/.hermes/config.yaml):
  quick_commands:
    limits:
      type: exec
      command: /home/user/.local/bin/hermes-limits
    dn:
      type: exec
      command: echo daily-note

Changes:
- hermes_cli/config.py: add quick_commands: {} default
- cli.py: check quick_commands before skill commands in process_command()
- gateway/run.py: check quick_commands before skill commands in _handle_message()
- tests/test_quick_commands.py: 11 tests covering exec, timeout, unsupported type, missing command, priority over skills

Closes #744

											
										
										
											2026-03-09 07:38:06 +03:00
-												feat(plugins): add slash command registration for plugins (#2359)

Plugins can now register slash commands via ctx.register_command()
in their register() function. Commands automatically appear in:
- /help and COMMANDS_BY_CATEGORY (under 'Plugins' category)
- Tab autocomplete in CLI
- Telegram bot menu
- Slack subcommand mapping
- Gateway dispatch

Handler signature: handler(args: str) -> str | None
Async handlers are supported in gateway context.

Changes:
- commands.py: add register_plugin_command() and rebuild_lookups()
- plugins.py: add register_command() to PluginContext, track in
  PluginManager._plugin_commands and LoadedPlugin.commands_registered
- cli.py: dispatch plugin commands in process_command()
- gateway/run.py: dispatch plugin commands before skill commands
- tests: 5 new tests for registration, help, tracking, handler, gateway
- docs: update plugins feature page and build guide
											
										
										
											2026-03-21 16:00:30 -07:00
+								        # Plugin-registered slash commands
 								        if command:
 								            try:
 								                from hermes_cli.plugins import get_plugin_command_handler
 								                plugin_handler = get_plugin_command_handler(command)
 								                if plugin_handler:
 								                    user_args = event.get_command_args().strip()
 								                    import asyncio as _aio
 								                    result = plugin_handler(user_args)
 								                    if _aio.iscoroutine(result):
 								                        result = await result
 								                    return str(result) if result else None
 								            except Exception as e:
 								                logger.debug("Plugin command dispatch failed (non-fatal): %s", e)
-												feat(skills): implement dynamic skill slash commands for CLI and gateway

											
										
										
											2026-02-28 11:18:50 -08:00
+								        # Skill slash commands: /skill-name loads the skill and sends to agent
 								        if command:
 								            try:
 								                from agent.skill_commands import get_skill_commands, build_skill_invocation_message
 								                skill_cmds = get_skill_commands()
 								                cmd_key = f"/{command}"
 								                if cmd_key in skill_cmds:
 								                    user_instruction = event.get_command_args().strip()
-												feat: secure skill env setup on load (core #688)

When a skill declares required_environment_variables in its YAML
frontmatter, missing env vars trigger a secure TUI prompt (identical
to the sudo password widget) when the skill is loaded. Secrets flow
directly to ~/.hermes/.env, never entering LLM context.

Key changes:
- New required_environment_variables frontmatter field for skills
- Secure TUI widget (masked input, 120s timeout)
- Gateway safety: messaging platforms show local setup guidance
- Legacy prerequisites.env_vars normalized into new format
- Remote backend handling: conservative setup_needed=True
- Env var name validation, file permissions hardened to 0o600
- Redact patterns extended for secret-related JSON fields
- 12 existing skills updated with prerequisites declarations
- ~48 new tests covering skip, timeout, gateway, remote backends
- Dynamic panel widget sizing (fixes hardcoded width from original PR)

Cherry-picked from PR #723 by kshitijk4poor, rebased onto current main
with conflict resolution.

Fixes #688

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>

											
										
										
											2026-03-13 03:14:04 -07:00
+								                    msg = build_skill_invocation_message(
-												fix(gateway): use correct variable for skill slash command task_id

Line 1482 referenced 'session_key' which is not defined until line 1519,
causing a NameError on every skill slash command invocation in the gateway
(e.g. /deploy, /plan-with-skill). The try/except silently swallowed the
error, making all user-defined skill slash commands silently fail.

The correct variable is '_quick_key', defined at line 1292 (same variable
used by the /plan handler on line 1379).

											
										
										
											2026-03-17 03:42:15 -07:00
+								                        cmd_key, user_instruction, task_id=_quick_key
-												feat: secure skill env setup on load (core #688)

When a skill declares required_environment_variables in its YAML
frontmatter, missing env vars trigger a secure TUI prompt (identical
to the sudo password widget) when the skill is loaded. Secrets flow
directly to ~/.hermes/.env, never entering LLM context.

Key changes:
- New required_environment_variables frontmatter field for skills
- Secure TUI widget (masked input, 120s timeout)
- Gateway safety: messaging platforms show local setup guidance
- Legacy prerequisites.env_vars normalized into new format
- Remote backend handling: conservative setup_needed=True
- Env var name validation, file permissions hardened to 0o600
- Redact patterns extended for secret-related JSON fields
- 12 existing skills updated with prerequisites declarations
- ~48 new tests covering skip, timeout, gateway, remote backends
- Dynamic panel widget sizing (fixes hardcoded width from original PR)

Cherry-picked from PR #723 by kshitijk4poor, rebased onto current main
with conflict resolution.

Fixes #688

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>

											
										
										
											2026-03-13 03:14:04 -07:00
+								                    )
-												feat(skills): implement dynamic skill slash commands for CLI and gateway

											
										
										
											2026-02-28 11:18:50 -08:00
+								                    if msg:
 								                        event.text = msg
 								                        # Fall through to normal message processing with skill content
 								            except Exception as e:
 								                logger.debug("Skill command check failed (non-fatal): %s", e)
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
-												fix(gateway): replace bare text approval with /approve and /deny commands (#2002)

The gateway approval system previously intercepted bare 'yes'/'no' text
from the user's next message to approve/deny dangerous commands. This was
fragile and dangerous — if the agent asked a clarify question and the user
said 'yes' to answer it, the gateway would execute the pending dangerous
command instead. (Fixes #1888)

Changes:
- Remove bare text matching ('yes', 'y', 'approve', 'ok', etc.) from
  _handle_message approval check
- Add /approve and /deny as gateway-only slash commands in the command
  registry
- /approve supports scoping: /approve (one-time), /approve session,
  /approve always (permanent)
- Add 5-minute timeout for stale approvals
- Gateway appends structured instructions to the agent response when a
  dangerous command is pending, telling the user exactly how to respond
- 9 tests covering approve, deny, timeout, scoping, and verification
  that bare 'yes' no longer triggers execution

Credit to @solo386 and @FlyByNight69420 for identifying and reporting
this security issue in PR #1971 and issue #1888.

Co-authored-by: Test <test@test.com>
											
										
										
											2026-03-18 16:58:20 -07:00
+								        # Pending exec approvals are handled by /approve and /deny commands above.
 								        # No bare text matching — "yes" in normal conversation must not trigger
 								        # execution of a dangerous command.
-												fix(gateway): prevent concurrent agent runs for the same session

Place a sentinel in _running_agents immediately after the "already
running" guard check passes — before any await.  Without this, the
numerous await points between the guard (line 1324) and agent
registration (track_agent at line 4790) create a window where a
second message for the same session can bypass the guard and start
a duplicate agent, corrupting the transcript.

The await gap includes: hook emissions, vision enrichment (external
API call), audio transcription (external API call), session hygiene
compression, and the run_in_executor call itself.  For messages with
media attachments the window can be several seconds wide.

The sentinel is wrapped in try/finally so it is always cleaned up —
even if the handler raises or takes an early-return path.  When the
real AIAgent is created, track_agent() overwrites the sentinel with
the actual instance (preserving interrupt support).

Also handles the edge case where a message arrives while the sentinel
is set but no real agent exists yet: the message is queued via the
adapter's pending-message mechanism instead of attempting to call
interrupt() on the sentinel object.

											
										
										
											2026-03-19 22:32:37 +03:00
 								        # ── Claim this session before any await ───────────────────────
 								        # Between here and _run_agent registering the real AIAgent, there
 								        # are numerous await points (hooks, vision enrichment, STT,
 								        # session hygiene compression).  Without this sentinel a second
 								        # message arriving during any of those yields would pass the
 								        # "already running" guard and spin up a duplicate agent for the
 								        # same session — corrupting the transcript.
 								        self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL
 								        try:
 								            return await self._handle_message_with_agent(event, source, _quick_key)
 								        finally:
 								            # If _run_agent replaced the sentinel with a real agent and
 								            # then cleaned it up, this is a no-op.  If we exited early
 								            # (exception, command fallthrough, etc.) the sentinel must
 								            # not linger or the session would be permanently locked out.
 								            if self._running_agents.get(_quick_key) is _AGENT_PENDING_SENTINEL:
 								                del self._running_agents[_quick_key]
 								    async def _handle_message_with_agent(self, event, source, _quick_key: str):
 								        """Inner handler that runs under the _running_agents sentinel guard."""
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        # Get or create session
 								        session_entry = self.session_store.get_or_create_session(source)
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        session_key = session_entry.session_key
-												feat(hooks): introduce event hooks system for lifecycle management

Add a new hooks system allowing users to run custom code at key lifecycle points in the agent's operation. This includes support for events such as `gateway:startup`, `session:start`, `agent:step`, and more. Documentation for creating hooks and available events has been added to `README.md` and a new `hooks.md` file. Additionally, integrate step callbacks in the agent to facilitate hook execution during tool-calling iterations.

											
										
										
											2026-02-28 17:09:26 -08:00
+								        # Emit session:start for new or auto-reset sessions
 								        _is_new_session = (
 								            session_entry.created_at == session_entry.updated_at
 								            or getattr(session_entry, "was_auto_reset", False)
 								        )
 								        if _is_new_session:
 								            await self.hooks.emit("session:start", {
 								                "platform": source.platform.value if source.platform else "",
 								                "user_id": source.user_id,
 								                "session_id": session_entry.session_id,
 								                "session_key": session_key,
 								            })
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        # Build session context
 								        context = build_session_context(source, self.config, session_entry)
 								        # Set environment variables for tools
 								        self._set_session_env(context)
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								        # Read privacy.redact_pii from config (re-read per message)
 								        _redact_pii = False
 								        try:
-												fix(gateway): add missing yaml import for PII redaction config read

The privacy.redact_pii config reader on line 1546 used bare 'yaml'
which is not in scope — yaml is imported as '_yaml' at module level
(line 93) and as '_y' in other methods. The NameError was silently
caught by the try/except, so PII redaction never activated even when
configured.

Add a local 'import yaml as _pii_yaml' consistent with the pattern
used elsewhere in the file.

											
										
										
											2026-03-17 03:48:15 -07:00
+								            import yaml as _pii_yaml
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								            with open(_config_path, encoding="utf-8") as _pf:
-												fix(gateway): add missing yaml import for PII redaction config read

The privacy.redact_pii config reader on line 1546 used bare 'yaml'
which is not in scope — yaml is imported as '_yaml' at module level
(line 93) and as '_y' in other methods. The NameError was silently
caught by the try/except, so PII redaction never activated even when
configured.

Add a local 'import yaml as _pii_yaml' consistent with the pattern
used elsewhere in the file.

											
										
										
											2026-03-17 03:48:15 -07:00
+								                _pcfg = _pii_yaml.safe_load(_pf) or {}
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								            _redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False))
 								        except Exception:
 								            pass
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        # Build the context prompt to inject
-												feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

											
										
										
											2026-03-16 05:48:45 -07:00
+								        context_prompt = build_session_context_prompt(context, redact_pii=_redact_pii)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								        # If the previous session expired and was auto-reset, prepend a notice
 								        # so the agent knows this is a fresh conversation (not an intentional /reset).
 								        if getattr(session_entry, 'was_auto_reset', False):
-												feat(gateway): notify users when session auto-resets (#2519)

When a session expires (daily schedule or idle timeout) and is
automatically reset, send a notification to the user explaining
what happened:

  ◐ Session automatically reset (inactive for 24h).
    Conversation history cleared.
  Use /resume to browse and restore a previous session.
  Adjust reset timing in config.yaml under session_reset.

Notifications are suppressed when:
- The expired session had no activity (no tokens used)
- The platform is excluded (api_server, webhook by default)
- notify: false in config

Changes:
- session.py: _should_reset() returns reason string ('idle'/'daily')
  instead of bool; SessionEntry gains auto_reset_reason and
  reset_had_activity fields; old entry's total_tokens checked
- config.py: SessionResetPolicy gains notify (bool, default: true)
  and notify_exclude_platforms (default: api_server, webhook)
- run.py: sends notification via adapter.send() before processing
  the user's message, with activity + platform checks
- 13 new tests

Config (config.yaml):

  session_reset:
    notify: true
    notify_exclude_platforms: [api_server, webhook]
											
										
										
											2026-03-22 09:33:39 -07:00
+								            reset_reason = getattr(session_entry, 'auto_reset_reason', None) or 'idle'
 								            if reset_reason == "daily":
 								                context_note = "[System note: The user's session was automatically reset by the daily schedule. This is a fresh conversation with no prior context.]"
 								            else:
 								                context_note = "[System note: The user's previous session expired due to inactivity. This is a fresh conversation with no prior context.]"
 								            context_prompt = context_note + "\n\n" + context_prompt
 								            # Send a user-facing notification explaining the reset, unless:
 								            # - notifications are disabled in config
 								            # - the platform is excluded (e.g. api_server, webhook)
 								            # - the expired session had no activity (nothing was cleared)
 								            try:
 								                policy = self.session_store.config.get_reset_policy(
 								                    platform=source.platform,
 								                    session_type=getattr(source, 'chat_type', 'dm'),
 								                )
 								                platform_name = source.platform.value if source.platform else ""
 								                had_activity = getattr(session_entry, 'reset_had_activity', False)
 								                should_notify = (
 								                    policy.notify
 								                    and had_activity
 								                    and platform_name not in policy.notify_exclude_platforms
 								                )
 								                if should_notify:
 								                    adapter = self.adapters.get(source.platform)
 								                    if adapter:
 								                        if reset_reason == "daily":
 								                            reason_text = f"daily schedule at {policy.at_hour}:00"
 								                        else:
 								                            hours = policy.idle_minutes // 60
 								                            mins = policy.idle_minutes % 60
 								                            duration = f"{hours}h" if not mins else f"{hours}h {mins}m" if hours else f"{mins}m"
 								                            reason_text = f"inactive for {duration}"
 								                        notice = (
 								                            f"◐ Session automatically reset ({reason_text}). "
 								                            f"Conversation history cleared.\n"
 								                            f"Use /resume to browse and restore a previous session.\n"
 								                            f"Adjust reset timing in config.yaml under session_reset."
 								                        )
-												feat(gateway): surface session config on /new, /reset, and auto-reset (#3321)

When a new session starts in the gateway (via /new, /reset, or
auto-reset), send the user a summary of the detected configuration:

  ✨ Session reset! Starting fresh.

  ◆ Model: qwen3.5:27b-q4_K_M
  ◆ Provider: custom
  ◆ Context: 8K tokens (config)
  ◆ Endpoint: http://localhost:11434/v1

This makes misconfigured context length immediately visible — a user
running a local 8K model that falls to the 128K default will see:

  ◆ Context: 128K tokens (default — set model.context_length in config to override)

Instead of silently getting no compression and degrading responses.

- _format_session_info() resolves model, provider, context length,
  and endpoint from config + runtime, matching the hygiene code's
  resolution chain
- Local/custom endpoints shown; cloud endpoints hidden (not useful)
- Context source annotated: config, detected, or default with hint
- Appended to /new and /reset responses, and auto-reset notifications
- 9 tests covering all formatting paths and failure resilience

Addresses the user-facing side of #2708 — instead of trying to fix
every edge case in context detection, surface the values so users
can immediately see when something is wrong.
											
										
										
											2026-03-26 19:27:58 -07:00
+								                        try:
 								                            session_info = self._format_session_info()
 								                            if session_info:
 								                                notice = f"{notice}\n\n{session_info}"
 								                        except Exception:
 								                            pass
-												feat(gateway): notify users when session auto-resets (#2519)

When a session expires (daily schedule or idle timeout) and is
automatically reset, send a notification to the user explaining
what happened:

  ◐ Session automatically reset (inactive for 24h).
    Conversation history cleared.
  Use /resume to browse and restore a previous session.
  Adjust reset timing in config.yaml under session_reset.

Notifications are suppressed when:
- The expired session had no activity (no tokens used)
- The platform is excluded (api_server, webhook by default)
- notify: false in config

Changes:
- session.py: _should_reset() returns reason string ('idle'/'daily')
  instead of bool; SessionEntry gains auto_reset_reason and
  reset_had_activity fields; old entry's total_tokens checked
- config.py: SessionResetPolicy gains notify (bool, default: true)
  and notify_exclude_platforms (default: api_server, webhook)
- run.py: sends notification via adapter.send() before processing
  the user's message, with activity + platform checks
- 13 new tests

Config (config.yaml):

  session_reset:
    notify: true
    notify_exclude_platforms: [api_server, webhook]
											
										
										
											2026-03-22 09:33:39 -07:00
+								                        await adapter.send(
 								                            source.chat_id, notice,
 								                            metadata=getattr(event, 'metadata', None),
 								                        )
 								            except Exception as e:
 								                logger.debug("Auto-reset notification failed (non-fatal): %s", e)
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								            session_entry.was_auto_reset = False
-												feat(gateway): notify users when session auto-resets (#2519)

When a session expires (daily schedule or idle timeout) and is
automatically reset, send a notification to the user explaining
what happened:

  ◐ Session automatically reset (inactive for 24h).
    Conversation history cleared.
  Use /resume to browse and restore a previous session.
  Adjust reset timing in config.yaml under session_reset.

Notifications are suppressed when:
- The expired session had no activity (no tokens used)
- The platform is excluded (api_server, webhook by default)
- notify: false in config

Changes:
- session.py: _should_reset() returns reason string ('idle'/'daily')
  instead of bool; SessionEntry gains auto_reset_reason and
  reset_had_activity fields; old entry's total_tokens checked
- config.py: SessionResetPolicy gains notify (bool, default: true)
  and notify_exclude_platforms (default: api_server, webhook)
- run.py: sends notification via adapter.send() before processing
  the user's message, with activity + platform checks
- 13 new tests

Config (config.yaml):

  session_reset:
    notify: true
    notify_exclude_platforms: [api_server, webhook]
											
										
										
											2026-03-22 09:33:39 -07:00
+								            session_entry.auto_reset_reason = None
-												feat(telegram): Private Chat Topics with functional skill binding (#2598)

Salvages PR #3005 by web3blind. Cherry-picked onto current main with functional skill binding and docs added.

- DM topic creation via createForumTopic (Bot API 9.4, Feb 2026)
- Config-driven topics with thread_id persistence across restarts
- Session isolation via existing build_session_key thread_id support
- auto_skill field on MessageEvent for topic-skill bindings
- Gateway auto-loads bound skill on new sessions (same as /skill commands)
- Docs: full Private Chat Topics section in Telegram messaging guide
- 20 tests (17 original + 3 for auto_skill)

Closes #2598
Co-authored-by: web3blind <web3blind@users.noreply.github.com>
											
										
										
											2026-03-26 02:04:11 -07:00
 								        # Auto-load skill for DM topic bindings (e.g., Telegram Private Chat Topics)
 								        # Only inject on NEW sessions — for ongoing conversations the skill content
 								        # is already in the conversation history from the first message.
 								        if _is_new_session and getattr(event, "auto_skill", None):
 								            try:
 								                from agent.skill_commands import _load_skill_payload, _build_skill_message
 								                _skill_name = event.auto_skill
 								                _loaded = _load_skill_payload(_skill_name, task_id=_quick_key)
 								                if _loaded:
 								                    _loaded_skill, _skill_dir, _display_name = _loaded
 								                    _activation_note = (
 								                        f'[SYSTEM: This conversation is in a topic with the "{_display_name}" skill '
 								                        f"auto-loaded. Follow its instructions for the duration of this session.]"
 								                    )
 								                    _skill_msg = _build_skill_message(
 								                        _loaded_skill, _skill_dir, _activation_note,
 								                        user_instruction=event.text,
 								                    )
 								                    if _skill_msg:
 								                        event.text = _skill_msg
 								                        logger.info(
 								                            "[Gateway] Auto-loaded skill '%s' for DM topic session %s",
 								                            _skill_name, session_key,
 								                        )
 								                else:
 								                    logger.warning(
 								                        "[Gateway] DM topic skill '%s' not found in available skills",
 								                        _skill_name,
 								                    )
 								            except Exception as e:
 								                logger.warning("[Gateway] Failed to auto-load topic skill '%s': %s", event.auto_skill, e)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        # Load conversation history from transcript
 								        history = self.session_store.load_transcript(session_entry.session_id)
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								        # -----------------------------------------------------------------
 								        # Session hygiene: auto-compress pathologically large transcripts
 								        #
 								        # Long-lived gateway sessions can accumulate enough history that
 								        # every new message rehydrates an oversized transcript, causing
 								        # repeated truncation/context failures.  Detect this early and
 								        # compress proactively — before the agent even starts.  (#628)
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								        #
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								        # Token source priority:
 								        # 1. Actual API-reported prompt_tokens from the last turn
 								        #    (stored in session_entry.last_prompt_tokens)
-												refactor(gateway): remove broken 1.4x hygiene multiplier entirely

The previous commit capped the 1.4x at 95% of context, but the multiplier
itself is unnecessary and confusing:

  85% threshold × 1.4 = 119% of context → never fires
  95% warn      × 1.4 = 133% of context → never warns

The 85% hygiene threshold already provides ample headroom over the agent's
own 50% compressor. Even if rough estimates overestimate by 50%, hygiene
would fire at ~57% actual usage — safe and harmless.

Remove the multiplier entirely. Both actual and estimated token paths
now use the same 85% / 95% thresholds. Update tests and comments.

											
										
										
											2026-03-22 15:21:18 -07:00
+								        # 2. Rough char-based estimate (str(msg)//4). Overestimates
 								        #    by 30-50% on code/JSON-heavy sessions, but that just
 								        #    means hygiene fires a bit early — safe and harmless.
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								        # -----------------------------------------------------------------
 								        if history and len(history) >= 4:
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								            from agent.model_metadata import (
 								                estimate_messages_tokens_rough,
 								                get_model_context_length,
 								            )
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: raise session hygiene threshold from 50% to 85%

Session hygiene was firing at the same threshold (50%) as the agent's
own context compressor, causing premature compression on every turn
in long gateway sessions (especially Telegram).

Hygiene is a safety net for pathologically large sessions that would
cause API failures — it should NOT be doing normal compression work.
The agent's own compressor handles that during its tool loop with
accurate real token counts from the API.

Changes:
- Default hygiene threshold: 0.50 → 0.85 (fires only when truly large)
- Hygiene threshold is now independent of compression.threshold config
  (that setting controls the agent's compressor, not the pre-agent safety net)
- Removed env var override for hygiene threshold (CONTEXT_COMPRESSION_THRESHOLD
  still controls the agent's own compressor)
											
										
										
											2026-03-13 04:17:45 -07:00
+								            # Read model + compression config from config.yaml.
 								            # NOTE: hygiene threshold is intentionally HIGHER than the agent's
 								            # own compressor (0.85 vs 0.50).  Hygiene is a safety net for
 								            # sessions that grew too large between turns — it fires pre-agent
 								            # to prevent API failures.  The agent's own compressor handles
 								            # normal context management during its tool loop with accurate
 								            # real token counts.  Having hygiene at 0.50 caused premature
 								            # compression on every turn in long gateway sessions.
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								            _hyg_model = "anthropic/claude-sonnet-4.6"
-												fix: raise session hygiene threshold from 50% to 85%

Session hygiene was firing at the same threshold (50%) as the agent's
own context compressor, causing premature compression on every turn
in long gateway sessions (especially Telegram).

Hygiene is a safety net for pathologically large sessions that would
cause API failures — it should NOT be doing normal compression work.
The agent's own compressor handles that during its tool loop with
accurate real token counts from the API.

Changes:
- Default hygiene threshold: 0.50 → 0.85 (fires only when truly large)
- Hygiene threshold is now independent of compression.threshold config
  (that setting controls the agent's compressor, not the pre-agent safety net)
- Removed env var override for hygiene threshold (CONTEXT_COMPRESSION_THRESHOLD
  still controls the agent's own compressor)
											
										
										
											2026-03-13 04:17:45 -07:00
+								            _hyg_threshold_pct = 0.85
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								            _hyg_compression_enabled = True
-												fix(gateway): hygiene compression ignores config context_length and 1.4x exceeds model limit

Three bugs in gateway session hygiene pre-compression caused 'Session too
large' errors for ~200K context models like GLM-5-turbo on z.ai:

1. Gateway hygiene called get_model_context_length(model) without passing
   config_context_length, provider, or base_url — so user overrides like
   model.context_length: 180000 were ignored, and provider-aware detection
   (models.dev, z.ai endpoint) couldn't fire. The agent's own compressor
   correctly passed all three (run_agent.py line 1038).

2. The 1.4x safety factor on rough token estimates pushed the compression
   threshold above the model's actual context limit:
     200K * 0.85 * 1.4 = 238K > 200K (model limit)
   So hygiene never compressed, sessions grew past the limit, and the API
   rejected the request.

3. Same issue for the warn threshold: 200K * 0.95 * 1.4 = 266K.

Fix:
- Read model.context_length, provider, and base_url from config.yaml
  (same as run_agent.py does) and pass them to get_model_context_length()
- Resolve provider/base_url from runtime when not in config
- Cap the 1.4x-adjusted compress threshold at 95% of context_length
- Cap the 1.4x-adjusted warn threshold at context_length

Affects: z.ai GLM-5/GLM-5-turbo, any ~200K or smaller context model
where the 1.4x factor would push 85% above 100%.

Ref: Discord report from Ddox — glm-5-turbo on z.ai coding plan

											
										
										
											2026-03-22 15:15:25 -07:00
+								            _hyg_config_context_length = None
 								            _hyg_provider = None
 								            _hyg_base_url = None
 								            _hyg_api_key = None
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								            try:
 								                _hyg_cfg_path = _hermes_home / "config.yaml"
 								                if _hyg_cfg_path.exists():
 								                    import yaml as _hyg_yaml
-												Merge PR #458: Add explicit UTF-8 encoding to config/data file I/O

Authored by shitcoinsherpa. Adds encoding='utf-8' to all text-mode
open() calls in gateway/run.py, gateway/config.py, hermes_cli/config.py,
hermes_cli/main.py, and hermes_cli/status.py. Prevents encoding errors
on Windows where the default locale is not UTF-8.

Also fixed 4 additional open() calls in gateway/run.py that were added
after the PR branch was created.

											
										
										
											2026-03-09 21:19:20 -07:00
+								                    with open(_hyg_cfg_path, encoding="utf-8") as _hyg_f:
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								                        _hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
 								                    # Resolve model name (same logic as run_sync)
 								                    _model_cfg = _hyg_data.get("model", {})
 								                    if isinstance(_model_cfg, str):
 								                        _hyg_model = _model_cfg
 								                    elif isinstance(_model_cfg, dict):
 								                        _hyg_model = _model_cfg.get("default", _hyg_model)
-												fix(gateway): hygiene compression ignores config context_length and 1.4x exceeds model limit

Three bugs in gateway session hygiene pre-compression caused 'Session too
large' errors for ~200K context models like GLM-5-turbo on z.ai:

1. Gateway hygiene called get_model_context_length(model) without passing
   config_context_length, provider, or base_url — so user overrides like
   model.context_length: 180000 were ignored, and provider-aware detection
   (models.dev, z.ai endpoint) couldn't fire. The agent's own compressor
   correctly passed all three (run_agent.py line 1038).

2. The 1.4x safety factor on rough token estimates pushed the compression
   threshold above the model's actual context limit:
     200K * 0.85 * 1.4 = 238K > 200K (model limit)
   So hygiene never compressed, sessions grew past the limit, and the API
   rejected the request.

3. Same issue for the warn threshold: 200K * 0.95 * 1.4 = 266K.

Fix:
- Read model.context_length, provider, and base_url from config.yaml
  (same as run_agent.py does) and pass them to get_model_context_length()
- Resolve provider/base_url from runtime when not in config
- Cap the 1.4x-adjusted compress threshold at 95% of context_length
- Cap the 1.4x-adjusted warn threshold at context_length

Affects: z.ai GLM-5/GLM-5-turbo, any ~200K or smaller context model
where the 1.4x factor would push 85% above 100%.

Ref: Discord report from Ddox — glm-5-turbo on z.ai coding plan

											
										
										
											2026-03-22 15:15:25 -07:00
+								                        # Read explicit context_length override from model config
 								                        # (same as run_agent.py lines 995-1005)
 								                        _raw_ctx = _model_cfg.get("context_length")
 								                        if _raw_ctx is not None:
 								                            try:
 								                                _hyg_config_context_length = int(_raw_ctx)
 								                            except (TypeError, ValueError):
 								                                pass
 								                        # Read provider for accurate context detection
 								                        _hyg_provider = _model_cfg.get("provider") or None
 								                        _hyg_base_url = _model_cfg.get("base_url") or None
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
-												fix: raise session hygiene threshold from 50% to 85%

Session hygiene was firing at the same threshold (50%) as the agent's
own context compressor, causing premature compression on every turn
in long gateway sessions (especially Telegram).

Hygiene is a safety net for pathologically large sessions that would
cause API failures — it should NOT be doing normal compression work.
The agent's own compressor handles that during its tool loop with
accurate real token counts from the API.

Changes:
- Default hygiene threshold: 0.50 → 0.85 (fires only when truly large)
- Hygiene threshold is now independent of compression.threshold config
  (that setting controls the agent's compressor, not the pre-agent safety net)
- Removed env var override for hygiene threshold (CONTEXT_COMPRESSION_THRESHOLD
  still controls the agent's own compressor)
											
										
										
											2026-03-13 04:17:45 -07:00
+								                    # Read compression settings — only use enabled flag.
 								                    # The threshold is intentionally separate from the agent's
 								                    # compression.threshold (hygiene runs higher).
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                    _comp_cfg = _hyg_data.get("compression", {})
 								                    if isinstance(_comp_cfg, dict):
 								                        _hyg_compression_enabled = str(
 								                            _comp_cfg.get("enabled", True)
 								                        ).lower() in ("true", "1", "yes")
-												fix(gateway): hygiene compression ignores config context_length and 1.4x exceeds model limit

Three bugs in gateway session hygiene pre-compression caused 'Session too
large' errors for ~200K context models like GLM-5-turbo on z.ai:

1. Gateway hygiene called get_model_context_length(model) without passing
   config_context_length, provider, or base_url — so user overrides like
   model.context_length: 180000 were ignored, and provider-aware detection
   (models.dev, z.ai endpoint) couldn't fire. The agent's own compressor
   correctly passed all three (run_agent.py line 1038).

2. The 1.4x safety factor on rough token estimates pushed the compression
   threshold above the model's actual context limit:
     200K * 0.85 * 1.4 = 238K > 200K (model limit)
   So hygiene never compressed, sessions grew past the limit, and the API
   rejected the request.

3. Same issue for the warn threshold: 200K * 0.95 * 1.4 = 266K.

Fix:
- Read model.context_length, provider, and base_url from config.yaml
  (same as run_agent.py does) and pass them to get_model_context_length()
- Resolve provider/base_url from runtime when not in config
- Cap the 1.4x-adjusted compress threshold at 95% of context_length
- Cap the 1.4x-adjusted warn threshold at context_length

Affects: z.ai GLM-5/GLM-5-turbo, any ~200K or smaller context model
where the 1.4x factor would push 85% above 100%.

Ref: Discord report from Ddox — glm-5-turbo on z.ai coding plan

											
										
										
											2026-03-22 15:15:25 -07:00
 								                # Resolve provider/base_url from runtime if not in config
 								                if not _hyg_provider or not _hyg_base_url:
 								                    try:
 								                        _hyg_runtime = _resolve_runtime_agent_kwargs()
 								                        _hyg_provider = _hyg_provider or _hyg_runtime.get("provider")
 								                        _hyg_base_url = _hyg_base_url or _hyg_runtime.get("base_url")
 								                        _hyg_api_key = _hyg_runtime.get("api_key")
 								                    except Exception:
 								                        pass
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								            except Exception:
 								                pass
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								            if _hyg_compression_enabled:
-												fix(gateway): hygiene compression ignores config context_length and 1.4x exceeds model limit

Three bugs in gateway session hygiene pre-compression caused 'Session too
large' errors for ~200K context models like GLM-5-turbo on z.ai:

1. Gateway hygiene called get_model_context_length(model) without passing
   config_context_length, provider, or base_url — so user overrides like
   model.context_length: 180000 were ignored, and provider-aware detection
   (models.dev, z.ai endpoint) couldn't fire. The agent's own compressor
   correctly passed all three (run_agent.py line 1038).

2. The 1.4x safety factor on rough token estimates pushed the compression
   threshold above the model's actual context limit:
     200K * 0.85 * 1.4 = 238K > 200K (model limit)
   So hygiene never compressed, sessions grew past the limit, and the API
   rejected the request.

3. Same issue for the warn threshold: 200K * 0.95 * 1.4 = 266K.

Fix:
- Read model.context_length, provider, and base_url from config.yaml
  (same as run_agent.py does) and pass them to get_model_context_length()
- Resolve provider/base_url from runtime when not in config
- Cap the 1.4x-adjusted compress threshold at 95% of context_length
- Cap the 1.4x-adjusted warn threshold at context_length

Affects: z.ai GLM-5/GLM-5-turbo, any ~200K or smaller context model
where the 1.4x factor would push 85% above 100%.

Ref: Discord report from Ddox — glm-5-turbo on z.ai coding plan

											
										
										
											2026-03-22 15:15:25 -07:00
+								                _hyg_context_length = get_model_context_length(
 								                    _hyg_model,
 								                    base_url=_hyg_base_url or "",
 								                    api_key=_hyg_api_key or "",
 								                    config_context_length=_hyg_config_context_length,
 								                    provider=_hyg_provider or "",
 								                )
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                _compress_token_threshold = int(
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								                    _hyg_context_length * _hyg_threshold_pct
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                )
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								                _warn_token_threshold = int(_hyg_context_length * 0.95)
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                _msg_count = len(history)
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
 								                # Prefer actual API-reported tokens from the last turn
 								                # (stored in session entry) over the rough char-based estimate.
 								                _stored_tokens = session_entry.last_prompt_tokens
 								                if _stored_tokens > 0:
 								                    _approx_tokens = _stored_tokens
 								                    _token_source = "actual"
 								                else:
 								                    _approx_tokens = estimate_messages_tokens_rough(history)
 								                    _token_source = "estimated"
-												refactor(gateway): remove broken 1.4x hygiene multiplier entirely

The previous commit capped the 1.4x at 95% of context, but the multiplier
itself is unnecessary and confusing:

  85% threshold × 1.4 = 119% of context → never fires
  95% warn      × 1.4 = 133% of context → never warns

The 85% hygiene threshold already provides ample headroom over the agent's
own 50% compressor. Even if rough estimates overestimate by 50%, hygiene
would fire at ~57% actual usage — safe and harmless.

Remove the multiplier entirely. Both actual and estimated token paths
now use the same 85% / 95% thresholds. Update tests and comments.

											
										
										
											2026-03-22 15:21:18 -07:00
+								                    # Note: rough estimates overestimate by 30-50% for code/JSON-heavy
 								                    # sessions, but that just means hygiene fires a bit early — which
 								                    # is safe and harmless.  The 85% threshold already provides ample
 								                    # headroom (agent's own compressor runs at 50%).  A previous 1.4x
 								                    # multiplier tried to compensate by inflating the threshold, but
 								                    # 85% * 1.4 = 119% of context — which exceeds the model's limit
 								                    # and prevented hygiene from ever firing for ~200K models (GLM-5).
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                _needs_compress = _approx_tokens >= _compress_token_threshold
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                if _needs_compress:
 								                    logger.info(
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								                        "Session hygiene: %s messages, ~%s tokens (%s) — auto-compressing "
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                        "(threshold: %s%% of %s = %s tokens)",
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								                        _msg_count, f"{_approx_tokens:,}", _token_source,
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                        int(_hyg_threshold_pct * 100),
 								                        f"{_hyg_context_length:,}",
 								                        f"{_compress_token_threshold:,}",
 								                    )
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                    _hyg_adapter = self.adapters.get(source.platform)
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								                    _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                    if _hyg_adapter:
 								                        try:
 								                            await _hyg_adapter.send(
 								                                source.chat_id,
 								                                f"🗜️ Session is large ({_msg_count} messages, "
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								                                f"~{_approx_tokens:,} tokens). Auto-compressing...",
 								                                metadata=_hyg_meta,
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								                            )
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                        except Exception:
 								                            pass
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                    try:
 								                        from run_agent import AIAgent
 								                        _hyg_runtime = _resolve_runtime_agent_kwargs()
 								                        if _hyg_runtime.get("api_key"):
 								                            _hyg_msgs = [
 								                                {"role": m.get("role"), "content": m.get("content")}
 								                                for m in history
 								                                if m.get("role") in ("user", "assistant")
 								                                and m.get("content")
 								                            ]
 								                            if len(_hyg_msgs) >= 4:
 								                                _hyg_agent = AIAgent(
 								                                    **_hyg_runtime,
-												fix(gateway): pass model to temporary AIAgent instances

Memory flush, /compress, and session hygiene create AIAgent without
model=, falling back to the hardcoded default "anthropic/claude-opus-4.6".
This fails with a 400 error when the active provider is openai-codex
(Codex only accepts its own model names like gpt-5.1-codex-mini).

Add _resolve_gateway_model() that mirrors the env/config resolution
already used by _run_agent_sync, and wire it into all three temporary
agent creation sites.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 00:09:37 +01:00
+								                                    model=_hyg_model,
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                    max_iterations=4,
 								                                    quiet_mode=True,
 								                                    enabled_toolsets=["memory"],
 								                                    session_id=session_entry.session_id,
 								                                )
-												fix(gateway): silence background agent terminal output (#3297)

* fix(gateway): silence flush agent terminal output

quiet_mode=True only suppresses AIAgent init messages.
Tool call output still leaks to the terminal through
_safe_print → _print_fn during session reset/expiry.

Since #2670 injected live memory state into the flush prompt,
the flush agent now reliably calls memory tools — making the
output leak noticeable for the first time.

Set _print_fn to a no-op so the background flush is fully silent.

* test(gateway): add test for flush agent terminal silence + fix dotenv mock

- Add TestFlushAgentSilenced: verifies _print_fn is set to a no-op on
  the flush agent so tool output never leaks to the terminal
- Fix pre-existing test failures: replace patch('run_agent.AIAgent')
  with sys.modules mock to avoid importing run_agent (requires openai)
- Add autouse _mock_dotenv fixture so all tests in this file run
  without the dotenv package installed

* fix(display): route KawaiiSpinner output through print_fn to fully silence flush agent

The previous fix set tmp_agent._print_fn = no-op on the flush agent but
spinner output and quiet-mode cute messages bypassed _print_fn entirely:
- KawaiiSpinner captured sys.stdout at __init__ and wrote directly to it
- quiet-mode tool results used builtin print() instead of _safe_print()

Add optional print_fn parameter to KawaiiSpinner.__init__; _write routes
through it when set. Pass self._print_fn to all spinner construction sites
in run_agent.py and change the quiet-mode cute message print to _safe_print.
The existing gateway fix (tmp_agent._print_fn = lambda) now propagates
correctly through both paths.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(gateway): silence hygiene and compression background agents

Two more background AIAgent instances in the gateway were created with
quiet_mode=True but without _print_fn = no-op, causing tool output to
leak to the terminal:
- _hyg_agent (in-turn hygiene memory agent)
- tmp_agent (_compress_context path)

Apply the same _print_fn no-op pattern used for the flush agent.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* chore(display): remove unused _last_flush_time from KawaiiSpinner

Attribute was set but never read; upstream already removed it.
Leftover from conflict resolution during rebase onto upstream/main.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Dilee <uzmpsk.dilekakbas@gmail.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
											
										
										
											2026-03-26 17:40:31 -07:00
+								                                _hyg_agent._print_fn = lambda *a, **kw: None
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                loop = asyncio.get_event_loop()
 								                                _compressed, _ = await loop.run_in_executor(
 								                                    None,
 								                                    lambda: _hyg_agent._compress_context(
 								                                        _hyg_msgs, "",
 								                                        approx_tokens=_approx_tokens,
 								                                    ),
 								                                )
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix(gateway): preserve transcript on /compress and hygiene compression (salvage #3516) (#3556)

* fix(gateway): preserve full transcript on /compress instead of overwriting

The /compress command calls _compress_context() which correctly ends the
old session (preserving its full transcript in SQLite) and creates a new
session_id for the continuation. However, it then immediately called
rewrite_transcript() on the OLD session_id, overwriting the preserved
transcript with the compressed version — destroying searchable history.

Auto-compression (triggered by context pressure) does not have this bug
because the gateway already handles the session_id swap via the
agent.session_id != session_id check after _run_agent_sync.

Fix: after _compress_context creates the new session, write the compressed
messages into the NEW session_id and update the session store pointer.
The old session's full transcript stays intact and searchable via
session_search.

Before: /compress destroys original messages, session_search can't find
details from compressed portions.

After: /compress behaves like /new for history — full transcript preserved,
compressed context for the live session.

* fix(gateway): preserve transcript on /compress and hygiene compression

Apply session_id swap after _compress_context in both /compress handler
and hygiene pre-compression. _compress_context creates a new session
(ending the old one), but both paths were calling rewrite_transcript on
the OLD session_id — overwriting the preserved transcript and destroying
searchable history.

Now follows the same pattern as the auto-compression handler (lines
5415-5423): detect the new session_id, update the session store entry,
and write compressed messages to the new session.

Also fix FakeCompressAgent test mock to include session_id attribute
and simulate the session_id change that real _compress_context performs.

Co-authored-by: MacroAnarchy <MacroAnarchy@users.noreply.github.com>

---------

Co-authored-by: MacroAnarchy <MacroAnarchy@users.noreply.github.com>
											
										
										
											2026-03-28 12:23:43 -07:00
+								                                # _compress_context ends the old session and creates
 								                                # a new session_id.  Write compressed messages into
 								                                # the NEW session so the old transcript stays intact
 								                                # and searchable via session_search.
 								                                _hyg_new_sid = _hyg_agent.session_id
 								                                if _hyg_new_sid != session_entry.session_id:
 								                                    session_entry.session_id = _hyg_new_sid
 								                                    self.session_store._save()
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                self.session_store.rewrite_transcript(
 								                                    session_entry.session_id, _compressed
 								                                )
-												fix: integration hardening for gateway token tracking

Follow-up to 58dbd81 — ensures smooth transition for existing users:

- Backward compat: old session files without last_prompt_tokens
  default to 0 via data.get('last_prompt_tokens', 0)
- /compress, /undo, /retry: reset last_prompt_tokens to 0 after
  rewriting transcripts (stale token counts would under-report)
- Auto-compression hygiene: reset last_prompt_tokens after rewriting
- update_session: use None sentinel (not 0) as default so callers
  can explicitly reset to 0 while normal calls don't clobber
- 6 new tests covering: default value, serialization roundtrip,
  old-format migration, set/reset/no-change semantics
- /reset: new SessionEntry naturally gets last_prompt_tokens=0

2942 tests pass.

											
										
										
											2026-03-10 23:40:24 -07:00
+								                                # Reset stored token count — transcript was rewritten
 								                                session_entry.last_prompt_tokens = 0
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                history = _compressed
 								                                _new_count = len(_compressed)
 								                                _new_tokens = estimate_messages_tokens_rough(
 								                                    _compressed
 								                                )
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                logger.info(
 								                                    "Session hygiene: compressed %s → %s msgs, "
 								                                    "~%s → ~%s tokens",
 								                                    _msg_count, _new_count,
 								                                    f"{_approx_tokens:,}", f"{_new_tokens:,}",
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								                                )
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								                                if _hyg_adapter:
 								                                    try:
 								                                        await _hyg_adapter.send(
 								                                            source.chat_id,
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                            f"🗜️ Compressed: {_msg_count} → "
 								                                            f"{_new_count} messages, "
 								                                            f"~{_approx_tokens:,} → "
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								                                            f"~{_new_tokens:,} tokens",
 								                                            metadata=_hyg_meta,
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								                                        )
 								                                    except Exception:
 								                                        pass
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                # Still too large after compression — warn user
 								                                if _new_tokens >= _warn_token_threshold:
 								                                    logger.warning(
 								                                        "Session hygiene: still ~%s tokens after "
 								                                        "compression — suggesting /reset",
 								                                        f"{_new_tokens:,}",
 								                                    )
 								                                    if _hyg_adapter:
 								                                        try:
 								                                            await _hyg_adapter.send(
 								                                                source.chat_id,
 								                                                "⚠️ Session is still very large "
 								                                                "after compression "
 								                                                f"(~{_new_tokens:,} tokens). "
 								                                                "Consider using /reset to start "
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								                                                "fresh if you experience issues.",
 								                                                metadata=_hyg_meta,
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                            )
 								                                        except Exception:
 								                                            pass
 								                    except Exception as e:
 								                        logger.warning(
 								                            "Session hygiene auto-compress failed: %s", e
 								                        )
 								                        # Compression failed and session is dangerously large
 								                        if _approx_tokens >= _warn_token_threshold:
 								                            _hyg_adapter = self.adapters.get(source.platform)
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								                            _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                            if _hyg_adapter:
 								                                try:
 								                                    await _hyg_adapter.send(
 								                                        source.chat_id,
 								                                        f"⚠️ Session is very large "
 								                                        f"({_msg_count} messages, "
 								                                        f"~{_approx_tokens:,} tokens) and "
 								                                        "auto-compression failed. Consider "
 								                                        "using /compress or /reset to avoid "
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								                                        "issues.",
 								                                        metadata=_hyg_meta,
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                    )
 								                                except Exception:
 								                                    pass
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
+								        # First-message onboarding -- only on the very first interaction ever
 								        if not history and not self.session_store.has_any_sessions():
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								            context_prompt += (
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
+								                "\n\n[System note: This is the user's very first message ever. "
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                "Briefly introduce yourself and mention that /help shows available commands. "
 								                "Keep the introduction concise -- one or two sentences max.]"
 								            )
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
+								        # One-time prompt if no home channel is set for this platform
 								        if not history and source.platform and source.platform != Platform.LOCAL:
 								            platform_name = source.platform.value
 								            env_key = f"{platform_name.upper()}_HOME_CHANNEL"
 								            if not os.getenv(env_key):
 								                adapter = self.adapters.get(source.platform)
 								                if adapter:
 								                    await adapter.send(
 								                        source.chat_id,
 								                        f"📬 No home channel is set for {platform_name.title()}. "
 								                        f"A home channel is where Hermes delivers cron job results "
 								                        f"and cross-platform messages.\n\n"
-												feat: unify set-home command naming across platforms

- Updated the command name from `/set-home` to `/sethome` in the GatewayRunner class for consistency.
- Added a new slash command `/sethome` in the Discord adapter to set the home channel.
- Registered the `/sethome` command in the Telegram adapter to align with the updated naming convention.

											
										
										
											2026-02-23 15:01:22 -08:00
+								                        f"Type /sethome to make this chat your home channel, "
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
+								                        f"or ignore to skip."
 								                    )
-												feat: add voice channel awareness — inject participant and speaking state into agent context

											
										
										
											2026-03-14 02:14:34 +03:00
+								        # -----------------------------------------------------------------
 								        # Voice channel awareness — inject current voice channel state
 								        # into context so the agent knows who is in the channel and who
 								        # is speaking, without needing a separate tool call.
 								        # -----------------------------------------------------------------
 								        if source.platform == Platform.DISCORD:
 								            adapter = self.adapters.get(Platform.DISCORD)
 								            guild_id = self._get_guild_id(event)
 								            if guild_id and adapter and hasattr(adapter, "get_voice_channel_context"):
 								                vc_context = adapter.get_voice_channel_context(guild_id)
 								                if vc_context:
 								                    context_prompt += f"\n\n{vc_context}"
-												Enhance image handling and analysis capabilities across platforms

- Updated the vision tool to accept both HTTP/HTTPS URLs and local file paths for image analysis.
- Implemented caching of user-uploaded images in local directories to ensure reliable access for the vision tool, addressing issues with ephemeral URLs.
- Enhanced platform adapters (Discord, Telegram, WhatsApp) to download and cache images, allowing for immediate analysis and enriched message context.
- Added a new method to auto-analyze images attached by users, enriching the conversation with detailed descriptions.
- Improved documentation for image handling processes and updated related functions for clarity and efficiency.

											
										
										
											2026-02-15 16:10:50 -08:00
+								        # -----------------------------------------------------------------
 								        # Auto-analyze images sent by the user
 								        #
 								        # If the user attached image(s), we run the vision tool eagerly so
 								        # the conversation model always receives a text description.  The
 								        # local file path is also included so the model can re-examine the
 								        # image later with a more targeted question via vision_analyze.
 								        #
 								        # We filter to image paths only (by media_type) so that non-image
 								        # attachments (documents, audio, etc.) are not sent to the vision
 								        # tool even when they appear in the same message.
 								        # -----------------------------------------------------------------
 								        message_text = event.text or ""
 								        if event.media_urls:
 								            image_paths = []
 								            for i, path in enumerate(event.media_urls):
 								                # Check media_types if available; otherwise infer from message type
 								                mtype = event.media_types[i] if i < len(event.media_types) else ""
 								                is_image = (
 								                    mtype.startswith("image/")
 								                    or event.message_type == MessageType.PHOTO
 								                )
 								                if is_image:
 								                    image_paths.append(path)
 								            if image_paths:
 								                message_text = await self._enrich_message_with_vision(
 								                    message_text, image_paths
 								                )
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        # -----------------------------------------------------------------
 								        # Auto-transcribe voice/audio messages sent by the user
 								        # -----------------------------------------------------------------
 								        if event.media_urls:
 								            audio_paths = []
 								            for i, path in enumerate(event.media_urls):
 								                mtype = event.media_types[i] if i < len(event.media_types) else ""
 								                is_audio = (
 								                    mtype.startswith("audio/")
 								                    or event.message_type in (MessageType.VOICE, MessageType.AUDIO)
 								                )
 								                if is_audio:
 								                    audio_paths.append(path)
 								            if audio_paths:
 								                message_text = await self._enrich_message_with_transcription(
 								                    message_text, audio_paths
 								                )
-												fix: direct user message on STT failure + hermes-agent-setup skill

When a user sends a voice message and STT isn't configured, the gateway
now sends a clear message directly to the user explaining how to set up
voice transcription, rather than relying on the agent to relay an
injected context note (which often gets misinterpreted).

Also adds a hermes-agent-setup bundled skill covering STT/TTS setup,
tool configuration, dependency installation, and troubleshooting.

											
										
										
											2026-03-18 03:01:41 -07:00
+								                # If STT failed, send a direct message to the user so they
 								                # know voice isn't configured — don't rely on the agent to
 								                # relay the error clearly.
 								                _stt_fail_markers = (
 								                    "No STT provider",
 								                    "STT is disabled",
 								                    "can't listen",
 								                    "VOICE_TOOLS_OPENAI_KEY",
 								                )
 								                if any(m in message_text for m in _stt_fail_markers):
 								                    _stt_adapter = self.adapters.get(source.platform)
 								                    _stt_meta = {"thread_id": source.thread_id} if source.thread_id else None
 								                    if _stt_adapter:
 								                        try:
-												fix: check skill availability before hinting at hermes-agent-setup

Only mention the hermes-agent-setup skill in STT failure notes (both
the direct user message and the agent context note) when the skill is
actually installed. Uses _find_skill() from skill_manager_tool.

Also confirmed: STT is the only user-facing failure case where the
setup skill hint helps. Vision failures are transient API issues,
runtime transcription errors indicate a configured-but-broken provider,
and platform startup warnings are server logs.

											
										
										
											2026-03-18 03:17:23 -07:00
+								                            _stt_msg = (
-												fix: direct user message on STT failure + hermes-agent-setup skill

When a user sends a voice message and STT isn't configured, the gateway
now sends a clear message directly to the user explaining how to set up
voice transcription, rather than relying on the agent to relay an
injected context note (which often gets misinterpreted).

Also adds a hermes-agent-setup bundled skill covering STT/TTS setup,
tool configuration, dependency installation, and troubleshooting.

											
										
										
											2026-03-18 03:01:41 -07:00
+								                                "🎤 I received your voice message but can't transcribe it — "
 								                                "no speech-to-text provider is configured.\n\n"
 								                                "To enable voice: install faster-whisper "
 								                                "(`pip install faster-whisper` in the Hermes venv) "
 								                                "and set `stt.enabled: true` in config.yaml, "
-												fix: check skill availability before hinting at hermes-agent-setup

Only mention the hermes-agent-setup skill in STT failure notes (both
the direct user message and the agent context note) when the skill is
actually installed. Uses _find_skill() from skill_manager_tool.

Also confirmed: STT is the only user-facing failure case where the
setup skill hint helps. Vision failures are transient API issues,
runtime transcription errors indicate a configured-but-broken provider,
and platform startup warnings are server logs.

											
										
										
											2026-03-18 03:17:23 -07:00
+								                                "then /restart the gateway."
 								                            )
 								                            # Point to setup skill if it's installed
 								                            if self._has_setup_skill():
 								                                _stt_msg += "\n\nFor full setup instructions, type: `/skill hermes-agent-setup`"
 								                            await _stt_adapter.send(
 								                                source.chat_id, _stt_msg,
-												fix: direct user message on STT failure + hermes-agent-setup skill

When a user sends a voice message and STT isn't configured, the gateway
now sends a clear message directly to the user explaining how to set up
voice transcription, rather than relying on the agent to relay an
injected context note (which often gets misinterpreted).

Also adds a hermes-agent-setup bundled skill covering STT/TTS setup,
tool configuration, dependency installation, and troubleshooting.

											
										
										
											2026-03-18 03:01:41 -07:00
+								                                metadata=_stt_meta,
 								                            )
 								                        except Exception:
 								                            pass
-												feat(telegram): add document file processing for PDF, text, and Office files

Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-27 11:44:57 -05:00
 								        # -----------------------------------------------------------------
 								        # Enrich document messages with context notes for the agent
 								        # -----------------------------------------------------------------
 								        if event.media_urls and event.message_type == MessageType.DOCUMENT:
 								            for i, path in enumerate(event.media_urls):
 								                mtype = event.media_types[i] if i < len(event.media_types) else ""
 								                if not (mtype.startswith("application/") or mtype.startswith("text/")):
 								                    continue
 								                # Extract display filename by stripping the doc_{uuid12}_ prefix
 								                import os as _os
 								                basename = _os.path.basename(path)
 								                # Format: doc_<12hex>_<original_filename>
 								                parts = basename.split("_", 2)
 								                display_name = parts[2] if len(parts) >= 3 else basename
-												fix(security): patch path traversal, size bypass, and prompt injection in document processing

- Sanitize filenames in cache_document_from_bytes to prevent path traversal (strip directory components, null bytes, resolve check)
- Reject documents with None file_size instead of silently allowing download
- Cap text file injection at 100 KB to prevent oversized prompt payloads
- Sanitize display_name in run.py context notes to block prompt injection via filenames
- Add 35 unit tests covering document cache utilities and Telegram document handling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-27 11:53:46 -05:00
+								                # Sanitize to prevent prompt injection via filenames
 								                import re as _re
 								                display_name = _re.sub(r'[^\w.\- ]', '_', display_name)
-												feat(telegram): add document file processing for PDF, text, and Office files

Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-27 11:44:57 -05:00
 								                if mtype.startswith("text/"):
 								                    context_note = (
 								                        f"[The user sent a text document: '{display_name}'. "
 								                        f"Its content has been included below. "
 								                        f"The file is also saved at: {path}]"
 								                    )
 								                else:
 								                    context_note = (
 								                        f"[The user sent a document: '{display_name}'. "
 								                        f"The file is saved at: {path}. "
 								                        f"Ask the user what they'd like you to do with it.]"
 								                    )
 								                message_text = f"{context_note}\n\n{message_text}"
-												feat(gateway): inject reply-to message context for out-of-session replies (#1594)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)

Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.

- Apply truncate_message() chunking in _send_to_platform() before
  dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
  in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement

Cherry-picked from PR #1557 by llbn.

* fix(approval): show full command in dangerous command approval (#1553)

Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:

- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
  in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests

Cherry-picked from PR #1566 by crazywriter1.

* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)

The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.

Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.

* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)

When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.

Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.

Cherry-picked from PR #1593 by ygd58.

* fix(security): block sandbox backend creds from subprocess env (#1264)

Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.

Cherry-picked from PR #1571 by ygd58.

* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.

Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.

The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).

* fix(gateway): /model shows active fallback model instead of config default (#1615)

When the agent falls back to a different model (e.g. due to rate
limiting), /model still showed the config default. Now tracks the
effective model/provider after each agent run and displays it.

Cleared when the primary model succeeds again or the user explicitly
switches via /model.

Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for
test compatibility.

* feat(gateway): inject reply-to message context for out-of-session replies (#1594)

When a user replies to a Telegram message, check if the quoted text
exists in the current session transcript. If missing (from cron jobs,
background tasks, or old sessions), prepend [Replying to: "..."] to
the message so the agent has context about what's being referenced.

- Add reply_to_text field to MessageEvent (base.py)
- Populate from Telegram's reply_to_message (text or caption)
- Inject context in _handle_message when not found in history

Based on PR #1596 by anpicasso (cherry-picked reply-to feature only,
excluded unrelated /server command and background delegation changes).

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
Co-authored-by: Angello Picasso <angello.picasso@devsu.com>
											
										
										
											2026-03-17 02:31:27 -07:00
+								        # -----------------------------------------------------------------
 								        # Inject reply context when user replies to a message not in history.
 								        # Telegram (and other platforms) let users reply to specific messages,
 								        # but if the quoted message is from a previous session, cron delivery,
 								        # or background task, the agent has no context about what's being
 								        # referenced. Prepend the quoted text so the agent understands. (#1594)
 								        # -----------------------------------------------------------------
 								        if getattr(event, 'reply_to_text', None) and event.reply_to_message_id:
 								            reply_snippet = event.reply_to_text[:500]
 								            found_in_history = any(
 								                reply_snippet[:200] in (msg.get("content") or "")
 								                for msg in history
 								                if msg.get("role") in ("assistant", "user", "tool")
 								            )
 								            if not found_in_history:
 								                message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        try:
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								            # Emit agent:start hook
 								            hook_ctx = {
 								                "platform": source.platform.value if source.platform else "",
 								                "user_id": source.user_id,
 								                "session_id": session_entry.session_id,
 								                "message": message_text[:500],
 								            }
 								            await self.hooks.emit("agent:start", hook_ctx)
-												feat: @ context references — inline file, folder, diff, git, and URL injection

Add @file:path, @folder:dir, @diff, @staged, @git:N, and @url:
references that expand inline before the message reaches the LLM.
Supports line ranges (@file:main.py:10-50), token budget enforcement
(soft warn at 25%, hard block at 50%), and path sandboxing for gateway.

Core module from PR #2090 by @kshitijk4poor. CLI and gateway wiring
rewritten against current main. Fixed asyncio.run() crash when called
from inside a running event loop (gateway).

Closes #682.

											
										
										
											2026-03-21 15:57:13 -07:00
 								            # Expand @ context references (@file:, @folder:, @diff, etc.)
 								            if "@" in message_text:
 								                try:
 								                    from agent.context_references import preprocess_context_references_async
 								                    from agent.model_metadata import get_model_context_length
 								                    _msg_cwd = os.environ.get("MESSAGING_CWD", os.path.expanduser("~"))
 								                    _msg_ctx_len = get_model_context_length(
 								                        self._model, base_url=self._base_url or "")
 								                    _ctx_result = await preprocess_context_references_async(
 								                        message_text, cwd=_msg_cwd,
 								                        context_length=_msg_ctx_len, allowed_root=_msg_cwd)
 								                    if _ctx_result.blocked:
 								                        _adapter = self.adapters.get(source.platform)
 								                        if _adapter:
 								                            await _adapter.send(
 								                                source.chat_id,
 								                                "\n".join(_ctx_result.warnings) or "Context injection refused.",
 								                            )
 								                        return
 								                    if _ctx_result.expanded:
 								                        message_text = _ctx_result.message
 								                except Exception as exc:
 								                    logger.debug("@ context reference expansion failed: %s", exc)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            # Run the agent
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								            agent_result = await self._run_agent(
-												Enhance image handling and analysis capabilities across platforms

- Updated the vision tool to accept both HTTP/HTTPS URLs and local file paths for image analysis.
- Implemented caching of user-uploaded images in local directories to ensure reliable access for the vision tool, addressing issues with ephemeral URLs.
- Enhanced platform adapters (Discord, Telegram, WhatsApp) to download and cache images, allowing for immediate analysis and enriched message context.
- Added a new method to auto-analyze images attached by users, enriching the conversation with detailed descriptions.
- Improved documentation for image handling processes and updated related functions for clarity and efficiency.

											
										
										
											2026-02-15 16:10:50 -08:00
+								                message=message_text,
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                context_prompt=context_prompt,
 								                history=history,
 								                source=source,
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								                session_id=session_entry.session_id,
-												fix(gateway/slack): send progress messages to correct thread (#3063)

Co-authored-by: Jneeee <jneeee@outlook.com>
											
										
										
											2026-03-25 15:51:15 -07:00
+								                session_key=session_key,
 								                event_message_id=event.message_id,
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            )
-												feat(discord): persistent typing indicator for DMs

Based on PR #2427 by @oxngon (core feature extracted, reformatting
and unrelated changes dropped).

Discord's TYPING_START gateway event is unreliable for bot DMs. This
adds a background typing loop that hits POST /channels/{id}/typing
every 8 seconds (indicator lasts ~10s) until the response is sent.

- send_typing() starts a per-channel background loop (idempotent)
- stop_typing() cancels it (called after _run_agent returns)
- Base adapter gets stop_typing() as a no-op default
- Per-channel tracking via _typing_tasks dict prevents duplicates

											
										
										
											2026-03-22 04:47:53 -07:00
 								            # Stop persistent typing indicator now that the agent is done
 								            try:
 								                _typing_adapter = self.adapters.get(source.platform)
 								                if _typing_adapter and hasattr(_typing_adapter, "stop_typing"):
 								                    await _typing_adapter.stop_typing(source.chat_id)
 								            except Exception:
 								                pass
-												fix(anthropic): retry 429/529 errors and surface error details to users

- 429 rate limit and 529 overloaded were incorrectly treated as
  non-retryable client errors, causing immediate failure instead of
  exponential backoff retry. Users hitting Anthropic rate limits got
  silent failures or no response at all.
- Generic "Sorry, I encountered an unexpected error" now includes
  error type, details, and status-specific hints (auth, rate limit,
  overloaded).
- Failed agent with final_response=None now surfaces the actual
  error message instead of returning an empty response.

											
										
										
											2026-03-17 00:30:26 +03:00
+								            response = agent_result.get("final_response") or ""
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								            agent_messages = agent_result.get("messages", [])
-												fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
											
										
										
											2026-03-12 05:38:19 -07:00
-												fix(anthropic): retry 429/529 errors and surface error details to users

- 429 rate limit and 529 overloaded were incorrectly treated as
  non-retryable client errors, causing immediate failure instead of
  exponential backoff retry. Users hitting Anthropic rate limits got
  silent failures or no response at all.
- Generic "Sorry, I encountered an unexpected error" now includes
  error type, details, and status-specific hints (auth, rate limit,
  overloaded).
- Failed agent with final_response=None now surfaces the actual
  error message instead of returning an empty response.

											
										
										
											2026-03-17 00:30:26 +03:00
+								            # Surface error details when the agent failed silently (final_response=None)
 								            if not response and agent_result.get("failed"):
 								                error_detail = agent_result.get("error", "unknown error")
-												fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
											
										
										
											2026-03-17 01:50:59 -07:00
+								                error_str = str(error_detail).lower()
 								                # Detect context-overflow failures and give specific guidance.
 								                # Generic 400 "Error" from Anthropic with large sessions is the
 								                # most common cause of this (#1630).
 								                _is_ctx_fail = any(p in error_str for p in (
 								                    "context", "token", "too large", "too long",
 								                    "exceed", "payload",
 								                )) or (
 								                    "400" in error_str
 								                    and len(history) > 50
-												fix(anthropic): retry 429/529 errors and surface error details to users

- 429 rate limit and 529 overloaded were incorrectly treated as
  non-retryable client errors, causing immediate failure instead of
  exponential backoff retry. Users hitting Anthropic rate limits got
  silent failures or no response at all.
- Generic "Sorry, I encountered an unexpected error" now includes
  error type, details, and status-specific hints (auth, rate limit,
  overloaded).
- Failed agent with final_response=None now surfaces the actual
  error message instead of returning an empty response.

											
										
										
											2026-03-17 00:30:26 +03:00
+								                )
-												fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
											
										
										
											2026-03-17 01:50:59 -07:00
+								                if _is_ctx_fail:
 								                    response = (
 								                        "⚠️ Session too large for the model's context window.\n"
 								                        "Use /compact to compress the conversation, or "
 								                        "/reset to start fresh."
 								                    )
 								                else:
 								                    response = (
 								                        f"The request failed: {str(error_detail)[:300]}\n"
 								                        "Try again or use /reset to start a fresh session."
 								                    )
-												fix: sync session_id after mid-run context compression

Critical bug: when the agent's context compressor fires during a tool
loop (_compress_context), it creates a new session_id and writes the
compressed messages there. But the gateway's session_entry still pointed
to the old session_id. On the next message, load_transcript() loaded
the stale pre-compression transcript, causing:

- Context bloat returning every turn
- Repeated compression cycles
- Loss of carefully compressed context

Fix: after run_conversation() returns, check if the agent's session_id
changed (compression split) and sync it back to the session store entry.
Also pass the effective session_id in the result dict so _handle_message
writes transcript entries to the correct session.

This affects ALL gateway adapters, not just webhook.
											
										
										
											2026-03-13 04:14:35 -07:00
+								            # If the agent's session_id changed during compression, update
 								            # session_entry so transcript writes below go to the right session.
 								            if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
 								                session_entry.session_id = agent_result["session_id"]
-												fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
											
										
										
											2026-03-12 05:38:19 -07:00
+								            # Prepend reasoning/thinking if display is enabled
 								            if getattr(self, "_show_reasoning", False) and response:
 								                last_reasoning = agent_result.get("last_reasoning")
 								                if last_reasoning:
 								                    # Collapse long reasoning to keep messages readable
 								                    lines = last_reasoning.strip().splitlines()
 								                    if len(lines) > 15:
 								                        display_reasoning = "\n".join(lines[:15])
 								                        display_reasoning += f"\n_... ({len(lines) - 15} more lines)_"
 								                    else:
 								                        display_reasoning = last_reasoning.strip()
 								                    response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}"
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								            # Emit agent:end hook
 								            await self.hooks.emit("agent:end", {
 								                **hook_ctx,
 								                "response": (response or "")[:500],
 								            })
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
+								            # Check for pending process watchers (check_interval on background processes)
 								            try:
 								                from tools.process_registry import process_registry
 								                while process_registry.pending_watchers:
 								                    watcher = process_registry.pending_watchers.pop(0)
 								                    asyncio.create_task(self._run_process_watcher(watcher))
 								            except Exception as e:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.error("Process watcher setup error: %s", e)
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
-												Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews

Add AGENTS and SOUL.md support

Add Exec Approval

											
										
										
											2026-02-12 10:05:08 -08:00
+								            # Check if the agent encountered a dangerous command needing approval
 								            try:
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								                from tools.approval import pop_pending
-												fix(gateway): replace bare text approval with /approve and /deny commands (#2002)

The gateway approval system previously intercepted bare 'yes'/'no' text
from the user's next message to approve/deny dangerous commands. This was
fragile and dangerous — if the agent asked a clarify question and the user
said 'yes' to answer it, the gateway would execute the pending dangerous
command instead. (Fixes #1888)

Changes:
- Remove bare text matching ('yes', 'y', 'approve', 'ok', etc.) from
  _handle_message approval check
- Add /approve and /deny as gateway-only slash commands in the command
  registry
- /approve supports scoping: /approve (one-time), /approve session,
  /approve always (permanent)
- Add 5-minute timeout for stale approvals
- Gateway appends structured instructions to the agent response when a
  dangerous command is pending, telling the user exactly how to respond
- 9 tests covering approve, deny, timeout, scoping, and verification
  that bare 'yes' no longer triggers execution

Credit to @solo386 and @FlyByNight69420 for identifying and reporting
this security issue in PR #1971 and issue #1888.

Co-authored-by: Test <test@test.com>
											
										
										
											2026-03-18 16:58:20 -07:00
+								                import time as _time
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								                pending = pop_pending(session_key)
 								                if pending:
-												fix(gateway): replace bare text approval with /approve and /deny commands (#2002)

The gateway approval system previously intercepted bare 'yes'/'no' text
from the user's next message to approve/deny dangerous commands. This was
fragile and dangerous — if the agent asked a clarify question and the user
said 'yes' to answer it, the gateway would execute the pending dangerous
command instead. (Fixes #1888)

Changes:
- Remove bare text matching ('yes', 'y', 'approve', 'ok', etc.) from
  _handle_message approval check
- Add /approve and /deny as gateway-only slash commands in the command
  registry
- /approve supports scoping: /approve (one-time), /approve session,
  /approve always (permanent)
- Add 5-minute timeout for stale approvals
- Gateway appends structured instructions to the agent response when a
  dangerous command is pending, telling the user exactly how to respond
- 9 tests covering approve, deny, timeout, scoping, and verification
  that bare 'yes' no longer triggers execution

Credit to @solo386 and @FlyByNight69420 for identifying and reporting
this security issue in PR #1971 and issue #1888.

Co-authored-by: Test <test@test.com>
											
										
										
											2026-03-18 16:58:20 -07:00
+								                    pending["timestamp"] = _time.time()
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								                    self._pending_approvals[session_key] = pending
-												fix(gateway): replace bare text approval with /approve and /deny commands (#2002)

The gateway approval system previously intercepted bare 'yes'/'no' text
from the user's next message to approve/deny dangerous commands. This was
fragile and dangerous — if the agent asked a clarify question and the user
said 'yes' to answer it, the gateway would execute the pending dangerous
command instead. (Fixes #1888)

Changes:
- Remove bare text matching ('yes', 'y', 'approve', 'ok', etc.) from
  _handle_message approval check
- Add /approve and /deny as gateway-only slash commands in the command
  registry
- /approve supports scoping: /approve (one-time), /approve session,
  /approve always (permanent)
- Add 5-minute timeout for stale approvals
- Gateway appends structured instructions to the agent response when a
  dangerous command is pending, telling the user exactly how to respond
- 9 tests covering approve, deny, timeout, scoping, and verification
  that bare 'yes' no longer triggers execution

Credit to @solo386 and @FlyByNight69420 for identifying and reporting
this security issue in PR #1971 and issue #1888.

Co-authored-by: Test <test@test.com>
											
										
										
											2026-03-18 16:58:20 -07:00
+								                    # Append structured instructions so the user knows how to respond
 								                    cmd_preview = pending.get("command", "")
 								                    if len(cmd_preview) > 200:
 								                        cmd_preview = cmd_preview[:200] + "..."
 								                    approval_hint = (
 								                        f"\n\n⚠️ **Dangerous command requires approval:**\n"
 								                        f"```\n{cmd_preview}\n```\n"
 								                        f"Reply `/approve` to execute, `/approve session` to approve this pattern "
 								                        f"for the session, or `/deny` to cancel."
 								                    )
 								                    response = (response or "") + approval_hint
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								            except Exception as e:
 								                logger.debug("Failed to check pending approvals: %s", e)
-												Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews

Add AGENTS and SOUL.md support

Add Exec Approval

											
										
										
											2026-02-12 10:05:08 -08:00
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								            # Save the full conversation to the transcript, including tool calls.
 								            # This preserves the complete agent loop (tool_calls, tool results,
 								            # intermediate reasoning) so sessions can be resumed with full context
 								            # and transcripts are useful for debugging and training data.
-												fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
											
										
										
											2026-03-17 01:50:59 -07:00
+								            #
 								            # IMPORTANT: When the agent failed before producing any response
 								            # (e.g. context-overflow 400), do NOT persist the user's message.
 								            # Persisting it would make the session even larger, causing the
 								            # same failure on the next attempt — an infinite loop. (#1630)
 								            agent_failed_early = (
 								                agent_result.get("failed")
 								                and not agent_result.get("final_response")
 								            )
 								            if agent_failed_early:
 								                logger.info(
 								                    "Skipping transcript persistence for failed request in "
 								                    "session %s to prevent session growth loop.",
 								                    session_entry.session_id,
 								                )
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								            ts = datetime.now().isoformat()
 								            # If this is a fresh session (no history), write the full tool
 								            # definitions as the first entry so the transcript is self-describing
 								            # -- the same list of dicts sent as tools=[...] in the API request.
-												fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
											
										
										
											2026-03-17 01:50:59 -07:00
+								            if agent_failed_early:
 								                pass  # Skip all transcript writes — don't grow a broken session
 								            elif not history:
-												Update tool definitions handling in GatewayRunner

- Modified the retrieval of tool definitions to use the agent result's "tools" key, ensuring accurate logging in the transcript.
- Enhanced the response structure to include tools in the final output, improving the clarity of tool usage in session interactions.

											
										
										
											2026-02-16 00:55:18 -08:00
+								                tool_defs = agent_result.get("tools", [])
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								                self.session_store.append_to_transcript(
 								                    session_entry.session_id,
 								                    {
 								                        "role": "session_meta",
 								                        "tools": tool_defs or [],
 								                        "model": os.getenv("HERMES_MODEL", ""),
 								                        "platform": source.platform.value if source.platform else "",
 								                        "timestamp": ts,
 								                    }
 								                )
-												fix(gateway): use filtered history length for transcript message extraction

The transcript extraction used len(history) to find new messages, but
history includes session_meta entries that are stripped before passing
to the agent. This mismatch caused 1 message to be lost from the
transcript on every turn after the first, because the slice offset
was too high. Use the filtered history length (history_offset) returned
by _run_agent instead.

Also changed the else branch from returning all agent_messages to
returning an empty list, so compressed/shorter agent output does not
duplicate the entire history into the transcript.

											
										
										
											2026-03-04 21:34:40 +03:00
+								            # Find only the NEW messages from this turn (skip history we loaded).
 								            # Use the filtered history length (history_offset) that was actually
 								            # passed to the agent, not len(history) which includes session_meta
 								            # entries that were stripped before the agent saw them.
-												fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
											
										
										
											2026-03-17 01:50:59 -07:00
+								            if not agent_failed_early:
 								                history_len = agent_result.get("history_offset", len(history))
 								                new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
 								                # If no new messages found (edge case), fall back to simple user/assistant
 								                if not new_messages:
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								                    self.session_store.append_to_transcript(
 								                        session_entry.session_id,
-												fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
											
										
										
											2026-03-17 01:50:59 -07:00
+								                        {"role": "user", "content": message_text, "timestamp": ts}
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								                    )
-												fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
											
										
										
											2026-03-17 01:50:59 -07:00
+								                    if response:
 								                        self.session_store.append_to_transcript(
 								                            session_entry.session_id,
 								                            {"role": "assistant", "content": response, "timestamp": ts}
 								                        )
 								                else:
 								                    # The agent already persisted these messages to SQLite via
 								                    # _flush_messages_to_session_db(), so skip the DB write here
 								                    # to prevent the duplicate-write bug (#860).  We still write
 								                    # to JSONL for backward compatibility and as a backup.
 								                    agent_persisted = self._session_db is not None
 								                    for msg in new_messages:
 								                        # Skip system messages (they're rebuilt each run)
 								                        if msg.get("role") == "system":
 								                            continue
 								                        # Add timestamp to each message for debugging
 								                        entry = {**msg, "timestamp": ts}
 								                        self.session_store.append_to_transcript(
 								                            session_entry.session_id, entry,
 								                            skip_db=agent_persisted,
 								                        )
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												fix: backfill model on gateway sessions after agent runs

Gateway sessions end up with model=NULL because the session row is
created before AIAgent is constructed.  After the agent responds,
update_session() writes token counts but never fills in the model.

Thread agent.model through _run_agent()'s return dict into
update_session() → update_token_counts().  The SQL uses
COALESCE(model, ?) so it only fills NULL rows — never overwrites
a model already set at creation time (e.g. CLI sessions).

If the agent falls back to a different provider, agent.model is
updated in-place by _try_activate_fallback(), so the recorded value
reflects whichever model actually produced the response.

Fixes #987

											
										
										
											2026-03-11 17:44:37 -07:00
+								            # Update session with actual prompt token count and model from the agent
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								            self.session_store.update_session(
 								                session_entry.session_key,
-												fix(gateway): make /status report live state and tokens (#1476)
											
										
										
											2026-03-15 19:18:58 -07:00
+								                input_tokens=agent_result.get("input_tokens", 0),
 								                output_tokens=agent_result.get("output_tokens", 0),
-												feat: add route-aware pricing estimates (#1695)

Salvaged from PR #1563 by @kshitijk4poor. Cherry-picked with authorship preserved.

- Route-aware pricing architecture replacing static MODEL_PRICING + heuristics
- Canonical usage normalization (Anthropic/OpenAI/Codex API shapes)
- Cache-aware billing (separate cache_read/cache_write rates)
- Cost status tracking (estimated/included/unknown/actual)
- OpenRouter live pricing via models API
- Schema migration v4→v5 with billing metadata columns
- Removed speculative forward-looking entries
- Removed cost display from CLI status bar
- Threaded OpenRouter metadata pre-warm

Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-17 03:44:44 -07:00
+								                cache_read_tokens=agent_result.get("cache_read_tokens", 0),
 								                cache_write_tokens=agent_result.get("cache_write_tokens", 0),
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								                last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
-												fix: backfill model on gateway sessions after agent runs

Gateway sessions end up with model=NULL because the session row is
created before AIAgent is constructed.  After the agent responds,
update_session() writes token counts but never fills in the model.

Thread agent.model through _run_agent()'s return dict into
update_session() → update_token_counts().  The SQL uses
COALESCE(model, ?) so it only fills NULL rows — never overwrites
a model already set at creation time (e.g. CLI sessions).

If the agent falls back to a different provider, agent.model is
updated in-place by _try_activate_fallback(), so the recorded value
reflects whichever model actually produced the response.

Fixes #987

											
										
										
											2026-03-11 17:44:37 -07:00
+								                model=agent_result.get("model"),
-												feat: add route-aware pricing estimates (#1695)

Salvaged from PR #1563 by @kshitijk4poor. Cherry-picked with authorship preserved.

- Route-aware pricing architecture replacing static MODEL_PRICING + heuristics
- Canonical usage normalization (Anthropic/OpenAI/Codex API shapes)
- Cache-aware billing (separate cache_read/cache_write rates)
- Cost status tracking (estimated/included/unknown/actual)
- OpenRouter live pricing via models API
- Schema migration v4→v5 with billing metadata columns
- Removed speculative forward-looking entries
- Removed cost display from CLI status bar
- Threaded OpenRouter metadata pre-warm

Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-17 03:44:44 -07:00
+								                estimated_cost_usd=agent_result.get("estimated_cost_usd"),
 								                cost_status=agent_result.get("cost_status"),
 								                cost_source=agent_result.get("cost_source"),
 								                provider=agent_result.get("provider"),
 								                base_url=agent_result.get("base_url"),
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								            )
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
 								            # Auto voice reply: send TTS audio before the text response
-												fix(voice): enable TTS voice reply when streaming is active (#2322)

When streaming is enabled, the base adapter receives None from
_handle_message (already_sent=True) and cannot run auto-TTS for
voice input. The runner was unconditionally skipping voice input
TTS assuming the base adapter would handle it.

Now the runner takes over TTS responsibility when streaming has
already delivered the text response, so voice channel playback
works with both streaming on and off.

Streaming off behavior is unchanged (default already_sent=False
preserves the original code path exactly).

Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
											
										
										
											2026-03-21 08:08:37 -07:00
+								            _already_sent = bool(agent_result.get("already_sent"))
 								            if self._should_send_voice_reply(event, response, agent_messages, already_sent=_already_sent):
-												fix: extract voice reply logic and add comprehensive tests

- Fix tempfile.mktemp() TOCTOU race in Discord voice input (use NamedTemporaryFile)
- Extract voice reply decision from _handle_message into _should_send_voice_reply()
- Rewrite TestAutoVoiceReply to call real method instead of testing a copy
- Add 59 new tests: VoiceReceiver, VC commands, adapter methods, streaming TTS

											
										
										
											2026-03-11 23:18:49 +03:00
+								                await self._send_voice_reply(event, response)
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
-												fix(gateway): deliver MEDIA: files after streaming responses

When streaming is enabled, text chunks are sent to the user in
real-time including raw MEDIA: tags. The normal post-processing in
_process_message_background is skipped when already_sent=True, so
MEDIA: files were never extracted or delivered — the user just saw
the raw MEDIA:/path/to/file text.

Fix: after streaming completes, extract MEDIA: tags and local file
paths from the response and deliver them via the platform adapter.
The text is already sent (with the raw tag visible in the stream),
but the actual files now get delivered as attachments.

											
										
										
											2026-03-21 16:01:25 -07:00
+								            # If streaming already delivered the response, extract and
 								            # deliver any MEDIA: files before returning None.  Streaming
 								            # sends raw text chunks that include MEDIA: tags — the normal
 								            # post-processing in _process_message_background is skipped
 								            # when already_sent is True, so media files would never be
 								            # delivered without this.
-												feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent

Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:

- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
  on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
  a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
  returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally

Config (config.yaml):
  streaming:
    enabled: true
    transport: edit      # progressive editMessageText
    edit_interval: 0.3   # seconds between edits
    buffer_threshold: 40 # chars before forcing flush
    cursor: ' ▉'

Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).

											
										
										
											2026-03-16 05:52:42 -07:00
+								            if agent_result.get("already_sent"):
-												fix(gateway): deliver MEDIA: files after streaming responses

When streaming is enabled, text chunks are sent to the user in
real-time including raw MEDIA: tags. The normal post-processing in
_process_message_background is skipped when already_sent=True, so
MEDIA: files were never extracted or delivered — the user just saw
the raw MEDIA:/path/to/file text.

Fix: after streaming completes, extract MEDIA: tags and local file
paths from the response and deliver them via the platform adapter.
The text is already sent (with the raw tag visible in the stream),
but the actual files now get delivered as attachments.

											
										
										
											2026-03-21 16:01:25 -07:00
+								                if response:
-												fix: /stop command crash + UnboundLocalError in streaming media delivery

Two fixes:

1. CLI /stop command crashed with 'cannot import name get_registry' —
   the code imported a non-existent function. Fixed to use the actual
   process_registry singleton and list_sessions() method.
   (Reported in #2458 by haiyuzhong1980)

2. Streaming media delivery used undefined 'adapter' variable —
   our PR #2382 called _deliver_media_from_response(adapter=adapter)
   but 'adapter' wasn't guaranteed to be defined in that scope.
   Fixed to resolve via self.adapters.get(source.platform).
   (Reported in #2424 by 42-evey)

											
										
										
											2026-03-22 04:35:27 -07:00
+								                    _media_adapter = self.adapters.get(source.platform)
 								                    if _media_adapter:
 								                        await self._deliver_media_from_response(
 								                            response, event, _media_adapter,
 								                        )
-												feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent

Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:

- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
  on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
  a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
  returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally

Config (config.yaml):
  streaming:
    enabled: true
    transport: edit      # progressive editMessageText
    edit_interval: 0.3   # seconds between edits
    buffer_threshold: 40 # chars before forcing flush
    cursor: ' ▉'

Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).

											
										
										
											2026-03-16 05:52:42 -07:00
+								                return None
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            return response
 								        except Exception as e:
-												feat(discord): persistent typing indicator for DMs

Based on PR #2427 by @oxngon (core feature extracted, reformatting
and unrelated changes dropped).

Discord's TYPING_START gateway event is unreliable for bot DMs. This
adds a background typing loop that hits POST /channels/{id}/typing
every 8 seconds (indicator lasts ~10s) until the response is sent.

- send_typing() starts a per-channel background loop (idempotent)
- stop_typing() cancels it (called after _run_agent returns)
- Base adapter gets stop_typing() as a no-op default
- Per-channel tracking via _typing_tasks dict prevents duplicates

											
										
										
											2026-03-22 04:47:53 -07:00
+								            # Stop typing indicator on error too
 								            try:
 								                _err_adapter = self.adapters.get(source.platform)
 								                if _err_adapter and hasattr(_err_adapter, "stop_typing"):
 								                    await _err_adapter.stop_typing(source.chat_id)
 								            except Exception:
 								                pass
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								            logger.exception("Agent error in session %s", session_key)
-												fix(anthropic): retry 429/529 errors and surface error details to users

- 429 rate limit and 529 overloaded were incorrectly treated as
  non-retryable client errors, causing immediate failure instead of
  exponential backoff retry. Users hitting Anthropic rate limits got
  silent failures or no response at all.
- Generic "Sorry, I encountered an unexpected error" now includes
  error type, details, and status-specific hints (auth, rate limit,
  overloaded).
- Failed agent with final_response=None now surfaces the actual
  error message instead of returning an empty response.

											
										
										
											2026-03-17 00:30:26 +03:00
+								            error_type = type(e).__name__
 								            error_detail = str(e)[:300] if str(e) else "no details available"
 								            status_hint = ""
 								            status_code = getattr(e, "status_code", None)
-												Improve gateway error handling for 429 usage limits and 500 context overflow

- Distinguish plan usage limits (429 with usage_limit_reached) from transient rate limits
- Show approximate reset time in hours for plan limits
- Treat HTTP 500 with large sessions as context overflow (same as 400)
- Move history length check earlier for reuse across status codes

											
										
										
											2026-03-17 13:25:20 -07:00
+								            _hist_len = len(history) if 'history' in locals() else 0
-												fix(anthropic): retry 429/529 errors and surface error details to users

- 429 rate limit and 529 overloaded were incorrectly treated as
  non-retryable client errors, causing immediate failure instead of
  exponential backoff retry. Users hitting Anthropic rate limits got
  silent failures or no response at all.
- Generic "Sorry, I encountered an unexpected error" now includes
  error type, details, and status-specific hints (auth, rate limit,
  overloaded).
- Failed agent with final_response=None now surfaces the actual
  error message instead of returning an empty response.

											
										
										
											2026-03-17 00:30:26 +03:00
+								            if status_code == 401:
 								                status_hint = " Check your API key or run `claude /login` to refresh OAuth credentials."
 								            elif status_code == 429:
-												Improve gateway error handling for 429 usage limits and 500 context overflow

- Distinguish plan usage limits (429 with usage_limit_reached) from transient rate limits
- Show approximate reset time in hours for plan limits
- Treat HTTP 500 with large sessions as context overflow (same as 400)
- Move history length check earlier for reuse across status codes

											
										
										
											2026-03-17 13:25:20 -07:00
+								                # Check if this is a plan usage limit (resets on a schedule) vs a transient rate limit
 								                _err_body = getattr(e, "response", None)
 								                _err_json = {}
 								                try:
 								                    if _err_body is not None:
 								                        _err_json = _err_body.json().get("error", {})
 								                except Exception:
 								                    pass
 								                if _err_json.get("type") == "usage_limit_reached":
 								                    _resets_in = _err_json.get("resets_in_seconds")
 								                    if _resets_in and _resets_in > 0:
 								                        import math
 								                        _hours = math.ceil(_resets_in / 3600)
 								                        status_hint = f" Your plan's usage limit has been reached. It resets in ~{_hours}h."
 								                    else:
 								                        status_hint = " Your plan's usage limit has been reached. Please wait until it resets."
 								                else:
 								                    status_hint = " You are being rate-limited. Please wait a moment and try again."
-												fix(anthropic): retry 429/529 errors and surface error details to users

- 429 rate limit and 529 overloaded were incorrectly treated as
  non-retryable client errors, causing immediate failure instead of
  exponential backoff retry. Users hitting Anthropic rate limits got
  silent failures or no response at all.
- Generic "Sorry, I encountered an unexpected error" now includes
  error type, details, and status-specific hints (auth, rate limit,
  overloaded).
- Failed agent with final_response=None now surfaces the actual
  error message instead of returning an empty response.

											
										
										
											2026-03-17 00:30:26 +03:00
+								            elif status_code == 529:
 								                status_hint = " The API is temporarily overloaded. Please try again shortly."
-												Improve gateway error handling for 429 usage limits and 500 context overflow

- Distinguish plan usage limits (429 with usage_limit_reached) from transient rate limits
- Show approximate reset time in hours for plan limits
- Treat HTTP 500 with large sessions as context overflow (same as 400)
- Move history length check earlier for reuse across status codes

											
										
										
											2026-03-17 13:25:20 -07:00
+								            elif status_code in (400, 500):
 								                # 400 with a large session is context overflow.
 								                # 500 with a large session often means the payload is too large
 								                # for the API to process — treat it the same way.
-												fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
											
										
										
											2026-03-17 01:50:59 -07:00
+								                if _hist_len > 50:
 								                    return (
 								                        "⚠️ Session too large for the model's context window.\n"
 								                        "Use /compact to compress the conversation, or "
 								                        "/reset to start fresh."
 								                    )
-												Improve gateway error handling for 429 usage limits and 500 context overflow

- Distinguish plan usage limits (429 with usage_limit_reached) from transient rate limits
- Show approximate reset time in hours for plan limits
- Treat HTTP 500 with large sessions as context overflow (same as 400)
- Move history length check earlier for reuse across status codes

											
										
										
											2026-03-17 13:25:20 -07:00
+								                elif status_code == 400:
-												fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
											
										
										
											2026-03-17 01:50:59 -07:00
+								                    status_hint = " The request was rejected by the API."
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								            return (
-												fix(anthropic): retry 429/529 errors and surface error details to users

- 429 rate limit and 529 overloaded were incorrectly treated as
  non-retryable client errors, causing immediate failure instead of
  exponential backoff retry. Users hitting Anthropic rate limits got
  silent failures or no response at all.
- Generic "Sorry, I encountered an unexpected error" now includes
  error type, details, and status-specific hints (auth, rate limit,
  overloaded).
- Failed agent with final_response=None now surfaces the actual
  error message instead of returning an empty response.

											
										
										
											2026-03-17 00:30:26 +03:00
+								                f"Sorry, I encountered an error ({error_type}).\n"
 								                f"{error_detail}\n"
 								                f"{status_hint}"
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								                "Try again or use /reset to start a fresh session."
 								            )
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        finally:
 								            # Clear session env
 								            self._clear_session_env()
-												feat(gateway): surface session config on /new, /reset, and auto-reset (#3321)

When a new session starts in the gateway (via /new, /reset, or
auto-reset), send the user a summary of the detected configuration:

  ✨ Session reset! Starting fresh.

  ◆ Model: qwen3.5:27b-q4_K_M
  ◆ Provider: custom
  ◆ Context: 8K tokens (config)
  ◆ Endpoint: http://localhost:11434/v1

This makes misconfigured context length immediately visible — a user
running a local 8K model that falls to the 128K default will see:

  ◆ Context: 128K tokens (default — set model.context_length in config to override)

Instead of silently getting no compression and degrading responses.

- _format_session_info() resolves model, provider, context length,
  and endpoint from config + runtime, matching the hygiene code's
  resolution chain
- Local/custom endpoints shown; cloud endpoints hidden (not useful)
- Context source annotated: config, detected, or default with hint
- Appended to /new and /reset responses, and auto-reset notifications
- 9 tests covering all formatting paths and failure resilience

Addresses the user-facing side of #2708 — instead of trying to fix
every edge case in context detection, surface the values so users
can immediately see when something is wrong.
											
										
										
											2026-03-26 19:27:58 -07:00
+								    def _format_session_info(self) -> str:
 								        """Resolve current model config and return a formatted info block.
 								        Surfaces model, provider, context length, and endpoint so gateway
 								        users can immediately see if context detection went wrong (e.g.
 								        local models falling to the 128K default).
 								        """
 								        from agent.model_metadata import get_model_context_length, DEFAULT_FALLBACK_CONTEXT
 								        model = _resolve_gateway_model()
 								        config_context_length = None
 								        provider = None
 								        base_url = None
 								        api_key = None
 								        try:
 								            cfg_path = _hermes_home / "config.yaml"
 								            if cfg_path.exists():
 								                import yaml as _info_yaml
 								                with open(cfg_path, encoding="utf-8") as f:
 								                    data = _info_yaml.safe_load(f) or {}
 								                model_cfg = data.get("model", {})
 								                if isinstance(model_cfg, dict):
 								                    raw_ctx = model_cfg.get("context_length")
 								                    if raw_ctx is not None:
 								                        try:
 								                            config_context_length = int(raw_ctx)
 								                        except (TypeError, ValueError):
 								                            pass
 								                    provider = model_cfg.get("provider") or None
 								                    base_url = model_cfg.get("base_url") or None
 								        except Exception:
 								            pass
 								        # Resolve runtime credentials for probing
 								        try:
 								            runtime = _resolve_runtime_agent_kwargs()
 								            provider = provider or runtime.get("provider")
 								            base_url = base_url or runtime.get("base_url")
 								            api_key = runtime.get("api_key")
 								        except Exception:
 								            pass
 								        context_length = get_model_context_length(
 								            model,
 								            base_url=base_url or "",
 								            api_key=api_key or "",
 								            config_context_length=config_context_length,
 								            provider=provider or "",
 								        )
 								        # Format context source hint
 								        if config_context_length is not None:
 								            ctx_source = "config"
 								        elif context_length == DEFAULT_FALLBACK_CONTEXT:
 								            ctx_source = "default — set model.context_length in config to override"
 								        else:
 								            ctx_source = "detected"
 								        # Format context length for display
 								        if context_length >= 1_000_000:
 								            ctx_display = f"{context_length / 1_000_000:.1f}M"
 								        elif context_length >= 1_000:
 								            ctx_display = f"{context_length // 1_000}K"
 								        else:
 								            ctx_display = str(context_length)
 								        lines = [
 								            f"◆ Model: `{model}`",
 								            f"◆ Provider: {provider or 'openrouter'}",
 								            f"◆ Context: {ctx_display} tokens ({ctx_source})",
 								        ]
 								        # Show endpoint for local/custom setups
 								        if base_url and ("localhost" in base_url or "127.0.0.1" in base_url or "0.0.0.0" in base_url):
 								            lines.append(f"◆ Endpoint: {base_url}")
 								        return "\n".join(lines)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    async def _handle_reset_command(self, event: MessageEvent) -> str:
 								        """Handle /new or /reset command."""
 								        source = event.source
 								        # Get existing session key
-												fix(gateway): make group session isolation configurable

default group and channel sessions to per-user isolation, allow opting back into shared room sessions via config.yaml, and document Discord gateway routing and session behavior.

											
										
										
											2026-03-16 00:22:23 -07:00
+								        session_key = self._session_key_for_source(source)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								        # Flush memories in the background (fire-and-forget) so the user
 								        # gets the "Session reset!" response immediately.
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
+								        try:
-												fix: /retry, /undo, /compress, and /reset gateway commands (#210)

- /retry, /undo, /compress were setting a non-existent conversation_history
  attribute on SessionEntry (a @dataclass with no such field). The dangling
  attribute was silently created but never read — transcript was reloaded
  from DB on next interaction, making all three commands no-ops.

- /reset accessed self.session_store._sessions (non-existent) instead of
  self.session_store._entries, causing AttributeError caught by a bare
  except, silently skipping the pre-reset memory flush.

Fix:
- Add SessionDB.clear_messages() to delete messages and reset counters
- Add SessionStore.rewrite_transcript() to atomically replace transcript
  in both SQLite and legacy JSONL storage
- Replace all dangling attr assignments with rewrite_transcript() calls
- Fix _sessions → _entries in /reset handler

Closes #210

											
										
										
											2026-03-02 00:14:49 -08:00
+								            old_entry = self.session_store._entries.get(session_key)
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
+								            if old_entry:
-												fix(gateway): track background task references in GatewayRunner (#3254)

Asyncio tasks created with create_task() but never stored can be
garbage collected mid-execution. Add self._background_tasks set to
hold references, with add_done_callback cleanup. Tracks:
- /background command task
- session-reset memory flush task
- session-resume memory flush task
Cancel all pending tasks in stop().

Update test fixtures that construct GatewayRunner via object.__new__()
to include the new _background_tasks attribute.

Cherry-picked from PR #3167 by memosr. The original PR also deleted
the DM topic auto-skill loading code — that deletion was excluded
from this salvage as it removes a shipped feature (#2598).

Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-26 14:33:48 -07:00
+								                _flush_task = asyncio.create_task(
-												fix(honcho): isolate session routing for multi-user gateway (#1500)

Salvaged from PR #1470 by adavyas.

Core fix: Honcho tool calls in a multi-session gateway could route to
the wrong session because honcho_tools.py relied on process-global
state. Now threads session context through the call chain:
  AIAgent._invoke_tool() → handle_function_call() → registry.dispatch()
  → handler **kw → _resolve_session_context()

Changes:
- Add _resolve_session_context() to prefer per-call context over globals
- Plumb honcho_manager + honcho_session_key through handle_function_call
- Add sync_honcho=False to run_conversation() for synthetic flush turns
- Pass honcho_session_key through gateway memory flush lifecycle
- Harden gateway PID detection when /proc cmdline is unreadable
- Make interrupt test scripts import-safe for pytest-xdist
- Wrap BibTeX examples in Jekyll raw blocks for docs build
- Fix thread-order-dependent assertion in client lifecycle test
- Expand Honcho docs: session isolation, lifecycle, routing internals

Dropped from original PR:
- Indentation change in _create_request_openai_client that would move
  client creation inside the lock (causes unnecessary contention)

Co-authored-by: adavyas <adavyas@users.noreply.github.com>
											
										
										
											2026-03-16 00:23:47 -07:00
+								                    self._async_flush_memories(old_entry.session_id, session_key)
 								                )
-												fix(gateway): track background task references in GatewayRunner (#3254)

Asyncio tasks created with create_task() but never stored can be
garbage collected mid-execution. Add self._background_tasks set to
hold references, with add_done_callback cleanup. Tracks:
- /background command task
- session-reset memory flush task
- session-resume memory flush task
Cancel all pending tasks in stop().

Update test fixtures that construct GatewayRunner via object.__new__()
to include the new _background_tasks attribute.

Cherry-picked from PR #3167 by memosr. The original PR also deleted
the DM topic auto-skill loading code — that deletion was excluded
from this salvage as it removes a shipped feature (#2598).

Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-26 14:33:48 -07:00
+								                self._background_tasks.add(_flush_task)
 								                _flush_task.add_done_callback(self._background_tasks.discard)
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
+								        except Exception as e:
 								            logger.debug("Gateway memory flush on reset failed: %s", e)
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
 								        self._shutdown_gateway_honcho(session_key)
-												feat(gateway): cache AIAgent per session for prompt caching

The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.

Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
  prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents

Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.

This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.

											
										
										
											2026-03-21 13:07:08 -07:00
+								        self._evict_cached_agent(session_key)
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        # Reset the session
 								        new_entry = self.session_store.reset_session(session_key)
-												feat(hooks): emit session:end lifecycle event (#1725)

Based on PR #1432 by @bayrakdarerdem. session:start was already on main; this adds the session:end event.

Co-authored-by: bayrakdarerdem <bayrakdarerdem@users.noreply.github.com>
											
										
										
											2026-03-17 04:17:44 -07:00
 								        # Emit session:end hook (session is ending)
 								        await self.hooks.emit("session:end", {
 								            "platform": source.platform.value if source.platform else "",
 								            "user_id": source.user_id,
 								            "session_key": session_key,
 								        })
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        # Emit session:reset hook
 								        await self.hooks.emit("session:reset", {
 								            "platform": source.platform.value if source.platform else "",
 								            "user_id": source.user_id,
 								            "session_key": session_key,
 								        })
-												feat(gateway): surface session config on /new, /reset, and auto-reset (#3321)

When a new session starts in the gateway (via /new, /reset, or
auto-reset), send the user a summary of the detected configuration:

  ✨ Session reset! Starting fresh.

  ◆ Model: qwen3.5:27b-q4_K_M
  ◆ Provider: custom
  ◆ Context: 8K tokens (config)
  ◆ Endpoint: http://localhost:11434/v1

This makes misconfigured context length immediately visible — a user
running a local 8K model that falls to the 128K default will see:

  ◆ Context: 128K tokens (default — set model.context_length in config to override)

Instead of silently getting no compression and degrading responses.

- _format_session_info() resolves model, provider, context length,
  and endpoint from config + runtime, matching the hygiene code's
  resolution chain
- Local/custom endpoints shown; cloud endpoints hidden (not useful)
- Context source annotated: config, detected, or default with hint
- Appended to /new and /reset responses, and auto-reset notifications
- 9 tests covering all formatting paths and failure resilience

Addresses the user-facing side of #2708 — instead of trying to fix
every edge case in context detection, surface the values so users
can immediately see when something is wrong.
											
										
										
											2026-03-26 19:27:58 -07:00
+								        # Resolve session config info to surface to the user
 								        try:
 								            session_info = self._format_session_info()
 								        except Exception:
 								            session_info = ""
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        if new_entry:
-												feat(gateway): surface session config on /new, /reset, and auto-reset (#3321)

When a new session starts in the gateway (via /new, /reset, or
auto-reset), send the user a summary of the detected configuration:

  ✨ Session reset! Starting fresh.

  ◆ Model: qwen3.5:27b-q4_K_M
  ◆ Provider: custom
  ◆ Context: 8K tokens (config)
  ◆ Endpoint: http://localhost:11434/v1

This makes misconfigured context length immediately visible — a user
running a local 8K model that falls to the 128K default will see:

  ◆ Context: 128K tokens (default — set model.context_length in config to override)

Instead of silently getting no compression and degrading responses.

- _format_session_info() resolves model, provider, context length,
  and endpoint from config + runtime, matching the hygiene code's
  resolution chain
- Local/custom endpoints shown; cloud endpoints hidden (not useful)
- Context source annotated: config, detected, or default with hint
- Appended to /new and /reset responses, and auto-reset notifications
- 9 tests covering all formatting paths and failure resilience

Addresses the user-facing side of #2708 — instead of trying to fix
every edge case in context detection, surface the values so users
can immediately see when something is wrong.
											
										
										
											2026-03-26 19:27:58 -07:00
+								            header = "✨ Session reset! Starting fresh."
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        else:
 								            # No existing session, just create one
 								            self.session_store.get_or_create_session(source, force_new=True)
-												feat(gateway): surface session config on /new, /reset, and auto-reset (#3321)

When a new session starts in the gateway (via /new, /reset, or
auto-reset), send the user a summary of the detected configuration:

  ✨ Session reset! Starting fresh.

  ◆ Model: qwen3.5:27b-q4_K_M
  ◆ Provider: custom
  ◆ Context: 8K tokens (config)
  ◆ Endpoint: http://localhost:11434/v1

This makes misconfigured context length immediately visible — a user
running a local 8K model that falls to the 128K default will see:

  ◆ Context: 128K tokens (default — set model.context_length in config to override)

Instead of silently getting no compression and degrading responses.

- _format_session_info() resolves model, provider, context length,
  and endpoint from config + runtime, matching the hygiene code's
  resolution chain
- Local/custom endpoints shown; cloud endpoints hidden (not useful)
- Context source annotated: config, detected, or default with hint
- Appended to /new and /reset responses, and auto-reset notifications
- 9 tests covering all formatting paths and failure resilience

Addresses the user-facing side of #2708 — instead of trying to fix
every edge case in context detection, surface the values so users
can immediately see when something is wrong.
											
										
										
											2026-03-26 19:27:58 -07:00
+								            header = "✨ New session started!"
 								        if session_info:
 								            return f"{header}\n\n{session_info}"
 								        return header
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								    async def _handle_status_command(self, event: MessageEvent) -> str:
 								        """Handle /status command."""
 								        source = event.source
 								        session_entry = self.session_store.get_or_create_session(source)
 								        connected_platforms = [p.value for p in self.adapters.keys()]
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        # Check if there's an active agent
 								        session_key = session_entry.session_key
 								        is_running = session_key in self._running_agents
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        lines = [
 								            "📊 **Hermes Gateway Status**",
 								            "",
 								            f"**Session ID:** `{session_entry.session_id[:12]}...`",
 								            f"**Created:** {session_entry.created_at.strftime('%Y-%m-%d %H:%M')}",
 								            f"**Last Activity:** {session_entry.updated_at.strftime('%Y-%m-%d %H:%M')}",
 								            f"**Tokens:** {session_entry.total_tokens:,}",
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								            f"**Agent Running:** {'Yes ⚡' if is_running else 'No'}",
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            "",
 								            f"**Connected Platforms:** {', '.join(connected_platforms)}",
 								        ]
 								        return "\n".join(lines)
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								    async def _handle_stop_command(self, event: MessageEvent) -> str:
-												fix(gateway): recover from hung agents — /stop force-unlocks session (#3104)

When an agent thread hangs (truly blocked, never checks _interrupt_requested),
/stop now force-cleans _running_agents to unlock the session immediately.

Two changes:
- Early /stop intercept in the running-agent guard: bypasses normal command
  dispatch to force-interrupt and unlock the session. Follows the same pattern
  as the existing /new intercept.
- Sentinel /stop: force-cleans the sentinel instead of returning 'nothing to
  stop yet', so /stop during slow startup actually unlocks the session.

Follow-up improvements over original PR:
- Consolidated duplicate resolve_command imports into single early resolution
- Updated _handle_stop_command to also force-clean for consistency
- Removed 10-minute hard timeout on the executor (would kill legitimate
  long-running agent tasks; the /stop force-clean handles recovery)

Cherry-picked from Mibayy's PR #2498.

Co-authored-by: Mibayy <Mibayy@users.noreply.github.com>
											
										
										
											2026-03-25 18:46:50 -07:00
+								        """Handle /stop command - interrupt a running agent.
 								        When an agent is truly hung (blocked thread that never checks
 								        _interrupt_requested), the early intercept in _handle_message()
 								        handles /stop before this method is reached.  This handler fires
 								        only through normal command dispatch (no running agent) or as a
 								        fallback.  Force-clean the session lock in all cases for safety.
 								        """
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        source = event.source
 								        session_entry = self.session_store.get_or_create_session(source)
 								        session_key = session_entry.session_key
-												fix: harden sentinel guard for /stop during setup and shutdown

- /stop during sentinel returns helpful message instead of queuing
- Shutdown loop skips sentinel entries instead of catching AttributeError
- _handle_stop_command guards against sentinel (defensive)
- Added tests for both edge cases (7 total race guard tests)

											
										
										
											2026-03-19 18:26:09 -07:00
+								        agent = self._running_agents.get(session_key)
 								        if agent is _AGENT_PENDING_SENTINEL:
-												fix(gateway): recover from hung agents — /stop force-unlocks session (#3104)

When an agent thread hangs (truly blocked, never checks _interrupt_requested),
/stop now force-cleans _running_agents to unlock the session immediately.

Two changes:
- Early /stop intercept in the running-agent guard: bypasses normal command
  dispatch to force-interrupt and unlock the session. Follows the same pattern
  as the existing /new intercept.
- Sentinel /stop: force-cleans the sentinel instead of returning 'nothing to
  stop yet', so /stop during slow startup actually unlocks the session.

Follow-up improvements over original PR:
- Consolidated duplicate resolve_command imports into single early resolution
- Updated _handle_stop_command to also force-clean for consistency
- Removed 10-minute hard timeout on the executor (would kill legitimate
  long-running agent tasks; the /stop force-clean handles recovery)

Cherry-picked from Mibayy's PR #2498.

Co-authored-by: Mibayy <Mibayy@users.noreply.github.com>
											
										
										
											2026-03-25 18:46:50 -07:00
+								            # Force-clean the sentinel so the session is unlocked.
 								            if session_key in self._running_agents:
 								                del self._running_agents[session_key]
 								            logger.info("HARD STOP (pending) for session %s — sentinel cleared", session_key[:20])
 								            return "⚡ Force-stopped. The agent was still starting — session unlocked."
-												fix: harden sentinel guard for /stop during setup and shutdown

- /stop during sentinel returns helpful message instead of queuing
- Shutdown loop skips sentinel entries instead of catching AttributeError
- _handle_stop_command guards against sentinel (defensive)
- Added tests for both edge cases (7 total race guard tests)

											
										
										
											2026-03-19 18:26:09 -07:00
+								        if agent:
-												fix(gateway): recover from hung agents — /stop force-unlocks session (#3104)

When an agent thread hangs (truly blocked, never checks _interrupt_requested),
/stop now force-cleans _running_agents to unlock the session immediately.

Two changes:
- Early /stop intercept in the running-agent guard: bypasses normal command
  dispatch to force-interrupt and unlock the session. Follows the same pattern
  as the existing /new intercept.
- Sentinel /stop: force-cleans the sentinel instead of returning 'nothing to
  stop yet', so /stop during slow startup actually unlocks the session.

Follow-up improvements over original PR:
- Consolidated duplicate resolve_command imports into single early resolution
- Updated _handle_stop_command to also force-clean for consistency
- Removed 10-minute hard timeout on the executor (would kill legitimate
  long-running agent tasks; the /stop force-clean handles recovery)

Cherry-picked from Mibayy's PR #2498.

Co-authored-by: Mibayy <Mibayy@users.noreply.github.com>
											
										
										
											2026-03-25 18:46:50 -07:00
+								            agent.interrupt("Stop requested")
 								            # Force-clean the session lock so a truly hung agent doesn't
 								            # keep it locked forever.
 								            if session_key in self._running_agents:
 								                del self._running_agents[session_key]
 								            return "⚡ Force-stopped. The session is unlocked — you can send a new message."
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        else:
 								            return "No active task to stop."
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								    async def _handle_help_command(self, event: MessageEvent) -> str:
 								        """Handle /help command - list available commands."""
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								        from hermes_cli.commands import gateway_help_lines
-												feat(skills): implement dynamic skill slash commands for CLI and gateway

											
										
										
											2026-02-28 11:18:50 -08:00
+								        lines = [
 								            "📖 **Hermes Commands**\n",
-												refactor: centralize slash command registry (#1603)

* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
											
										
										
											2026-03-16 23:21:03 -07:00
+								            *gateway_help_lines(),
-												feat(skills): implement dynamic skill slash commands for CLI and gateway

											
										
										
											2026-02-28 11:18:50 -08:00
+								        ]
 								        try:
 								            from agent.skill_commands import get_skill_commands
 								            skill_cmds = get_skill_commands()
 								            if skill_cmds:
 								                lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):")
 								                for cmd in sorted(skill_cmds):
 								                    lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}")
 								        except Exception:
 								            pass
 								        return "\n".join(lines)
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
-												feat: /provider command + fix gateway bugs + harden parse_model_input

/provider command (CLI + gateway):
  Shows all providers with auth status (✓/✗), aliases, and active marker.
  Users can now discover what provider names work with provider:model syntax.

Gateway bugs fixed:
  - Config was saved even when validation.persist=False (told user 'session
    only' but actually persisted the unvalidated model)
  - HERMES_INFERENCE_PROVIDER env var not set on provider switch, causing
    the switch to be silently overridden if that env var was already set

parse_model_input hardened:
  - Colon only treated as provider delimiter if left side is a recognized
    provider name or alias. 'anthropic/claude-3.5-sonnet:beta' now passes
    through as a model name instead of trying provider='anthropic/claude-3.5-sonnet'.
  - HTTP URLs, random colons no longer misinterpreted.

56 tests passing across model validation, CLI commands, and integration.

											
										
										
											2026-03-08 06:09:36 -07:00
+								    async def _handle_provider_command(self, event: MessageEvent) -> str:
 								        """Handle /provider command - show available providers."""
 								        import yaml
 								        from hermes_cli.models import (
 								            list_available_providers,
 								            normalize_provider,
 								            _PROVIDER_LABELS,
 								        )
 								        # Resolve current provider from config
 								        current_provider = "openrouter"
 								        config_path = _hermes_home / 'config.yaml'
 								        try:
 								            if config_path.exists():
-												Add explicit encoding="utf-8" to all config/data file open() calls

On Windows, open() defaults to the system locale encoding (cp1252,
cp1254, etc.) rather than UTF-8. This breaks any file containing
non-ASCII characters, and also causes crashes when writing JSON with
ensure_ascii=False.

This adds encoding="utf-8" to open() calls in:
- gateway/run.py (config.yaml reads/writes throughout)
- gateway/config.py (gateway.json and config.yaml)
- hermes_cli/config.py (config.yaml load/save)
- hermes_cli/main.py (session export with ensure_ascii=False)
- hermes_cli/status.py (jobs.json and sessions.json)

											
										
										
											2026-03-05 17:04:33 -05:00
+								                with open(config_path, encoding="utf-8") as f:
-												feat: /provider command + fix gateway bugs + harden parse_model_input

/provider command (CLI + gateway):
  Shows all providers with auth status (✓/✗), aliases, and active marker.
  Users can now discover what provider names work with provider:model syntax.

Gateway bugs fixed:
  - Config was saved even when validation.persist=False (told user 'session
    only' but actually persisted the unvalidated model)
  - HERMES_INFERENCE_PROVIDER env var not set on provider switch, causing
    the switch to be silently overridden if that env var was already set

parse_model_input hardened:
  - Colon only treated as provider delimiter if left side is a recognized
    provider name or alias. 'anthropic/claude-3.5-sonnet:beta' now passes
    through as a model name instead of trying provider='anthropic/claude-3.5-sonnet'.
  - HTTP URLs, random colons no longer misinterpreted.

56 tests passing across model validation, CLI commands, and integration.

											
										
										
											2026-03-08 06:09:36 -07:00
+								                    cfg = yaml.safe_load(f) or {}
 								                model_cfg = cfg.get("model", {})
 								                if isinstance(model_cfg, dict):
 								                    current_provider = model_cfg.get("provider", current_provider)
 								        except Exception:
 								            pass
 								        current_provider = normalize_provider(current_provider)
 								        if current_provider == "auto":
 								            try:
 								                from hermes_cli.auth import resolve_provider as _resolve_provider
 								                current_provider = _resolve_provider(current_provider)
 								            except Exception:
 								                current_provider = "openrouter"
-												fix: custom endpoint provider shows as openrouter in gateway

Three issues caused the gateway to display 'openrouter' instead of
'Custom endpoint' when users configured a custom OAI-compatible endpoint:

1. hermes setup: custom endpoint path saved OPENAI_BASE_URL and
   OPENAI_API_KEY to .env but never wrote model.provider to config.yaml.
   All other providers (Codex, z.ai, Kimi, etc.) call
   _update_config_for_provider() which sets this — custom was the only
   path that skipped it. Now writes model.provider='custom' and
   model.base_url to config.yaml.

2. hermes model: custom endpoint set model.provider='auto' in config.yaml.
   The CLI display had a hack to detect OPENAI_BASE_URL and override to
   'custom', but the gateway didn't. Now sets model.provider='custom'
   directly.

3. gateway /model and /provider commands: defaulted to 'openrouter' and
   read config.yaml — which had no provider set. Added OPENAI_BASE_URL
   detection fallback (same pattern the CLI uses) as a defensive catch
   for existing users who set up before this fix.

											
										
										
											2026-03-09 02:38:34 -07:00
+								        # Detect custom endpoint
 								        if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
 								            current_provider = "custom"
-												feat: /provider command + fix gateway bugs + harden parse_model_input

/provider command (CLI + gateway):
  Shows all providers with auth status (✓/✗), aliases, and active marker.
  Users can now discover what provider names work with provider:model syntax.

Gateway bugs fixed:
  - Config was saved even when validation.persist=False (told user 'session
    only' but actually persisted the unvalidated model)
  - HERMES_INFERENCE_PROVIDER env var not set on provider switch, causing
    the switch to be silently overridden if that env var was already set

parse_model_input hardened:
  - Colon only treated as provider delimiter if left side is a recognized
    provider name or alias. 'anthropic/claude-3.5-sonnet:beta' now passes
    through as a model name instead of trying provider='anthropic/claude-3.5-sonnet'.
  - HTTP URLs, random colons no longer misinterpreted.

56 tests passing across model validation, CLI commands, and integration.

											
										
										
											2026-03-08 06:09:36 -07:00
+								        current_label = _PROVIDER_LABELS.get(current_provider, current_provider)
 								        lines = [
 								            f"🔌 **Current provider:** {current_label} (`{current_provider}`)",
 								            "",
 								            "**Available providers:**",
 								        ]
 								        providers = list_available_providers()
 								        for p in providers:
 								            marker = " ← active" if p["id"] == current_provider else ""
 								            auth = "✅" if p["authenticated"] else "❌"
 								            aliases = f"  _(also: {', '.join(p['aliases'])})_" if p["aliases"] else ""
 								            lines.append(f"{auth} `{p['id']}` — {p['label']}{aliases}{marker}")
 								        lines.append("")
 								        lines.append("Switch: `/model provider:model-name`")
 								        lines.append("Setup: `hermes setup`")
 								        return "\n".join(lines)
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
 								    async def _handle_personality_command(self, event: MessageEvent) -> str:
 								        """Handle /personality command - list or set a personality."""
-												fix(gateway): sync /model and /personality with CLI config.yaml pattern

											
										
										
											2026-02-27 11:14:14 -05:00
+								        import yaml
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								        args = event.get_command_args().strip().lower()
-												fix(gateway): sync /model and /personality with CLI config.yaml pattern

											
										
										
											2026-02-27 11:14:14 -05:00
+								        config_path = _hermes_home / 'config.yaml'
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								        try:
 								            if config_path.exists():
-												Add explicit encoding="utf-8" to all config/data file open() calls

On Windows, open() defaults to the system locale encoding (cp1252,
cp1254, etc.) rather than UTF-8. This breaks any file containing
non-ASCII characters, and also causes crashes when writing JSON with
ensure_ascii=False.

This adds encoding="utf-8" to open() calls in:
- gateway/run.py (config.yaml reads/writes throughout)
- gateway/config.py (gateway.json and config.yaml)
- hermes_cli/config.py (config.yaml load/save)
- hermes_cli/main.py (session export with ensure_ascii=False)
- hermes_cli/status.py (jobs.json and sessions.json)

											
										
										
											2026-03-05 17:04:33 -05:00
+								                with open(config_path, 'r', encoding="utf-8") as f:
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								                    config = yaml.safe_load(f) or {}
 								                personalities = config.get("agent", {}).get("personalities", {})
 								            else:
-												fix(gateway): sync /model and /personality with CLI config.yaml pattern

											
										
										
											2026-02-27 11:14:14 -05:00
+								                config = {}
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								                personalities = {}
 								        except Exception:
-												fix(gateway): sync /model and /personality with CLI config.yaml pattern

											
										
										
											2026-02-27 11:14:14 -05:00
+								            config = {}
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								            personalities = {}
-												fix(gateway): sync /model and /personality with CLI config.yaml pattern

											
										
										
											2026-02-27 11:14:14 -05:00
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								        if not personalities:
 								            return "No personalities configured in `~/.hermes/config.yaml`"
-												fix(gateway): sync /model and /personality with CLI config.yaml pattern

											
										
										
											2026-02-27 11:14:14 -05:00
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								        if not args:
 								            lines = ["🎭 **Available Personalities**\n"]
-												feat(cli,gateway): add /personality none and custom personality support

Closes #643

Changes:
- /personality none|default|neutral — clears system prompt overlay
- Custom personalities in config.yaml support dict format with:
  name, description, system_prompt, tone, style directives
- Backwards compatible — existing string format still works
- CLI + gateway both updated
- 18 tests covering none/default/neutral, dict format, string format,
  list display, save to config

											
										
										
											2026-03-09 17:18:09 +03:00
+								            lines.append("• `none` — (no personality overlay)")
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								            for name, prompt in personalities.items():
-												feat(cli,gateway): add /personality none and custom personality support

Closes #643

Changes:
- /personality none|default|neutral — clears system prompt overlay
- Custom personalities in config.yaml support dict format with:
  name, description, system_prompt, tone, style directives
- Backwards compatible — existing string format still works
- CLI + gateway both updated
- 18 tests covering none/default/neutral, dict format, string format,
  list display, save to config

											
										
										
											2026-03-09 17:18:09 +03:00
+								                if isinstance(prompt, dict):
 								                    preview = prompt.get("description") or prompt.get("system_prompt", "")[:50]
 								                else:
 								                    preview = prompt[:50] + "..." if len(prompt) > 50 else prompt
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								                lines.append(f"• `{name}` — {preview}")
-												chore: fix 154 f-strings, simplify getattr/URL patterns, remove dead code (#3119)

Three categories of cleanup, all zero-behavioral-change:

1. F-strings without placeholders (154 fixes across 29 files)
   - Converted f'...' to '...' where no {expression} was present
   - Heaviest files: run_agent.py (24), cli.py (20), honcho_integration/cli.py (34)

2. Simplify defensive patterns in run_agent.py
   - Added explicit self._is_anthropic_oauth = False in __init__ (before
     the api_mode branch that conditionally sets it)
   - Replaced 7x getattr(self, '_is_anthropic_oauth', False) with direct
     self._is_anthropic_oauth (attribute always initialized now)
   - Added _is_openrouter_url() and _is_anthropic_url() helper methods
   - Replaced 3 inline 'openrouter' in self._base_url_lower checks

3. Remove dead code in small files
   - hermes_cli/claw.py: removed unused 'total' computation
   - tools/fuzzy_match.py: removed unused strip_indent() function and
     pattern_stripped variable

Full test suite: 6184 passed, 0 failures
E2E PTY: banner clean, tool calls work, zero garbled ANSI
											
										
										
											2026-03-25 19:47:58 -07:00
+								            lines.append("\nUsage: `/personality <name>`")
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								            return "\n".join(lines)
-												fix(gateway): sync /model and /personality with CLI config.yaml pattern

											
										
										
											2026-02-27 11:14:14 -05:00
-												feat(cli,gateway): add /personality none and custom personality support

Closes #643

Changes:
- /personality none|default|neutral — clears system prompt overlay
- Custom personalities in config.yaml support dict format with:
  name, description, system_prompt, tone, style directives
- Backwards compatible — existing string format still works
- CLI + gateway both updated
- 18 tests covering none/default/neutral, dict format, string format,
  list display, save to config

											
										
										
											2026-03-09 17:18:09 +03:00
+								        def _resolve_prompt(value):
 								            if isinstance(value, dict):
 								                parts = [value.get("system_prompt", "")]
 								                if value.get("tone"):
 								                    parts.append(f'Tone: {value["tone"]}')
 								                if value.get("style"):
 								                    parts.append(f'Style: {value["style"]}')
 								                return "\n".join(p for p in parts if p)
 								            return str(value)
 								        if args in ("none", "default", "neutral"):
 								            try:
 								                if "agent" not in config or not isinstance(config.get("agent"), dict):
 								                    config["agent"] = {}
 								                config["agent"]["system_prompt"] = ""
 								                with open(config_path, "w") as f:
 								                    yaml.dump(config, f, default_flow_style=False, sort_keys=False)
 								            except Exception as e:
 								                return f"⚠️ Failed to save personality change: {e}"
 								            self._ephemeral_system_prompt = ""
 								            return "🎭 Personality cleared — using base agent behavior.\n_(takes effect on next message)_"
 								        elif args in personalities:
 								            new_prompt = _resolve_prompt(personalities[args])
-												fix(gateway): sync /model and /personality with CLI config.yaml pattern

											
										
										
											2026-02-27 11:14:14 -05:00
 								            # Write to config.yaml, same pattern as CLI save_config_value.
 								            try:
 								                if "agent" not in config or not isinstance(config.get("agent"), dict):
 								                    config["agent"] = {}
 								                config["agent"]["system_prompt"] = new_prompt
-												Add explicit encoding="utf-8" to all config/data file open() calls

On Windows, open() defaults to the system locale encoding (cp1252,
cp1254, etc.) rather than UTF-8. This breaks any file containing
non-ASCII characters, and also causes crashes when writing JSON with
ensure_ascii=False.

This adds encoding="utf-8" to open() calls in:
- gateway/run.py (config.yaml reads/writes throughout)
- gateway/config.py (gateway.json and config.yaml)
- hermes_cli/config.py (config.yaml load/save)
- hermes_cli/main.py (session export with ensure_ascii=False)
- hermes_cli/status.py (jobs.json and sessions.json)

											
										
										
											2026-03-05 17:04:33 -05:00
+								                with open(config_path, 'w', encoding="utf-8") as f:
-												fix(gateway): sync /model and /personality with CLI config.yaml pattern

											
										
										
											2026-02-27 11:14:14 -05:00
+								                    yaml.dump(config, f, default_flow_style=False, sort_keys=False)
 								            except Exception as e:
 								                return f"⚠️ Failed to save personality change: {e}"
 								            # Update in-memory so it takes effect on the very next message.
 								            self._ephemeral_system_prompt = new_prompt
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								            return f"🎭 Personality set to **{args}**\n_(takes effect on next message)_"
-												fix(gateway): sync /model and /personality with CLI config.yaml pattern

											
										
										
											2026-02-27 11:14:14 -05:00
-												feat(cli,gateway): add /personality none and custom personality support

Closes #643

Changes:
- /personality none|default|neutral — clears system prompt overlay
- Custom personalities in config.yaml support dict format with:
  name, description, system_prompt, tone, style directives
- Backwards compatible — existing string format still works
- CLI + gateway both updated
- 18 tests covering none/default/neutral, dict format, string format,
  list display, save to config

											
										
										
											2026-03-09 17:18:09 +03:00
+								        available = "`none`, " + ", ".join(f"`{n}`" for n in personalities.keys())
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								        return f"Unknown personality: `{args}`\n\nAvailable: {available}"
 								    async def _handle_retry_command(self, event: MessageEvent) -> str:
 								        """Handle /retry command - re-send the last user message."""
 								        source = event.source
 								        session_entry = self.session_store.get_or_create_session(source)
 								        history = self.session_store.load_transcript(session_entry.session_id)
 								        # Find the last user message
 								        last_user_msg = None
 								        last_user_idx = None
 								        for i in range(len(history) - 1, -1, -1):
 								            if history[i].get("role") == "user":
 								                last_user_msg = history[i].get("content", "")
 								                last_user_idx = i
 								                break
 								        if not last_user_msg:
 								            return "No previous message to retry."
-												fix: /retry, /undo, /compress, and /reset gateway commands (#210)

- /retry, /undo, /compress were setting a non-existent conversation_history
  attribute on SessionEntry (a @dataclass with no such field). The dangling
  attribute was silently created but never read — transcript was reloaded
  from DB on next interaction, making all three commands no-ops.

- /reset accessed self.session_store._sessions (non-existent) instead of
  self.session_store._entries, causing AttributeError caught by a bare
  except, silently skipping the pre-reset memory flush.

Fix:
- Add SessionDB.clear_messages() to delete messages and reset counters
- Add SessionStore.rewrite_transcript() to atomically replace transcript
  in both SQLite and legacy JSONL storage
- Replace all dangling attr assignments with rewrite_transcript() calls
- Fix _sessions → _entries in /reset handler

Closes #210

											
										
										
											2026-03-02 00:14:49 -08:00
+								        # Truncate history to before the last user message and persist
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								        truncated = history[:last_user_idx]
-												fix: /retry, /undo, /compress, and /reset gateway commands (#210)

- /retry, /undo, /compress were setting a non-existent conversation_history
  attribute on SessionEntry (a @dataclass with no such field). The dangling
  attribute was silently created but never read — transcript was reloaded
  from DB on next interaction, making all three commands no-ops.

- /reset accessed self.session_store._sessions (non-existent) instead of
  self.session_store._entries, causing AttributeError caught by a bare
  except, silently skipping the pre-reset memory flush.

Fix:
- Add SessionDB.clear_messages() to delete messages and reset counters
- Add SessionStore.rewrite_transcript() to atomically replace transcript
  in both SQLite and legacy JSONL storage
- Replace all dangling attr assignments with rewrite_transcript() calls
- Fix _sessions → _entries in /reset handler

Closes #210

											
										
										
											2026-03-02 00:14:49 -08:00
+								        self.session_store.rewrite_transcript(session_entry.session_id, truncated)
-												fix: integration hardening for gateway token tracking

Follow-up to 58dbd81 — ensures smooth transition for existing users:

- Backward compat: old session files without last_prompt_tokens
  default to 0 via data.get('last_prompt_tokens', 0)
- /compress, /undo, /retry: reset last_prompt_tokens to 0 after
  rewriting transcripts (stale token counts would under-report)
- Auto-compression hygiene: reset last_prompt_tokens after rewriting
- update_session: use None sentinel (not 0) as default so callers
  can explicitly reset to 0 while normal calls don't clobber
- 6 new tests covering: default value, serialization roundtrip,
  old-format migration, set/reset/no-change semantics
- /reset: new SessionEntry naturally gets last_prompt_tokens=0

2942 tests pass.

											
										
										
											2026-03-10 23:40:24 -07:00
+								        # Reset stored token count — transcript was truncated
 								        session_entry.last_prompt_tokens = 0
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
 								        # Re-send by creating a fake text event with the old message
 								        retry_event = MessageEvent(
 								            text=last_user_msg,
 								            message_type=MessageType.TEXT,
 								            source=source,
 								            raw_message=event.raw_message,
 								        )
 								        # Let the normal message handler process it
-												fix(gateway): return response from /retry handler instead of discarding it

											
										
										
											2026-03-05 19:59:54 +03:00
+								        return await self._handle_message(retry_event)
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
 								    async def _handle_undo_command(self, event: MessageEvent) -> str:
 								        """Handle /undo command - remove the last user/assistant exchange."""
 								        source = event.source
 								        session_entry = self.session_store.get_or_create_session(source)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        history = self.session_store.load_transcript(session_entry.session_id)
 								        # Find the last user message and remove everything from it onward
 								        last_user_idx = None
 								        for i in range(len(history) - 1, -1, -1):
 								            if history[i].get("role") == "user":
 								                last_user_idx = i
 								                break
 								        if last_user_idx is None:
 								            return "Nothing to undo."
 								        removed_msg = history[last_user_idx].get("content", "")
 								        removed_count = len(history) - last_user_idx
 								        self.session_store.rewrite_transcript(session_entry.session_id, history[:last_user_idx])
 								        # Reset stored token count — transcript was truncated
 								        session_entry.last_prompt_tokens = 0
 								        preview = removed_msg[:40] + "..." if len(removed_msg) > 40 else removed_msg
 								        return f"↩️ Undid {removed_count} message(s).\nRemoved: \"{preview}\""
 								    async def _handle_set_home_command(self, event: MessageEvent) -> str:
 								        """Handle /sethome command -- set the current chat as the platform's home channel."""
 								        source = event.source
 								        platform_name = source.platform.value if source.platform else "unknown"
 								        chat_id = source.chat_id
 								        chat_name = source.chat_name or chat_id
 								        env_key = f"{platform_name.upper()}_HOME_CHANNEL"
 								        # Save to config.yaml
 								        try:
 								            import yaml
 								            config_path = _hermes_home / 'config.yaml'
 								            user_config = {}
 								            if config_path.exists():
 								                with open(config_path, encoding="utf-8") as f:
 								                    user_config = yaml.safe_load(f) or {}
 								            user_config[env_key] = chat_id
 								            with open(config_path, 'w', encoding="utf-8") as f:
 								                yaml.dump(user_config, f, default_flow_style=False)
 								            # Also set in the current environment so it takes effect immediately
 								            os.environ[env_key] = str(chat_id)
 								        except Exception as e:
 								            return f"Failed to save home channel: {e}"
 								        return (
 								            f"✅ Home channel set to **{chat_name}** (ID: {chat_id}).\n"
 								            f"Cron jobs and cross-platform messages will be delivered here."
 								        )
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
+								    @staticmethod
 								    def _get_guild_id(event: MessageEvent) -> Optional[int]:
 								        """Extract Discord guild_id from the raw message object."""
 								        raw = getattr(event, "raw_message", None)
 								        if raw is None:
 								            return None
 								        # Slash command interaction
 								        if hasattr(raw, "guild_id") and raw.guild_id:
 								            return int(raw.guild_id)
 								        # Regular message
 								        if hasattr(raw, "guild") and raw.guild:
 								            return raw.guild.id
 								        return None
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								    async def _handle_voice_command(self, event: MessageEvent) -> str:
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
+								        """Handle /voice [on|off|tts|channel|leave|status] command."""
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								        args = event.get_command_args().strip().lower()
 								        chat_id = event.source.chat_id
-												fix: address PR review round 5 — streaming guard, VC auth, history prefix, auto-TTS control

1. Gate _streaming_api_call to chat_completions mode only — Anthropic and
   Codex fall back to _interruptible_api_call. Preserve Anthropic base_url
   across all client rebuild paths (interrupt, fallback, 401 refresh).

2. Discord VC synthetic events now use chat_type="channel" instead of
   defaulting to "dm" — prevents session bleed into DM context.
   Authorization runs before echoing transcript. Sanitize @everyone/@here
   in voice transcripts.

3. CLI voice prefix ("[Voice input...]") is now API-call-local only —
   stripped from returned history so it never persists to session DB or
   resumed sessions.

4. /voice off now disables base adapter auto-TTS via _auto_tts_disabled_chats
   set — voice input no longer triggers TTS when voice mode is off.

											
										
										
											2026-03-14 10:31:49 +03:00
+								        adapter = self.adapters.get(event.source.platform)
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								        if args in ("on", "enable"):
 								            self._voice_mode[chat_id] = "voice_only"
 								            self._save_voice_modes()
-												fix: address PR review round 5 — streaming guard, VC auth, history prefix, auto-TTS control

1. Gate _streaming_api_call to chat_completions mode only — Anthropic and
   Codex fall back to _interruptible_api_call. Preserve Anthropic base_url
   across all client rebuild paths (interrupt, fallback, 401 refresh).

2. Discord VC synthetic events now use chat_type="channel" instead of
   defaulting to "dm" — prevents session bleed into DM context.
   Authorization runs before echoing transcript. Sanitize @everyone/@here
   in voice transcripts.

3. CLI voice prefix ("[Voice input...]") is now API-call-local only —
   stripped from returned history so it never persists to session DB or
   resumed sessions.

4. /voice off now disables base adapter auto-TTS via _auto_tts_disabled_chats
   set — voice input no longer triggers TTS when voice mode is off.

											
										
										
											2026-03-14 10:31:49 +03:00
+								            if adapter:
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								                self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								            return (
 								                "Voice mode enabled.\n"
 								                "I'll reply with voice when you send voice messages.\n"
 								                "Use /voice tts to get voice replies for all messages."
 								            )
 								        elif args in ("off", "disable"):
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								            self._voice_mode[chat_id] = "off"
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								            self._save_voice_modes()
-												fix: address PR review round 5 — streaming guard, VC auth, history prefix, auto-TTS control

1. Gate _streaming_api_call to chat_completions mode only — Anthropic and
   Codex fall back to _interruptible_api_call. Preserve Anthropic base_url
   across all client rebuild paths (interrupt, fallback, 401 refresh).

2. Discord VC synthetic events now use chat_type="channel" instead of
   defaulting to "dm" — prevents session bleed into DM context.
   Authorization runs before echoing transcript. Sanitize @everyone/@here
   in voice transcripts.

3. CLI voice prefix ("[Voice input...]") is now API-call-local only —
   stripped from returned history so it never persists to session DB or
   resumed sessions.

4. /voice off now disables base adapter auto-TTS via _auto_tts_disabled_chats
   set — voice input no longer triggers TTS when voice mode is off.

											
										
										
											2026-03-14 10:31:49 +03:00
+								            if adapter:
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								                self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								            return "Voice mode disabled. Text-only replies."
 								        elif args == "tts":
 								            self._voice_mode[chat_id] = "all"
 								            self._save_voice_modes()
-												fix: address PR review round 5 — streaming guard, VC auth, history prefix, auto-TTS control

1. Gate _streaming_api_call to chat_completions mode only — Anthropic and
   Codex fall back to _interruptible_api_call. Preserve Anthropic base_url
   across all client rebuild paths (interrupt, fallback, 401 refresh).

2. Discord VC synthetic events now use chat_type="channel" instead of
   defaulting to "dm" — prevents session bleed into DM context.
   Authorization runs before echoing transcript. Sanitize @everyone/@here
   in voice transcripts.

3. CLI voice prefix ("[Voice input...]") is now API-call-local only —
   stripped from returned history so it never persists to session DB or
   resumed sessions.

4. /voice off now disables base adapter auto-TTS via _auto_tts_disabled_chats
   set — voice input no longer triggers TTS when voice mode is off.

											
										
										
											2026-03-14 10:31:49 +03:00
+								            if adapter:
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								                self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								            return (
 								                "Auto-TTS enabled.\n"
 								                "All replies will include a voice message."
 								            )
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
+								        elif args in ("channel", "join"):
 								            return await self._handle_voice_channel_join(event)
 								        elif args == "leave":
 								            return await self._handle_voice_channel_leave(event)
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								        elif args == "status":
 								            mode = self._voice_mode.get(chat_id, "off")
 								            labels = {
 								                "off": "Off (text only)",
 								                "voice_only": "On (voice reply to voice messages)",
 								                "all": "TTS (voice reply to all messages)",
 								            }
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
+								            # Append voice channel info if connected
 								            adapter = self.adapters.get(event.source.platform)
 								            guild_id = self._get_guild_id(event)
-												feat: add voice channel awareness — inject participant and speaking state into agent context

											
										
										
											2026-03-14 02:14:34 +03:00
+								            if guild_id and hasattr(adapter, "get_voice_channel_info"):
 								                info = adapter.get_voice_channel_info(guild_id)
 								                if info:
 								                    lines = [
 								                        f"Voice mode: {labels.get(mode, mode)}",
 								                        f"Voice channel: #{info['channel_name']}",
 								                        f"Participants: {info['member_count']}",
 								                    ]
 								                    for m in info["members"]:
 								                        status = " (speaking)" if m.get("is_speaking") else ""
 								                        lines.append(f"  - {m['display_name']}{status}")
 								                    return "\n".join(lines)
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								            return f"Voice mode: {labels.get(mode, mode)}"
 								        else:
 								            # Toggle: off → on, on/all → off
 								            current = self._voice_mode.get(chat_id, "off")
 								            if current == "off":
 								                self._voice_mode[chat_id] = "voice_only"
 								                self._save_voice_modes()
-												fix: address PR review round 5 — streaming guard, VC auth, history prefix, auto-TTS control

1. Gate _streaming_api_call to chat_completions mode only — Anthropic and
   Codex fall back to _interruptible_api_call. Preserve Anthropic base_url
   across all client rebuild paths (interrupt, fallback, 401 refresh).

2. Discord VC synthetic events now use chat_type="channel" instead of
   defaulting to "dm" — prevents session bleed into DM context.
   Authorization runs before echoing transcript. Sanitize @everyone/@here
   in voice transcripts.

3. CLI voice prefix ("[Voice input...]") is now API-call-local only —
   stripped from returned history so it never persists to session DB or
   resumed sessions.

4. /voice off now disables base adapter auto-TTS via _auto_tts_disabled_chats
   set — voice input no longer triggers TTS when voice mode is off.

											
										
										
											2026-03-14 10:31:49 +03:00
+								                if adapter:
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								                    self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								                return "Voice mode enabled."
 								            else:
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								                self._voice_mode[chat_id] = "off"
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								                self._save_voice_modes()
-												fix: address PR review round 5 — streaming guard, VC auth, history prefix, auto-TTS control

1. Gate _streaming_api_call to chat_completions mode only — Anthropic and
   Codex fall back to _interruptible_api_call. Preserve Anthropic base_url
   across all client rebuild paths (interrupt, fallback, 401 refresh).

2. Discord VC synthetic events now use chat_type="channel" instead of
   defaulting to "dm" — prevents session bleed into DM context.
   Authorization runs before echoing transcript. Sanitize @everyone/@here
   in voice transcripts.

3. CLI voice prefix ("[Voice input...]") is now API-call-local only —
   stripped from returned history so it never persists to session DB or
   resumed sessions.

4. /voice off now disables base adapter auto-TTS via _auto_tts_disabled_chats
   set — voice input no longer triggers TTS when voice mode is off.

											
										
										
											2026-03-14 10:31:49 +03:00
+								                if adapter:
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								                    self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								                return "Voice mode disabled."
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
+								    async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
 								        """Join the user's current Discord voice channel."""
 								        adapter = self.adapters.get(event.source.platform)
 								        if not hasattr(adapter, "join_voice_channel"):
 								            return "Voice channels are not supported on this platform."
 								        guild_id = self._get_guild_id(event)
 								        if not guild_id:
 								            return "This command only works in a Discord server."
 								        voice_channel = await adapter.get_user_voice_channel(
 								            guild_id, event.source.user_id
 								        )
 								        if not voice_channel:
 								            return "You need to be in a voice channel first."
-												fix: 8 voice pipeline bugs with tests proving each fix

1. VoiceReceiver.stop() now acquires _lock before clearing shared state
   to prevent race with _on_packet on the socket reader thread
2. _packet_debug_count moved from class-level to instance-level to avoid
   cross-instance race condition in multi-guild setups
3. play_in_voice_channel uses asyncio.get_running_loop() instead of
   deprecated asyncio.get_event_loop()
4. _send_voice_reply uses uuid for filenames instead of time-based names
   that can collide when two replies happen in the same second
5. Voice timeout now notifies runner via _on_voice_disconnect callback
   so runner cleans up _voice_mode state (prevents orphaned TTS replies)
6. play_in_voice_channel adds PLAYBACK_TIMEOUT (120s) to prevent
   infinite blocking when FFmpeg callback is never called
7. _send_voice_reply moves temp file cleanup to finally block so files
   are always cleaned up even when send_voice/play raises
8. Base adapter auto-TTS wraps play_tts in try/finally with os.remove
   to clean up generated audio files after playback

18 new tests (120 total voice tests)

											
										
										
											2026-03-11 23:57:42 +03:00
+								        # Wire callbacks BEFORE join so voice input arriving immediately
-												fix: voice pipeline thread safety and error handling bugs

- Add lock protection around VoiceReceiver buffer writes in _on_packet
  to prevent race condition with check_silence on different threads
- Wire _voice_input_callback BEFORE join_voice_channel to avoid
  losing voice input during the join window
- Add try/except around leave_voice_channel to ensure state cleanup
  (voice_mode, callback) even if leave raises an exception
- Guard against empty text after markdown stripping in base.py auto-TTS
- Add 11 tests proving each bug and verifying the fix

											
										
										
											2026-03-11 23:36:47 +03:00
+								        # after connection is not lost.
 								        if hasattr(adapter, "_voice_input_callback"):
 								            adapter._voice_input_callback = self._handle_voice_channel_input
-												fix: 8 voice pipeline bugs with tests proving each fix

1. VoiceReceiver.stop() now acquires _lock before clearing shared state
   to prevent race with _on_packet on the socket reader thread
2. _packet_debug_count moved from class-level to instance-level to avoid
   cross-instance race condition in multi-guild setups
3. play_in_voice_channel uses asyncio.get_running_loop() instead of
   deprecated asyncio.get_event_loop()
4. _send_voice_reply uses uuid for filenames instead of time-based names
   that can collide when two replies happen in the same second
5. Voice timeout now notifies runner via _on_voice_disconnect callback
   so runner cleans up _voice_mode state (prevents orphaned TTS replies)
6. play_in_voice_channel adds PLAYBACK_TIMEOUT (120s) to prevent
   infinite blocking when FFmpeg callback is never called
7. _send_voice_reply moves temp file cleanup to finally block so files
   are always cleaned up even when send_voice/play raises
8. Base adapter auto-TTS wraps play_tts in try/finally with os.remove
   to clean up generated audio files after playback

18 new tests (120 total voice tests)

											
										
										
											2026-03-11 23:57:42 +03:00
+								        if hasattr(adapter, "_on_voice_disconnect"):
 								            adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup
-												fix: voice pipeline thread safety and error handling bugs

- Add lock protection around VoiceReceiver buffer writes in _on_packet
  to prevent race condition with check_silence on different threads
- Wire _voice_input_callback BEFORE join_voice_channel to avoid
  losing voice input during the join window
- Add try/except around leave_voice_channel to ensure state cleanup
  (voice_mode, callback) even if leave raises an exception
- Guard against empty text after markdown stripping in base.py auto-TTS
- Add 11 tests proving each bug and verifying the fix

											
										
										
											2026-03-11 23:36:47 +03:00
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
+								        try:
 								            success = await adapter.join_voice_channel(voice_channel)
 								        except Exception as e:
 								            logger.warning("Failed to join voice channel: %s", e)
-												fix: voice pipeline thread safety and error handling bugs

- Add lock protection around VoiceReceiver buffer writes in _on_packet
  to prevent race condition with check_silence on different threads
- Wire _voice_input_callback BEFORE join_voice_channel to avoid
  losing voice input during the join window
- Add try/except around leave_voice_channel to ensure state cleanup
  (voice_mode, callback) even if leave raises an exception
- Guard against empty text after markdown stripping in base.py auto-TTS
- Add 11 tests proving each bug and verifying the fix

											
										
										
											2026-03-11 23:36:47 +03:00
+								            adapter._voice_input_callback = None
-												fix(voice): show clear error when voice dependencies are missing

When PyNaCl or davey is not installed, joining a voice channel fails
with a raw exception. Now shows a human-readable message pointing
the user to reinstall with voice support.

Closes #1336

											
										
										
											2026-03-15 11:58:48 +03:00
+								            err_lower = str(e).lower()
 								            if "pynacl" in err_lower or "nacl" in err_lower or "davey" in err_lower:
-												test(voice): clarify install guidance and local skips

Add an explicit messaging-extra install hint to the missing PyNaCl/davey error path, cover it with a voice-channel join regression test, and skip the low-level NaCl packet tests when PyNaCl is not installed locally.

											
										
										
											2026-03-15 05:24:34 -07:00
+								                return (
 								                    "Voice dependencies are missing (PyNaCl / davey). "
 								                    "Install or reinstall Hermes with the messaging extra, e.g. "
 								                    "`pip install hermes-agent[messaging]`."
 								                )
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
+								            return f"Failed to join voice channel: {e}"
 								        if success:
 								            adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
 								            self._voice_mode[event.source.chat_id] = "all"
 								            self._save_voice_modes()
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								            self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
+								            return (
 								                f"Joined voice channel **{voice_channel.name}**.\n"
-												feat: add Discord voice channel listening — STT transcription and agent response pipeline

Phase 2 of voice channel support: bot listens to users speaking in VC,
transcribes speech via Groq Whisper, and processes through the agent pipeline.

- Add VoiceReceiver class for RTP packet capture, NaCl/DAVE decryption, Opus decode
- Add silence detection and per-user PCM buffering
- Wire voice input callback from adapter to GatewayRunner
- Fix adapter dict key: use Platform.DISCORD enum instead of string
- Fix guild_id extraction for synthetic voice events via SimpleNamespace raw_message
- Pause/resume receiver during TTS playback to prevent echo

											
										
										
											2026-03-11 04:34:58 +03:00
+								                f"I'll speak my replies and listen to you. Use /voice leave to disconnect."
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
+								            )
-												fix: voice pipeline thread safety and error handling bugs

- Add lock protection around VoiceReceiver buffer writes in _on_packet
  to prevent race condition with check_silence on different threads
- Wire _voice_input_callback BEFORE join_voice_channel to avoid
  losing voice input during the join window
- Add try/except around leave_voice_channel to ensure state cleanup
  (voice_mode, callback) even if leave raises an exception
- Guard against empty text after markdown stripping in base.py auto-TTS
- Add 11 tests proving each bug and verifying the fix

											
										
										
											2026-03-11 23:36:47 +03:00
+								        # Join failed — clear callback
 								        adapter._voice_input_callback = None
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
+								        return "Failed to join voice channel. Check bot permissions (Connect + Speak)."
 								    async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
 								        """Leave the Discord voice channel."""
 								        adapter = self.adapters.get(event.source.platform)
 								        guild_id = self._get_guild_id(event)
 								        if not guild_id or not hasattr(adapter, "leave_voice_channel"):
 								            return "Not in a voice channel."
 								        if not hasattr(adapter, "is_in_voice_channel") or not adapter.is_in_voice_channel(guild_id):
 								            return "Not in a voice channel."
-												fix: voice pipeline thread safety and error handling bugs

- Add lock protection around VoiceReceiver buffer writes in _on_packet
  to prevent race condition with check_silence on different threads
- Wire _voice_input_callback BEFORE join_voice_channel to avoid
  losing voice input during the join window
- Add try/except around leave_voice_channel to ensure state cleanup
  (voice_mode, callback) even if leave raises an exception
- Guard against empty text after markdown stripping in base.py auto-TTS
- Add 11 tests proving each bug and verifying the fix

											
										
										
											2026-03-11 23:36:47 +03:00
+								        try:
 								            await adapter.leave_voice_channel(guild_id)
 								        except Exception as e:
 								            logger.warning("Error leaving voice channel: %s", e)
 								        # Always clean up state even if leave raised an exception
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								        self._voice_mode[event.source.chat_id] = "off"
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
+								        self._save_voice_modes()
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								        self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=True)
-												fix: voice pipeline thread safety and error handling bugs

- Add lock protection around VoiceReceiver buffer writes in _on_packet
  to prevent race condition with check_silence on different threads
- Wire _voice_input_callback BEFORE join_voice_channel to avoid
  losing voice input during the join window
- Add try/except around leave_voice_channel to ensure state cleanup
  (voice_mode, callback) even if leave raises an exception
- Guard against empty text after markdown stripping in base.py auto-TTS
- Add 11 tests proving each bug and verifying the fix

											
										
										
											2026-03-11 23:36:47 +03:00
+								        if hasattr(adapter, "_voice_input_callback"):
 								            adapter._voice_input_callback = None
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
+								        return "Left voice channel."
-												fix: 8 voice pipeline bugs with tests proving each fix

1. VoiceReceiver.stop() now acquires _lock before clearing shared state
   to prevent race with _on_packet on the socket reader thread
2. _packet_debug_count moved from class-level to instance-level to avoid
   cross-instance race condition in multi-guild setups
3. play_in_voice_channel uses asyncio.get_running_loop() instead of
   deprecated asyncio.get_event_loop()
4. _send_voice_reply uses uuid for filenames instead of time-based names
   that can collide when two replies happen in the same second
5. Voice timeout now notifies runner via _on_voice_disconnect callback
   so runner cleans up _voice_mode state (prevents orphaned TTS replies)
6. play_in_voice_channel adds PLAYBACK_TIMEOUT (120s) to prevent
   infinite blocking when FFmpeg callback is never called
7. _send_voice_reply moves temp file cleanup to finally block so files
   are always cleaned up even when send_voice/play raises
8. Base adapter auto-TTS wraps play_tts in try/finally with os.remove
   to clean up generated audio files after playback

18 new tests (120 total voice tests)

											
										
										
											2026-03-11 23:57:42 +03:00
+								    def _handle_voice_timeout_cleanup(self, chat_id: str) -> None:
 								        """Called by the adapter when a voice channel times out.
 								        Cleans up runner-side voice_mode state that the adapter cannot reach.
 								        """
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								        self._voice_mode[chat_id] = "off"
-												fix: 8 voice pipeline bugs with tests proving each fix

1. VoiceReceiver.stop() now acquires _lock before clearing shared state
   to prevent race with _on_packet on the socket reader thread
2. _packet_debug_count moved from class-level to instance-level to avoid
   cross-instance race condition in multi-guild setups
3. play_in_voice_channel uses asyncio.get_running_loop() instead of
   deprecated asyncio.get_event_loop()
4. _send_voice_reply uses uuid for filenames instead of time-based names
   that can collide when two replies happen in the same second
5. Voice timeout now notifies runner via _on_voice_disconnect callback
   so runner cleans up _voice_mode state (prevents orphaned TTS replies)
6. play_in_voice_channel adds PLAYBACK_TIMEOUT (120s) to prevent
   infinite blocking when FFmpeg callback is never called
7. _send_voice_reply moves temp file cleanup to finally block so files
   are always cleaned up even when send_voice/play raises
8. Base adapter auto-TTS wraps play_tts in try/finally with os.remove
   to clean up generated audio files after playback

18 new tests (120 total voice tests)

											
										
										
											2026-03-11 23:57:42 +03:00
+								        self._save_voice_modes()
-												fix: persist clean voice transcripts and /voice off state

- keep CLI voice prefixes API-local while storing the original user text
- persist explicit gateway off state and restore adapter auto-TTS suppression on restart
- add regression coverage for both behaviors

											
										
										
											2026-03-14 06:14:22 -07:00
+								        adapter = self.adapters.get(Platform.DISCORD)
 								        self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)
-												fix: 8 voice pipeline bugs with tests proving each fix

1. VoiceReceiver.stop() now acquires _lock before clearing shared state
   to prevent race with _on_packet on the socket reader thread
2. _packet_debug_count moved from class-level to instance-level to avoid
   cross-instance race condition in multi-guild setups
3. play_in_voice_channel uses asyncio.get_running_loop() instead of
   deprecated asyncio.get_event_loop()
4. _send_voice_reply uses uuid for filenames instead of time-based names
   that can collide when two replies happen in the same second
5. Voice timeout now notifies runner via _on_voice_disconnect callback
   so runner cleans up _voice_mode state (prevents orphaned TTS replies)
6. play_in_voice_channel adds PLAYBACK_TIMEOUT (120s) to prevent
   infinite blocking when FFmpeg callback is never called
7. _send_voice_reply moves temp file cleanup to finally block so files
   are always cleaned up even when send_voice/play raises
8. Base adapter auto-TTS wraps play_tts in try/finally with os.remove
   to clean up generated audio files after playback

18 new tests (120 total voice tests)

											
										
										
											2026-03-11 23:57:42 +03:00
-												feat: add Discord voice channel listening — STT transcription and agent response pipeline

Phase 2 of voice channel support: bot listens to users speaking in VC,
transcribes speech via Groq Whisper, and processes through the agent pipeline.

- Add VoiceReceiver class for RTP packet capture, NaCl/DAVE decryption, Opus decode
- Add silence detection and per-user PCM buffering
- Wire voice input callback from adapter to GatewayRunner
- Fix adapter dict key: use Platform.DISCORD enum instead of string
- Fix guild_id extraction for synthetic voice events via SimpleNamespace raw_message
- Pause/resume receiver during TTS playback to prevent echo

											
										
										
											2026-03-11 04:34:58 +03:00
+								    async def _handle_voice_channel_input(
 								        self, guild_id: int, user_id: int, transcript: str
 								    ):
 								        """Handle transcribed voice from a user in a voice channel.
 								        Creates a synthetic MessageEvent and processes it through the
 								        adapter's full message pipeline (session, typing, agent, TTS reply).
 								        """
 								        adapter = self.adapters.get(Platform.DISCORD)
 								        if not adapter:
 								            return
 								        text_ch_id = adapter._voice_text_channels.get(guild_id)
 								        if not text_ch_id:
 								            return
-												fix: address PR review round 5 — streaming guard, VC auth, history prefix, auto-TTS control

1. Gate _streaming_api_call to chat_completions mode only — Anthropic and
   Codex fall back to _interruptible_api_call. Preserve Anthropic base_url
   across all client rebuild paths (interrupt, fallback, 401 refresh).

2. Discord VC synthetic events now use chat_type="channel" instead of
   defaulting to "dm" — prevents session bleed into DM context.
   Authorization runs before echoing transcript. Sanitize @everyone/@here
   in voice transcripts.

3. CLI voice prefix ("[Voice input...]") is now API-call-local only —
   stripped from returned history so it never persists to session DB or
   resumed sessions.

4. /voice off now disables base adapter auto-TTS via _auto_tts_disabled_chats
   set — voice input no longer triggers TTS when voice mode is off.

											
										
										
											2026-03-14 10:31:49 +03:00
+								        # Check authorization before processing voice input
 								        source = SessionSource(
 								            platform=Platform.DISCORD,
 								            chat_id=str(text_ch_id),
 								            user_id=str(user_id),
 								            user_name=str(user_id),
 								            chat_type="channel",
 								        )
 								        if not self._is_user_authorized(source):
 								            logger.debug("Unauthorized voice input from user %d, ignoring", user_id)
 								            return
 								        # Show transcript in text channel (after auth, with mention sanitization)
-												feat: add Discord voice channel listening — STT transcription and agent response pipeline

Phase 2 of voice channel support: bot listens to users speaking in VC,
transcribes speech via Groq Whisper, and processes through the agent pipeline.

- Add VoiceReceiver class for RTP packet capture, NaCl/DAVE decryption, Opus decode
- Add silence detection and per-user PCM buffering
- Wire voice input callback from adapter to GatewayRunner
- Fix adapter dict key: use Platform.DISCORD enum instead of string
- Fix guild_id extraction for synthetic voice events via SimpleNamespace raw_message
- Pause/resume receiver during TTS playback to prevent echo

											
										
										
											2026-03-11 04:34:58 +03:00
+								        try:
 								            channel = adapter._client.get_channel(text_ch_id)
 								            if channel:
-												fix: address PR review round 5 — streaming guard, VC auth, history prefix, auto-TTS control

1. Gate _streaming_api_call to chat_completions mode only — Anthropic and
   Codex fall back to _interruptible_api_call. Preserve Anthropic base_url
   across all client rebuild paths (interrupt, fallback, 401 refresh).

2. Discord VC synthetic events now use chat_type="channel" instead of
   defaulting to "dm" — prevents session bleed into DM context.
   Authorization runs before echoing transcript. Sanitize @everyone/@here
   in voice transcripts.

3. CLI voice prefix ("[Voice input...]") is now API-call-local only —
   stripped from returned history so it never persists to session DB or
   resumed sessions.

4. /voice off now disables base adapter auto-TTS via _auto_tts_disabled_chats
   set — voice input no longer triggers TTS when voice mode is off.

											
										
										
											2026-03-14 10:31:49 +03:00
+								                safe_text = transcript[:2000].replace("@everyone", "@\u200beveryone").replace("@here", "@\u200bhere")
 								                await channel.send(f"**[Voice]** <@{user_id}>: {safe_text}")
-												feat: add Discord voice channel listening — STT transcription and agent response pipeline

Phase 2 of voice channel support: bot listens to users speaking in VC,
transcribes speech via Groq Whisper, and processes through the agent pipeline.

- Add VoiceReceiver class for RTP packet capture, NaCl/DAVE decryption, Opus decode
- Add silence detection and per-user PCM buffering
- Wire voice input callback from adapter to GatewayRunner
- Fix adapter dict key: use Platform.DISCORD enum instead of string
- Fix guild_id extraction for synthetic voice events via SimpleNamespace raw_message
- Pause/resume receiver during TTS playback to prevent echo

											
										
										
											2026-03-11 04:34:58 +03:00
+								        except Exception:
 								            pass
 								        # Build a synthetic MessageEvent and feed through the normal pipeline
 								        # Use SimpleNamespace as raw_message so _get_guild_id() can extract
 								        # guild_id and _send_voice_reply() plays audio in the voice channel.
 								        from types import SimpleNamespace
 								        event = MessageEvent(
 								            source=source,
 								            text=transcript,
 								            message_type=MessageType.VOICE,
 								            raw_message=SimpleNamespace(guild_id=guild_id, guild=None),
 								        )
 								        await adapter.handle_message(event)
-												fix: extract voice reply logic and add comprehensive tests

- Fix tempfile.mktemp() TOCTOU race in Discord voice input (use NamedTemporaryFile)
- Extract voice reply decision from _handle_message into _should_send_voice_reply()
- Rewrite TestAutoVoiceReply to call real method instead of testing a copy
- Add 59 new tests: VoiceReceiver, VC commands, adapter methods, streaming TTS

											
										
										
											2026-03-11 23:18:49 +03:00
+								    def _should_send_voice_reply(
 								        self,
 								        event: MessageEvent,
 								        response: str,
 								        agent_messages: list,
-												fix(voice): enable TTS voice reply when streaming is active (#2322)

When streaming is enabled, the base adapter receives None from
_handle_message (already_sent=True) and cannot run auto-TTS for
voice input. The runner was unconditionally skipping voice input
TTS assuming the base adapter would handle it.

Now the runner takes over TTS responsibility when streaming has
already delivered the text response, so voice channel playback
works with both streaming on and off.

Streaming off behavior is unchanged (default already_sent=False
preserves the original code path exactly).

Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
											
										
										
											2026-03-21 08:08:37 -07:00
+								        already_sent: bool = False,
-												fix: extract voice reply logic and add comprehensive tests

- Fix tempfile.mktemp() TOCTOU race in Discord voice input (use NamedTemporaryFile)
- Extract voice reply decision from _handle_message into _should_send_voice_reply()
- Rewrite TestAutoVoiceReply to call real method instead of testing a copy
- Add 59 new tests: VoiceReceiver, VC commands, adapter methods, streaming TTS

											
										
										
											2026-03-11 23:18:49 +03:00
+								    ) -> bool:
 								        """Decide whether the runner should send a TTS voice reply.
 								        Returns False when:
 								        - voice_mode is off for this chat
 								        - response is empty or an error
 								        - agent already called text_to_speech tool (dedup)
 								        - voice input and base adapter auto-TTS already handled it (skip_double)
-												fix(voice): enable TTS voice reply when streaming is active (#2322)

When streaming is enabled, the base adapter receives None from
_handle_message (already_sent=True) and cannot run auto-TTS for
voice input. The runner was unconditionally skipping voice input
TTS assuming the base adapter would handle it.

Now the runner takes over TTS responsibility when streaming has
already delivered the text response, so voice channel playback
works with both streaming on and off.

Streaming off behavior is unchanged (default already_sent=False
preserves the original code path exactly).

Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
											
										
										
											2026-03-21 08:08:37 -07:00
+								          UNLESS streaming already consumed the response (already_sent=True),
 								          in which case the base adapter won't have text for auto-TTS so the
 								          runner must handle it.
-												fix: extract voice reply logic and add comprehensive tests

- Fix tempfile.mktemp() TOCTOU race in Discord voice input (use NamedTemporaryFile)
- Extract voice reply decision from _handle_message into _should_send_voice_reply()
- Rewrite TestAutoVoiceReply to call real method instead of testing a copy
- Add 59 new tests: VoiceReceiver, VC commands, adapter methods, streaming TTS

											
										
										
											2026-03-11 23:18:49 +03:00
+								        """
 								        if not response or response.startswith("Error:"):
 								            return False
 								        chat_id = event.source.chat_id
 								        voice_mode = self._voice_mode.get(chat_id, "off")
 								        is_voice_input = (event.message_type == MessageType.VOICE)
 								        should = (
 								            (voice_mode == "all")
 								            or (voice_mode == "voice_only" and is_voice_input)
 								        )
 								        if not should:
 								            return False
 								        # Dedup: agent already called TTS tool
 								        has_agent_tts = any(
 								            msg.get("role") == "assistant"
 								            and any(
 								                tc.get("function", {}).get("name") == "text_to_speech"
 								                for tc in (msg.get("tool_calls") or [])
 								            )
 								            for msg in agent_messages
 								        )
 								        if has_agent_tts:
 								            return False
-												fix(voice): make play_tts play in VC instead of no-op

play_tts was returning success without playing anything when bot was
in a voice channel. Now it calls play_in_voice_channel directly.

Simplified skip_double dedup: base adapter handles voice input TTS
via play_tts (which now works for VC), runner skips to avoid double.

											
										
										
											2026-03-14 23:57:40 +03:00
+								        # Dedup: base adapter auto-TTS already handles voice input
 								        # (play_tts plays in VC when connected, so runner can skip).
-												fix(voice): enable TTS voice reply when streaming is active (#2322)

When streaming is enabled, the base adapter receives None from
_handle_message (already_sent=True) and cannot run auto-TTS for
voice input. The runner was unconditionally skipping voice input
TTS assuming the base adapter would handle it.

Now the runner takes over TTS responsibility when streaming has
already delivered the text response, so voice channel playback
works with both streaming on and off.

Streaming off behavior is unchanged (default already_sent=False
preserves the original code path exactly).

Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
											
										
										
											2026-03-21 08:08:37 -07:00
+								        # When streaming already delivered the text (already_sent=True),
 								        # the base adapter will receive None and can't run auto-TTS,
 								        # so the runner must take over.
 								        if is_voice_input and not already_sent:
-												fix: extract voice reply logic and add comprehensive tests

- Fix tempfile.mktemp() TOCTOU race in Discord voice input (use NamedTemporaryFile)
- Extract voice reply decision from _handle_message into _should_send_voice_reply()
- Rewrite TestAutoVoiceReply to call real method instead of testing a copy
- Add 59 new tests: VoiceReceiver, VC commands, adapter methods, streaming TTS

											
										
										
											2026-03-11 23:18:49 +03:00
+								            return False
 								        return True
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								    async def _send_voice_reply(self, event: MessageEvent, text: str) -> None:
 								        """Generate TTS audio and send as a voice message before the text reply."""
-												fix: 8 voice pipeline bugs with tests proving each fix

1. VoiceReceiver.stop() now acquires _lock before clearing shared state
   to prevent race with _on_packet on the socket reader thread
2. _packet_debug_count moved from class-level to instance-level to avoid
   cross-instance race condition in multi-guild setups
3. play_in_voice_channel uses asyncio.get_running_loop() instead of
   deprecated asyncio.get_event_loop()
4. _send_voice_reply uses uuid for filenames instead of time-based names
   that can collide when two replies happen in the same second
5. Voice timeout now notifies runner via _on_voice_disconnect callback
   so runner cleans up _voice_mode state (prevents orphaned TTS replies)
6. play_in_voice_channel adds PLAYBACK_TIMEOUT (120s) to prevent
   infinite blocking when FFmpeg callback is never called
7. _send_voice_reply moves temp file cleanup to finally block so files
   are always cleaned up even when send_voice/play raises
8. Base adapter auto-TTS wraps play_tts in try/finally with os.remove
   to clean up generated audio files after playback

18 new tests (120 total voice tests)

											
										
										
											2026-03-11 23:57:42 +03:00
+								        import uuid as _uuid
 								        audio_path = None
 								        actual_path = None
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								        try:
 								            from tools.tts_tool import text_to_speech_tool, _strip_markdown_for_tts
 								            tts_text = _strip_markdown_for_tts(text[:4000])
 								            if not tts_text:
 								                return
-												fix: Discord voice bubble + edge-tts mp3/ogg format mismatch

- Send Discord voice messages with flags=8192 and waveform metadata
  so they render as native voice bubbles instead of file attachments
- Use .mp3 output path for TTS so edge-tts opus conversion works
  correctly (edge always outputs mp3, convert was skipped for .ogg)
- Use actual file_path from TTS result after potential opus conversion

											
										
										
											2026-03-11 00:24:29 +03:00
+								            # Use .mp3 extension so edge-tts conversion to opus works correctly.
 								            # The TTS tool may convert to .ogg — use file_path from result.
 								            audio_path = os.path.join(
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								                tempfile.gettempdir(), "hermes_voice",
-												fix: 8 voice pipeline bugs with tests proving each fix

1. VoiceReceiver.stop() now acquires _lock before clearing shared state
   to prevent race with _on_packet on the socket reader thread
2. _packet_debug_count moved from class-level to instance-level to avoid
   cross-instance race condition in multi-guild setups
3. play_in_voice_channel uses asyncio.get_running_loop() instead of
   deprecated asyncio.get_event_loop()
4. _send_voice_reply uses uuid for filenames instead of time-based names
   that can collide when two replies happen in the same second
5. Voice timeout now notifies runner via _on_voice_disconnect callback
   so runner cleans up _voice_mode state (prevents orphaned TTS replies)
6. play_in_voice_channel adds PLAYBACK_TIMEOUT (120s) to prevent
   infinite blocking when FFmpeg callback is never called
7. _send_voice_reply moves temp file cleanup to finally block so files
   are always cleaned up even when send_voice/play raises
8. Base adapter auto-TTS wraps play_tts in try/finally with os.remove
   to clean up generated audio files after playback

18 new tests (120 total voice tests)

											
										
										
											2026-03-11 23:57:42 +03:00
+								                f"tts_reply_{_uuid.uuid4().hex[:12]}.mp3",
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								            )
-												fix: Discord voice bubble + edge-tts mp3/ogg format mismatch

- Send Discord voice messages with flags=8192 and waveform metadata
  so they render as native voice bubbles instead of file attachments
- Use .mp3 output path for TTS so edge-tts opus conversion works
  correctly (edge always outputs mp3, convert was skipped for .ogg)
- Use actual file_path from TTS result after potential opus conversion

											
										
										
											2026-03-11 00:24:29 +03:00
+								            os.makedirs(os.path.dirname(audio_path), exist_ok=True)
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
 								            result_json = await asyncio.to_thread(
-												fix: Discord voice bubble + edge-tts mp3/ogg format mismatch

- Send Discord voice messages with flags=8192 and waveform metadata
  so they render as native voice bubbles instead of file attachments
- Use .mp3 output path for TTS so edge-tts opus conversion works
  correctly (edge always outputs mp3, convert was skipped for .ogg)
- Use actual file_path from TTS result after potential opus conversion

											
										
										
											2026-03-11 00:24:29 +03:00
+								                text_to_speech_tool, text=tts_text, output_path=audio_path
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								            )
 								            result = json.loads(result_json)
-												fix: Discord voice bubble + edge-tts mp3/ogg format mismatch

- Send Discord voice messages with flags=8192 and waveform metadata
  so they render as native voice bubbles instead of file attachments
- Use .mp3 output path for TTS so edge-tts opus conversion works
  correctly (edge always outputs mp3, convert was skipped for .ogg)
- Use actual file_path from TTS result after potential opus conversion

											
										
										
											2026-03-11 00:24:29 +03:00
+								            # Use the actual file path from result (may differ after opus conversion)
 								            actual_path = result.get("file_path", audio_path)
 								            if not result.get("success") or not os.path.isfile(actual_path):
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
+								                logger.warning("Auto voice reply TTS failed: %s", result.get("error"))
 								                return
 								            adapter = self.adapters.get(event.source.platform)
-												feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)

											
										
										
											2026-03-11 02:13:43 +03:00
 								            # If connected to a voice channel, play there instead of sending a file
 								            guild_id = self._get_guild_id(event)
 								            if (guild_id
 								                    and hasattr(adapter, "play_in_voice_channel")
 								                    and hasattr(adapter, "is_in_voice_channel")
 								                    and adapter.is_in_voice_channel(guild_id)):
 								                await adapter.play_in_voice_channel(guild_id, actual_path)
 								            elif adapter and hasattr(adapter, "send_voice"):
-												feat: add /voice slash command to Discord + fix cross-platform send_voice

- Register /voice as Discord slash command with mode choices
- Fix _send_voice_reply to handle adapters that don't accept metadata
  parameter (Discord) by inspecting the method signature at runtime

											
										
										
											2026-03-10 23:37:02 +03:00
+								                send_kwargs: Dict[str, Any] = {
 								                    "chat_id": event.source.chat_id,
-												fix: Discord voice bubble + edge-tts mp3/ogg format mismatch

- Send Discord voice messages with flags=8192 and waveform metadata
  so they render as native voice bubbles instead of file attachments
- Use .mp3 output path for TTS so edge-tts opus conversion works
  correctly (edge always outputs mp3, convert was skipped for .ogg)
- Use actual file_path from TTS result after potential opus conversion

											
										
										
											2026-03-11 00:24:29 +03:00
+								                    "audio_path": actual_path,
-												feat: add /voice slash command to Discord + fix cross-platform send_voice

- Register /voice as Discord slash command with mode choices
- Fix _send_voice_reply to handle adapters that don't accept metadata
  parameter (Discord) by inspecting the method signature at runtime

											
										
										
											2026-03-10 23:37:02 +03:00
+								                    "reply_to": event.message_id,
 								                }
 								                if event.source.thread_id:
 								                    send_kwargs["metadata"] = {"thread_id": event.source.thread_id}
 								                await adapter.send_voice(**send_kwargs)
-												fix: 8 voice pipeline bugs with tests proving each fix

1. VoiceReceiver.stop() now acquires _lock before clearing shared state
   to prevent race with _on_packet on the socket reader thread
2. _packet_debug_count moved from class-level to instance-level to avoid
   cross-instance race condition in multi-guild setups
3. play_in_voice_channel uses asyncio.get_running_loop() instead of
   deprecated asyncio.get_event_loop()
4. _send_voice_reply uses uuid for filenames instead of time-based names
   that can collide when two replies happen in the same second
5. Voice timeout now notifies runner via _on_voice_disconnect callback
   so runner cleans up _voice_mode state (prevents orphaned TTS replies)
6. play_in_voice_channel adds PLAYBACK_TIMEOUT (120s) to prevent
   infinite blocking when FFmpeg callback is never called
7. _send_voice_reply moves temp file cleanup to finally block so files
   are always cleaned up even when send_voice/play raises
8. Base adapter auto-TTS wraps play_tts in try/finally with os.remove
   to clean up generated audio files after playback

18 new tests (120 total voice tests)

											
										
										
											2026-03-11 23:57:42 +03:00
+								        except Exception as e:
 								            logger.warning("Auto voice reply failed: %s", e, exc_info=True)
 								        finally:
 								            for p in {audio_path, actual_path} - {None}:
-												fix: Discord voice bubble + edge-tts mp3/ogg format mismatch

- Send Discord voice messages with flags=8192 and waveform metadata
  so they render as native voice bubbles instead of file attachments
- Use .mp3 output path for TTS so edge-tts opus conversion works
  correctly (edge always outputs mp3, convert was skipped for .ogg)
- Use actual file_path from TTS result after potential opus conversion

											
										
										
											2026-03-11 00:24:29 +03:00
+								                try:
 								                    os.unlink(p)
 								                except OSError:
 								                    pass
-												feat: add /voice command for auto voice reply in Telegram gateway

- /voice on: reply with voice when user sends voice messages
- /voice tts: reply with voice to all messages
- /voice off: disable, text-only replies
- /voice status: show current mode
- Per-chat state persisted to gateway_voice_mode.json
- Dedup: skips auto-reply if agent already called text_to_speech tool
- drop_pending_updates=True to ignore stale Telegram messages on restart
- 25 tests covering command handler, reply logic, and edge cases

											
										
										
											2026-03-10 22:55:36 +03:00
-												fix(gateway): deliver MEDIA: files after streaming responses

When streaming is enabled, text chunks are sent to the user in
real-time including raw MEDIA: tags. The normal post-processing in
_process_message_background is skipped when already_sent=True, so
MEDIA: files were never extracted or delivered — the user just saw
the raw MEDIA:/path/to/file text.

Fix: after streaming completes, extract MEDIA: tags and local file
paths from the response and deliver them via the platform adapter.
The text is already sent (with the raw tag visible in the stream),
but the actual files now get delivered as attachments.

											
										
										
											2026-03-21 16:01:25 -07:00
+								    async def _deliver_media_from_response(
 								        self,
 								        response: str,
 								        event: MessageEvent,
 								        adapter,
 								    ) -> None:
 								        """Extract MEDIA: tags and local file paths from a response and deliver them.
 								        Called after streaming has already sent the text to the user, so the
 								        text itself is already delivered — this only handles file attachments
 								        that the normal _process_message_background path would have caught.
 								        """
 								        from pathlib import Path
 								        try:
 								            media_files, _ = adapter.extract_media(response)
 								            _, cleaned = adapter.extract_images(response)
 								            local_files, _ = adapter.extract_local_files(cleaned)
 								            _thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
 								            _AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
 								            _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
 								            _IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
 								            for media_path, is_voice in media_files:
 								                try:
 								                    ext = Path(media_path).suffix.lower()
 								                    if ext in _AUDIO_EXTS:
 								                        await adapter.send_voice(
 								                            chat_id=event.source.chat_id,
 								                            audio_path=media_path,
 								                            metadata=_thread_meta,
 								                        )
 								                    elif ext in _VIDEO_EXTS:
 								                        await adapter.send_video(
 								                            chat_id=event.source.chat_id,
 								                            video_path=media_path,
 								                            metadata=_thread_meta,
 								                        )
 								                    elif ext in _IMAGE_EXTS:
 								                        await adapter.send_image_file(
 								                            chat_id=event.source.chat_id,
 								                            image_path=media_path,
 								                            metadata=_thread_meta,
 								                        )
 								                    else:
 								                        await adapter.send_document(
 								                            chat_id=event.source.chat_id,
 								                            file_path=media_path,
 								                            metadata=_thread_meta,
 								                        )
 								                except Exception as e:
 								                    logger.warning("[%s] Post-stream media delivery failed: %s", adapter.name, e)
 								            for file_path in local_files:
 								                try:
 								                    ext = Path(file_path).suffix.lower()
 								                    if ext in _IMAGE_EXTS:
 								                        await adapter.send_image_file(
 								                            chat_id=event.source.chat_id,
 								                            image_path=file_path,
 								                            metadata=_thread_meta,
 								                        )
 								                    else:
 								                        await adapter.send_document(
 								                            chat_id=event.source.chat_id,
 								                            file_path=file_path,
 								                            metadata=_thread_meta,
 								                        )
 								                except Exception as e:
 								                    logger.warning("[%s] Post-stream file delivery failed: %s", adapter.name, e)
 								        except Exception as e:
 								            logger.warning("Post-stream media extraction failed: %s", e)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								    async def _handle_rollback_command(self, event: MessageEvent) -> str:
 								        """Handle /rollback command — list or restore filesystem checkpoints."""
 								        from tools.checkpoint_manager import CheckpointManager, format_checkpoint_list
 								        # Read checkpoint config from config.yaml
 								        cp_cfg = {}
 								        try:
 								            import yaml as _y
 								            _cfg_path = _hermes_home / "config.yaml"
 								            if _cfg_path.exists():
 								                with open(_cfg_path, encoding="utf-8") as _f:
 								                    _data = _y.safe_load(_f) or {}
 								                cp_cfg = _data.get("checkpoints", {})
 								                if isinstance(cp_cfg, bool):
 								                    cp_cfg = {"enabled": cp_cfg}
 								        except Exception:
 								            pass
 								        if not cp_cfg.get("enabled", False):
 								            return (
 								                "Checkpoints are not enabled.\n"
 								                "Enable in config.yaml:\n```\ncheckpoints:\n  enabled: true\n```"
 								            )
 								        mgr = CheckpointManager(
 								            enabled=True,
 								            max_snapshots=cp_cfg.get("max_snapshots", 50),
 								        )
 								        cwd = os.getenv("MESSAGING_CWD", str(Path.home()))
 								        arg = event.get_command_args().strip()
 								        if not arg:
 								            checkpoints = mgr.list_checkpoints(cwd)
 								            return format_checkpoint_list(checkpoints, cwd)
 								        # Restore by number or hash
 								        checkpoints = mgr.list_checkpoints(cwd)
 								        if not checkpoints:
 								            return f"No checkpoints found for {cwd}"
 								        target_hash = None
 								        try:
 								            idx = int(arg) - 1
 								            if 0 <= idx < len(checkpoints):
 								                target_hash = checkpoints[idx]["hash"]
 								            else:
 								                return f"Invalid checkpoint number. Use 1-{len(checkpoints)}."
 								        except ValueError:
 								            target_hash = arg
 								        result = mgr.restore(cwd, target_hash)
 								        if result["success"]:
 								            return (
 								                f"✅ Restored to checkpoint {result['restored_to']}: {result['reason']}\n"
 								                f"A pre-rollback snapshot was saved automatically."
 								            )
 								        return f"❌ {result['error']}"
 								    async def _handle_background_command(self, event: MessageEvent) -> str:
 								        """Handle /background <prompt> — run a prompt in a separate background session.
 								        Spawns a new AIAgent in a background thread with its own session.
 								        When it completes, sends the result back to the same chat without
 								        modifying the active session's conversation history.
 								        """
 								        prompt = event.get_command_args().strip()
 								        if not prompt:
 								            return (
 								                "Usage: /background <prompt>\n"
 								                "Example: /background Summarize the top HN stories today\n\n"
 								                "Runs the prompt in a separate session. "
 								                "You can keep chatting — the result will appear here when done."
 								            )
 								        source = event.source
 								        task_id = f"bg_{datetime.now().strftime('%H%M%S')}_{os.urandom(3).hex()}"
 								        # Fire-and-forget the background task
-												fix(gateway): track background task references in GatewayRunner (#3254)

Asyncio tasks created with create_task() but never stored can be
garbage collected mid-execution. Add self._background_tasks set to
hold references, with add_done_callback cleanup. Tracks:
- /background command task
- session-reset memory flush task
- session-resume memory flush task
Cancel all pending tasks in stop().

Update test fixtures that construct GatewayRunner via object.__new__()
to include the new _background_tasks attribute.

Cherry-picked from PR #3167 by memosr. The original PR also deleted
the DM topic auto-skill loading code — that deletion was excluded
from this salvage as it removes a shipped feature (#2598).

Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-26 14:33:48 -07:00
+								        _task = asyncio.create_task(
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            self._run_background_task(prompt, source, task_id)
 								        )
-												fix(gateway): track background task references in GatewayRunner (#3254)

Asyncio tasks created with create_task() but never stored can be
garbage collected mid-execution. Add self._background_tasks set to
hold references, with add_done_callback cleanup. Tracks:
- /background command task
- session-reset memory flush task
- session-resume memory flush task
Cancel all pending tasks in stop().

Update test fixtures that construct GatewayRunner via object.__new__()
to include the new _background_tasks attribute.

Cherry-picked from PR #3167 by memosr. The original PR also deleted
the DM topic auto-skill loading code — that deletion was excluded
from this salvage as it removes a shipped feature (#2598).

Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-26 14:33:48 -07:00
+								        self._background_tasks.add(_task)
 								        _task.add_done_callback(self._background_tasks.discard)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								        preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
 								        return f'🔄 Background task started: "{preview}"\nTask ID: {task_id}\nYou can keep chatting — results will appear when done.'
 								    async def _run_background_task(
 								        self, prompt: str, source: "SessionSource", task_id: str
 								    ) -> None:
 								        """Execute a background agent task and deliver the result to the chat."""
 								        from run_agent import AIAgent
 								        adapter = self.adapters.get(source.platform)
 								        if not adapter:
 								            logger.warning("No adapter for platform %s in background task %s", source.platform, task_id)
 								            return
 								        _thread_metadata = {"thread_id": source.thread_id} if source.thread_id else None
 								        try:
 								            runtime_kwargs = _resolve_runtime_agent_kwargs()
 								            if not runtime_kwargs.get("api_key"):
 								                await adapter.send(
 								                    source.chat_id,
 								                    f"❌ Background task {task_id} failed: no provider credentials configured.",
 								                    metadata=_thread_metadata,
 								                )
 								                return
-												fix: MCP toolset resolution for runtime and config (#3252)

Gateway sessions had their own inline toolset resolution that only read
platform_toolsets from config, which never includes MCP server names.
MCP tools were discovered and registered but invisible to the model.

- Replace duplicated gateway toolset resolution in _run_agent() and
  _run_background_task() with calls to the shared _get_platform_tools()
- Extend _get_platform_tools() to include globally enabled MCP servers
  at runtime (include_default_mcp_servers=True), while config-editing
  flows use include_default_mcp_servers=False to avoid persisting
  implicit MCP defaults into platform_toolsets
- Add homeassistant to PLATFORMS dict (was missing, caused KeyError)
- Fix CLI entry point to use _get_platform_tools() as well, so MCP
  tools are visible in CLI mode too
- Remove redundant platform_key reassignment in _run_background_task

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-26 13:39:41 -07:00
+								            user_config = _load_gateway_config()
 								            model = _resolve_gateway_model(user_config)
 								            platform_key = _platform_config_key(source.platform)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
-												fix: MCP toolset resolution for runtime and config (#3252)

Gateway sessions had their own inline toolset resolution that only read
platform_toolsets from config, which never includes MCP server names.
MCP tools were discovered and registered but invisible to the model.

- Replace duplicated gateway toolset resolution in _run_agent() and
  _run_background_task() with calls to the shared _get_platform_tools()
- Extend _get_platform_tools() to include globally enabled MCP servers
  at runtime (include_default_mcp_servers=True), while config-editing
  flows use include_default_mcp_servers=False to avoid persisting
  implicit MCP defaults into platform_toolsets
- Add homeassistant to PLATFORMS dict (was missing, caused KeyError)
- Fix CLI entry point to use _get_platform_tools() as well, so MCP
  tools are visible in CLI mode too
- Remove redundant platform_key reassignment in _run_background_task

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-26 13:39:41 -07:00
+								            from hermes_cli.tools_config import _get_platform_tools
 								            enabled_toolsets = sorted(_get_platform_tools(user_config, platform_key))
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								            pr = self._provider_routing
 								            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
-												feat(gateway): add reasoning hot reload

Add a /reasoning command across gateway adapters so users can
inspect or change reasoning effort without editing config by hand.

Reload reasoning settings from config.yaml before each agent run,
including background tasks, so the next message picks up the new
value consistently.

											
										
										
											2026-03-11 22:12:11 +08:00
+								            reasoning_config = self._load_reasoning_config()
 								            self._reasoning_config = reasoning_config
-												fix: hermes update causes dual gateways on macOS (launchd) (#1567)

* feat: add optional smart model routing

Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.

* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s

* fix(gateway): avoid recursive ExecStop in user systemd unit

* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit

The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.

---------

Co-authored-by: Ninja <ninja@local>

* feat(skills): add blender-mcp optional skill for 3D modeling

Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.

Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.

* feat(acp): support slash commands in ACP adapter (#1532)

Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.

Unrecognized /commands fall through to the LLM as normal messages.

/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.

Fixes #1402

* fix(logging): improve error logging in session search tool (#1533)

* fix(gateway): restart on retryable startup failures (#1517)

* feat(email): add skip_attachments option via config.yaml

* feat(email): add skip_attachments option via config.yaml

Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.

Configure in config.yaml:
  platforms:
    email:
      skip_attachments: true

Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.

* docs: document skip_attachments option for email adapter

* fix(telegram): retry on transient TLS failures during connect and send

Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.

Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.

Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.

Based on PR #1527 by cmd8. Closes #1526.

* feat: permissive block_anchor thresholds and unicode normalization (#1539)

Salvaged from PR #1528 by an420eth. Closes #517.

Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
  non-breaking spaces → ASCII) so LLM-produced unicode artifacts
  don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
  multiple candidates — if first/last lines match exactly, the
  block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
  preserve correct character positions

Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.

Co-authored-by: an420eth <an420eth@users.noreply.github.com>

* feat(cli): add file path autocomplete in the input prompt (#1545)

When typing a path-like token (./  ../  ~/  /  or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.

Triggered by tokens like:
  edit ./src/ma     → shows ./src/main.py, ./src/manifest.json, ...
  check ~/doc       → shows ~/docs/, ~/documents/, ...
  read /etc/hos     → shows /etc/hosts, /etc/hostname, ...
  open tools/reg    → shows tools/registry.py

Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.

Inspired by OpenCode PR #145 (file path completion menu).

Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
  tokens, _path_completions() yields filesystem Completions with
  size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
  path extraction, prefix filtering, directory markers, home
  expansion, case-insensitivity, integration with slash commands

* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)

Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.

Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.

* feat: smart approvals + /stop command (inspired by OpenAI Codex)

* feat: smart approvals — LLM-based risk assessment for dangerous commands

Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.

Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).

Config (config.yaml):
  approvals:
    mode: manual   # manual (default), smart, off

Modes:
- manual — current behavior, always prompt the user
- smart  — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
           or ESCALATE (fall through to manual prompt)
- off    — skip all approval prompts (equivalent to --yolo)

When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.

The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.

* feat: make smart approval model configurable via config.yaml

Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).

Config:
  auxiliary:
    approval:
      provider: auto
      model: ''        # fast/cheap model recommended
      base_url: ''
      api_key: ''

Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.

* feat: add /stop command to kill all background processes

Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.

Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.

Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.

* feat: first-class plugin architecture + hide status bar cost by default (#1544)

The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:

  display:
    show_cost: true

in config.yaml, or: hermes config set display.show_cost true

The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.

Status bar without cost:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m

Status bar with show_cost: true:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m

* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)

* feat: improve memory prioritization — user preferences over procedural knowledge

Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.

Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'

Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
  and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
  corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
  corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
  preferences and corrections over task-specific details

* feat: more aggressive skill creation and update prompting

Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.

Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
  to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
  if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
  now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers

* feat: first-class plugin architecture (#1555)

Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.

Core system (hermes_cli/plugins.py):
  - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
    pip entry_points (hermes_agent.plugins group)
  - PluginContext with register_tool() and register_hook()
  - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
    on_session_start/end
  - Namespace package handling for relative imports in plugins
  - Graceful error isolation — broken plugins never crash the agent

Integration (model_tools.py):
  - Plugin discovery runs after built-in + MCP tools
  - Plugin tools bypass toolset filter via get_plugin_tool_names()
  - Pre/post tool call hooks fire in handle_function_call()

CLI:
  - /plugins command shows loaded plugins, tool counts, status
  - Added to COMMANDS dict for autocomplete

Docs:
  - Getting started guide (build-a-hermes-plugin.md) — full tutorial
    building a calculator plugin step by step
  - Reference page (features/plugins.md) — quick overview + tables
  - Covers: file structure, schemas, handlers, hooks, data files,
    bundled skills, env var gating, pip distribution, common mistakes

Tests: 16 tests covering discovery, loading, hooks, tool visibility.

* fix: hermes update causes dual gateways on macOS (launchd)

Three bugs worked together to create the dual-gateway problem:

1. cmd_update only checked systemd for gateway restart, completely
   ignoring launchd on macOS. After killing the PID it would print
   'Restart it with: hermes gateway run' even when launchd was about
   to auto-respawn the process.

2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
   after SIGTERM (non-zero exit), so the user's manual restart
   created a second instance.

3. The launchd plist lacked --replace (systemd had it), so the
   respawned gateway didn't kill stale instances on startup.

Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint

* fix: add launchd plist auto-refresh + explicit restart in cmd_update

Two integration issues with the initial fix:

1. Existing macOS users with old plist (no --replace) would never
   get the fix until manual uninstall/reinstall. Added
   refresh_launchd_plist_if_needed() — mirrors the existing
   refresh_systemd_unit_if_needed(). Called from launchd_start(),
   launchd_restart(), and cmd_update.

2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
   explicit launchctl stop/start. This caused races: launchd would
   respawn the old process before the PID file was cleaned up.
   Now does explicit stop+start (matching how systemd gets an
   explicit systemctl restart), with plist refresh first so the
   new --replace flag is picked up.

---------

Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
											
										
										
											2026-03-16 12:36:29 -07:00
+								            turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								            def run_sync():
 								                agent = AIAgent(
-												fix: hermes update causes dual gateways on macOS (launchd) (#1567)

* feat: add optional smart model routing

Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.

* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s

* fix(gateway): avoid recursive ExecStop in user systemd unit

* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit

The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.

---------

Co-authored-by: Ninja <ninja@local>

* feat(skills): add blender-mcp optional skill for 3D modeling

Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.

Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.

* feat(acp): support slash commands in ACP adapter (#1532)

Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.

Unrecognized /commands fall through to the LLM as normal messages.

/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.

Fixes #1402

* fix(logging): improve error logging in session search tool (#1533)

* fix(gateway): restart on retryable startup failures (#1517)

* feat(email): add skip_attachments option via config.yaml

* feat(email): add skip_attachments option via config.yaml

Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.

Configure in config.yaml:
  platforms:
    email:
      skip_attachments: true

Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.

* docs: document skip_attachments option for email adapter

* fix(telegram): retry on transient TLS failures during connect and send

Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.

Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.

Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.

Based on PR #1527 by cmd8. Closes #1526.

* feat: permissive block_anchor thresholds and unicode normalization (#1539)

Salvaged from PR #1528 by an420eth. Closes #517.

Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
  non-breaking spaces → ASCII) so LLM-produced unicode artifacts
  don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
  multiple candidates — if first/last lines match exactly, the
  block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
  preserve correct character positions

Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.

Co-authored-by: an420eth <an420eth@users.noreply.github.com>

* feat(cli): add file path autocomplete in the input prompt (#1545)

When typing a path-like token (./  ../  ~/  /  or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.

Triggered by tokens like:
  edit ./src/ma     → shows ./src/main.py, ./src/manifest.json, ...
  check ~/doc       → shows ~/docs/, ~/documents/, ...
  read /etc/hos     → shows /etc/hosts, /etc/hostname, ...
  open tools/reg    → shows tools/registry.py

Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.

Inspired by OpenCode PR #145 (file path completion menu).

Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
  tokens, _path_completions() yields filesystem Completions with
  size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
  path extraction, prefix filtering, directory markers, home
  expansion, case-insensitivity, integration with slash commands

* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)

Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.

Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.

* feat: smart approvals + /stop command (inspired by OpenAI Codex)

* feat: smart approvals — LLM-based risk assessment for dangerous commands

Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.

Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).

Config (config.yaml):
  approvals:
    mode: manual   # manual (default), smart, off

Modes:
- manual — current behavior, always prompt the user
- smart  — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
           or ESCALATE (fall through to manual prompt)
- off    — skip all approval prompts (equivalent to --yolo)

When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.

The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.

* feat: make smart approval model configurable via config.yaml

Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).

Config:
  auxiliary:
    approval:
      provider: auto
      model: ''        # fast/cheap model recommended
      base_url: ''
      api_key: ''

Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.

* feat: add /stop command to kill all background processes

Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.

Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.

Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.

* feat: first-class plugin architecture + hide status bar cost by default (#1544)

The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:

  display:
    show_cost: true

in config.yaml, or: hermes config set display.show_cost true

The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.

Status bar without cost:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m

Status bar with show_cost: true:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m

* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)

* feat: improve memory prioritization — user preferences over procedural knowledge

Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.

Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'

Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
  and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
  corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
  corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
  preferences and corrections over task-specific details

* feat: more aggressive skill creation and update prompting

Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.

Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
  to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
  if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
  now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers

* feat: first-class plugin architecture (#1555)

Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.

Core system (hermes_cli/plugins.py):
  - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
    pip entry_points (hermes_agent.plugins group)
  - PluginContext with register_tool() and register_hook()
  - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
    on_session_start/end
  - Namespace package handling for relative imports in plugins
  - Graceful error isolation — broken plugins never crash the agent

Integration (model_tools.py):
  - Plugin discovery runs after built-in + MCP tools
  - Plugin tools bypass toolset filter via get_plugin_tool_names()
  - Pre/post tool call hooks fire in handle_function_call()

CLI:
  - /plugins command shows loaded plugins, tool counts, status
  - Added to COMMANDS dict for autocomplete

Docs:
  - Getting started guide (build-a-hermes-plugin.md) — full tutorial
    building a calculator plugin step by step
  - Reference page (features/plugins.md) — quick overview + tables
  - Covers: file structure, schemas, handlers, hooks, data files,
    bundled skills, env var gating, pip distribution, common mistakes

Tests: 16 tests covering discovery, loading, hooks, tool visibility.

* fix: hermes update causes dual gateways on macOS (launchd)

Three bugs worked together to create the dual-gateway problem:

1. cmd_update only checked systemd for gateway restart, completely
   ignoring launchd on macOS. After killing the PID it would print
   'Restart it with: hermes gateway run' even when launchd was about
   to auto-respawn the process.

2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
   after SIGTERM (non-zero exit), so the user's manual restart
   created a second instance.

3. The launchd plist lacked --replace (systemd had it), so the
   respawned gateway didn't kill stale instances on startup.

Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint

* fix: add launchd plist auto-refresh + explicit restart in cmd_update

Two integration issues with the initial fix:

1. Existing macOS users with old plist (no --replace) would never
   get the fix until manual uninstall/reinstall. Added
   refresh_launchd_plist_if_needed() — mirrors the existing
   refresh_systemd_unit_if_needed(). Called from launchd_start(),
   launchd_restart(), and cmd_update.

2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
   explicit launchctl stop/start. This caused races: launchd would
   respawn the old process before the PID file was cleaned up.
   Now does explicit stop+start (matching how systemd gets an
   explicit systemctl restart), with plist refresh first so the
   new --replace flag is picked up.

---------

Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
											
										
										
											2026-03-16 12:36:29 -07:00
+								                    model=turn_route["model"],
 								                    **turn_route["runtime"],
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                    max_iterations=max_iterations,
 								                    quiet_mode=True,
 								                    verbose_logging=False,
 								                    enabled_toolsets=enabled_toolsets,
-												feat(gateway): add reasoning hot reload

Add a /reasoning command across gateway adapters so users can
inspect or change reasoning effort without editing config by hand.

Reload reasoning settings from config.yaml before each agent run,
including background tasks, so the next message picks up the new
value consistently.

											
										
										
											2026-03-11 22:12:11 +08:00
+								                    reasoning_config=reasoning_config,
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                    providers_allowed=pr.get("only"),
 								                    providers_ignored=pr.get("ignore"),
 								                    providers_order=pr.get("order"),
 								                    provider_sort=pr.get("sort"),
 								                    provider_require_parameters=pr.get("require_parameters", False),
 								                    provider_data_collection=pr.get("data_collection"),
 								                    session_id=task_id,
 								                    platform=platform_key,
 								                    session_db=self._session_db,
 								                    fallback_model=self._fallback_model,
 								                )
 								                return agent.run_conversation(
 								                    user_message=prompt,
 								                    task_id=task_id,
 								                )
 								            loop = asyncio.get_event_loop()
 								            result = await loop.run_in_executor(None, run_sync)
 								            response = result.get("final_response", "") if result else ""
 								            if not response and result and result.get("error"):
 								                response = f"Error: {result['error']}"
 								            # Extract media files from the response
 								            if response:
 								                media_files, response = adapter.extract_media(response)
 								                images, text_content = adapter.extract_images(response)
 								                preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
 								                header = f'✅ Background task complete\nPrompt: "{preview}"\n\n'
 								                if text_content:
 								                    await adapter.send(
 								                        chat_id=source.chat_id,
 								                        content=header + text_content,
 								                        metadata=_thread_metadata,
 								                    )
 								                elif not images and not media_files:
 								                    await adapter.send(
 								                        chat_id=source.chat_id,
 								                        content=header + "(No response generated)",
 								                        metadata=_thread_metadata,
 								                    )
 								                # Send extracted images
 								                for image_url, alt_text in (images or []):
 								                    try:
 								                        await adapter.send_image(
 								                            chat_id=source.chat_id,
 								                            image_url=image_url,
 								                            caption=alt_text,
 								                        )
 								                    except Exception:
 								                        pass
 								                # Send media files
 								                for media_path in (media_files or []):
 								                    try:
 								                        await adapter.send_file(
 								                            chat_id=source.chat_id,
 								                            file_path=media_path,
 								                        )
 								                    except Exception:
 								                        pass
 								            else:
 								                preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
 								                await adapter.send(
 								                    chat_id=source.chat_id,
 								                    content=f'✅ Background task complete\nPrompt: "{preview}"\n\n(No response generated)',
 								                    metadata=_thread_metadata,
 								                )
 								        except Exception as e:
 								            logger.exception("Background task %s failed", task_id)
 								            try:
 								                await adapter.send(
 								                    chat_id=source.chat_id,
 								                    content=f"❌ Background task {task_id} failed: {e}",
 								                    metadata=_thread_metadata,
 								                )
 								            except Exception:
 								                pass
 								    async def _handle_reasoning_command(self, event: MessageEvent) -> str:
 								        """Handle /reasoning command — manage reasoning effort and display toggle.
 								        Usage:
 								            /reasoning              Show current effort level and display state
 								            /reasoning <level>      Set reasoning effort (none, low, medium, high, xhigh)
 								            /reasoning show|on      Show model reasoning in responses
 								            /reasoning hide|off     Hide model reasoning from responses
 								        """
 								        import yaml
 								        args = event.get_command_args().strip().lower()
 								        config_path = _hermes_home / "config.yaml"
-												feat(gateway): add reasoning hot reload

Add a /reasoning command across gateway adapters so users can
inspect or change reasoning effort without editing config by hand.

Reload reasoning settings from config.yaml before each agent run,
including background tasks, so the next message picks up the new
value consistently.

											
										
										
											2026-03-11 22:12:11 +08:00
+								        self._reasoning_config = self._load_reasoning_config()
 								        self._show_reasoning = self._load_show_reasoning()
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								        def _save_config_key(key_path: str, value):
 								            """Save a dot-separated key to config.yaml."""
 								            try:
 								                user_config = {}
 								                if config_path.exists():
 								                    with open(config_path, encoding="utf-8") as f:
 								                        user_config = yaml.safe_load(f) or {}
 								                keys = key_path.split(".")
 								                current = user_config
 								                for k in keys[:-1]:
 								                    if k not in current or not isinstance(current[k], dict):
 								                        current[k] = {}
 								                    current = current[k]
 								                current[keys[-1]] = value
 								                with open(config_path, "w", encoding="utf-8") as f:
 								                    yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
 								                return True
 								            except Exception as e:
 								                logger.error("Failed to save config key %s: %s", key_path, e)
 								                return False
 								        if not args:
 								            # Show current state
 								            rc = self._reasoning_config
 								            if rc is None:
 								                level = "medium (default)"
 								            elif rc.get("enabled") is False:
 								                level = "none (disabled)"
 								            else:
 								                level = rc.get("effort", "medium")
 								            display_state = "on ✓" if self._show_reasoning else "off"
 								            return (
 								                "🧠 **Reasoning Settings**\n\n"
 								                f"**Effort:** `{level}`\n"
 								                f"**Display:** {display_state}\n\n"
 								                "_Usage:_ `/reasoning <none|low|medium|high|xhigh|show|hide>`"
 								            )
 								        # Display toggle
 								        if args in ("show", "on"):
 								            self._show_reasoning = True
 								            _save_config_key("display.show_reasoning", True)
 								            return "🧠 ✓ Reasoning display: **ON**\nModel thinking will be shown before each response."
 								        if args in ("hide", "off"):
 								            self._show_reasoning = False
 								            _save_config_key("display.show_reasoning", False)
 								            return "🧠 ✓ Reasoning display: **OFF**"
 								        # Effort level change
 								        effort = args.strip()
 								        if effort == "none":
 								            parsed = {"enabled": False}
 								        elif effort in ("xhigh", "high", "medium", "low", "minimal"):
 								            parsed = {"enabled": True, "effort": effort}
 								        else:
 								            return (
 								                f"⚠️ Unknown argument: `{effort}`\n\n"
 								                "**Valid levels:** none, low, minimal, medium, high, xhigh\n"
 								                "**Display:** show, hide"
 								            )
 								        self._reasoning_config = parsed
 								        if _save_config_key("agent.reasoning_effort", effort):
 								            return f"🧠 ✓ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_"
 								        else:
 								            return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"
-												feat: config-gated /verbose command for messaging gateway (#3262)

* feat: config-gated /verbose command for messaging gateway

Add gateway_config_gate field to CommandDef, allowing cli_only commands
to be conditionally available in the gateway based on a config value.

- CommandDef gains gateway_config_gate: str | None — a config dotpath
  that, when truthy, overrides cli_only for gateway surfaces
- /verbose uses gateway_config_gate='display.tool_progress_command'
- Default is off (cli_only behavior preserved)
- When enabled, /verbose cycles tool_progress mode (off/new/all/verbose)
  in the gateway, saving to config.yaml — same cycle as the CLI
- Gateway helpers (help, telegram menus, slack mapping) dynamically
  check config to include/exclude config-gated commands
- GATEWAY_KNOWN_COMMANDS always includes config-gated commands so
  the gateway recognizes them and can respond appropriately
- Handles YAML 1.1 bool coercion (bare 'off' parses as False)
- 8 new tests for the config gate mechanism + gateway handler

* docs: document gateway_config_gate and /verbose messaging support

- AGENTS.md: add gateway_config_gate to CommandDef fields
- slash-commands.md: note /verbose can be enabled for messaging, update Notes
- configuration.md: add tool_progress_command to display section + usage note
- cli.md: cross-link to config docs for messaging enablement
- messaging/index.md: show tool_progress_command in config snippet
- plugins.md: add gateway_config_gate to register_command parameter table
											
										
										
											2026-03-26 14:41:04 -07:00
+								    async def _handle_verbose_command(self, event: MessageEvent) -> str:
 								        """Handle /verbose command — cycle tool progress display mode.
 								        Gated by ``display.tool_progress_command`` in config.yaml (default off).
 								        When enabled, cycles the tool progress mode through off → new → all →
 								        verbose → off, same as the CLI.
 								        """
 								        import yaml
 								        config_path = _hermes_home / "config.yaml"
 								        # --- check config gate ------------------------------------------------
 								        try:
 								            user_config = {}
 								            if config_path.exists():
 								                with open(config_path, encoding="utf-8") as f:
 								                    user_config = yaml.safe_load(f) or {}
 								            gate_enabled = user_config.get("display", {}).get("tool_progress_command", False)
 								        except Exception:
 								            gate_enabled = False
 								        if not gate_enabled:
 								            return (
 								                "The `/verbose` command is not enabled for messaging platforms.\n\n"
 								                "Enable it in `config.yaml`:\n```yaml\n"
 								                "display:\n  tool_progress_command: true\n```"
 								            )
 								        # --- cycle mode -------------------------------------------------------
 								        cycle = ["off", "new", "all", "verbose"]
 								        descriptions = {
 								            "off": "⚙️ Tool progress: **OFF** — no tool activity shown.",
 								            "new": "⚙️ Tool progress: **NEW** — shown when tool changes.",
 								            "all": "⚙️ Tool progress: **ALL** — every tool call shown.",
 								            "verbose": "⚙️ Tool progress: **VERBOSE** — full args and results.",
 								        }
 								        raw_progress = user_config.get("display", {}).get("tool_progress", "all")
 								        # YAML 1.1 parses bare "off" as boolean False — normalise back
 								        if raw_progress is False:
 								            current = "off"
 								        elif raw_progress is True:
 								            current = "all"
 								        else:
 								            current = str(raw_progress).lower()
 								        if current not in cycle:
 								            current = "all"
 								        idx = (cycle.index(current) + 1) % len(cycle)
 								        new_mode = cycle[idx]
 								        # Save to config.yaml
 								        try:
 								            if "display" not in user_config or not isinstance(user_config.get("display"), dict):
 								                user_config["display"] = {}
 								            user_config["display"]["tool_progress"] = new_mode
 								            with open(config_path, "w", encoding="utf-8") as f:
 								                yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
 								            return f"{descriptions[new_mode]}\n_(saved to config — takes effect on next message)_"
 								        except Exception as e:
 								            logger.warning("Failed to save tool_progress mode: %s", e)
 								            return f"{descriptions[new_mode]}\n_(could not save to config: {e})_"
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								    async def _handle_compress_command(self, event: MessageEvent) -> str:
 								        """Handle /compress command -- manually compress conversation context."""
 								        source = event.source
 								        session_entry = self.session_store.get_or_create_session(source)
 								        history = self.session_store.load_transcript(session_entry.session_id)
 								        if not history or len(history) < 4:
 								            return "Not enough conversation to compress (need at least 4 messages)."
 								        try:
 								            from run_agent import AIAgent
 								            from agent.model_metadata import estimate_messages_tokens_rough
 								            runtime_kwargs = _resolve_runtime_agent_kwargs()
 								            if not runtime_kwargs.get("api_key"):
 								                return "No provider configured -- cannot compress."
 								            # Resolve model from config (same reason as memory flush above).
 								            model = _resolve_gateway_model()
 								            msgs = [
 								                {"role": m.get("role"), "content": m.get("content")}
 								                for m in history
 								                if m.get("role") in ("user", "assistant") and m.get("content")
 								            ]
 								            original_count = len(msgs)
 								            approx_tokens = estimate_messages_tokens_rough(msgs)
 								            tmp_agent = AIAgent(
 								                **runtime_kwargs,
 								                model=model,
 								                max_iterations=4,
 								                quiet_mode=True,
 								                enabled_toolsets=["memory"],
 								                session_id=session_entry.session_id,
 								            )
-												fix(gateway): silence background agent terminal output (#3297)

* fix(gateway): silence flush agent terminal output

quiet_mode=True only suppresses AIAgent init messages.
Tool call output still leaks to the terminal through
_safe_print → _print_fn during session reset/expiry.

Since #2670 injected live memory state into the flush prompt,
the flush agent now reliably calls memory tools — making the
output leak noticeable for the first time.

Set _print_fn to a no-op so the background flush is fully silent.

* test(gateway): add test for flush agent terminal silence + fix dotenv mock

- Add TestFlushAgentSilenced: verifies _print_fn is set to a no-op on
  the flush agent so tool output never leaks to the terminal
- Fix pre-existing test failures: replace patch('run_agent.AIAgent')
  with sys.modules mock to avoid importing run_agent (requires openai)
- Add autouse _mock_dotenv fixture so all tests in this file run
  without the dotenv package installed

* fix(display): route KawaiiSpinner output through print_fn to fully silence flush agent

The previous fix set tmp_agent._print_fn = no-op on the flush agent but
spinner output and quiet-mode cute messages bypassed _print_fn entirely:
- KawaiiSpinner captured sys.stdout at __init__ and wrote directly to it
- quiet-mode tool results used builtin print() instead of _safe_print()

Add optional print_fn parameter to KawaiiSpinner.__init__; _write routes
through it when set. Pass self._print_fn to all spinner construction sites
in run_agent.py and change the quiet-mode cute message print to _safe_print.
The existing gateway fix (tmp_agent._print_fn = lambda) now propagates
correctly through both paths.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(gateway): silence hygiene and compression background agents

Two more background AIAgent instances in the gateway were created with
quiet_mode=True but without _print_fn = no-op, causing tool output to
leak to the terminal:
- _hyg_agent (in-turn hygiene memory agent)
- tmp_agent (_compress_context path)

Apply the same _print_fn no-op pattern used for the flush agent.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* chore(display): remove unused _last_flush_time from KawaiiSpinner

Attribute was set but never read; upstream already removed it.
Leftover from conflict resolution during rebase onto upstream/main.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Dilee <uzmpsk.dilekakbas@gmail.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
											
										
										
											2026-03-26 17:40:31 -07:00
+								            tmp_agent._print_fn = lambda *a, **kw: None
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								            loop = asyncio.get_event_loop()
 								            compressed, _ = await loop.run_in_executor(
 								                None,
-												fix(gateway): preserve transcript on /compress and hygiene compression (salvage #3516) (#3556)

* fix(gateway): preserve full transcript on /compress instead of overwriting

The /compress command calls _compress_context() which correctly ends the
old session (preserving its full transcript in SQLite) and creates a new
session_id for the continuation. However, it then immediately called
rewrite_transcript() on the OLD session_id, overwriting the preserved
transcript with the compressed version — destroying searchable history.

Auto-compression (triggered by context pressure) does not have this bug
because the gateway already handles the session_id swap via the
agent.session_id != session_id check after _run_agent_sync.

Fix: after _compress_context creates the new session, write the compressed
messages into the NEW session_id and update the session store pointer.
The old session's full transcript stays intact and searchable via
session_search.

Before: /compress destroys original messages, session_search can't find
details from compressed portions.

After: /compress behaves like /new for history — full transcript preserved,
compressed context for the live session.

* fix(gateway): preserve transcript on /compress and hygiene compression

Apply session_id swap after _compress_context in both /compress handler
and hygiene pre-compression. _compress_context creates a new session
(ending the old one), but both paths were calling rewrite_transcript on
the OLD session_id — overwriting the preserved transcript and destroying
searchable history.

Now follows the same pattern as the auto-compression handler (lines
5415-5423): detect the new session_id, update the session store entry,
and write compressed messages to the new session.

Also fix FakeCompressAgent test mock to include session_id attribute
and simulate the session_id change that real _compress_context performs.

Co-authored-by: MacroAnarchy <MacroAnarchy@users.noreply.github.com>

---------

Co-authored-by: MacroAnarchy <MacroAnarchy@users.noreply.github.com>
											
										
										
											2026-03-28 12:23:43 -07:00
+								                lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            )
-												fix(gateway): preserve transcript on /compress and hygiene compression (salvage #3516) (#3556)

* fix(gateway): preserve full transcript on /compress instead of overwriting

The /compress command calls _compress_context() which correctly ends the
old session (preserving its full transcript in SQLite) and creates a new
session_id for the continuation. However, it then immediately called
rewrite_transcript() on the OLD session_id, overwriting the preserved
transcript with the compressed version — destroying searchable history.

Auto-compression (triggered by context pressure) does not have this bug
because the gateway already handles the session_id swap via the
agent.session_id != session_id check after _run_agent_sync.

Fix: after _compress_context creates the new session, write the compressed
messages into the NEW session_id and update the session store pointer.
The old session's full transcript stays intact and searchable via
session_search.

Before: /compress destroys original messages, session_search can't find
details from compressed portions.

After: /compress behaves like /new for history — full transcript preserved,
compressed context for the live session.

* fix(gateway): preserve transcript on /compress and hygiene compression

Apply session_id swap after _compress_context in both /compress handler
and hygiene pre-compression. _compress_context creates a new session
(ending the old one), but both paths were calling rewrite_transcript on
the OLD session_id — overwriting the preserved transcript and destroying
searchable history.

Now follows the same pattern as the auto-compression handler (lines
5415-5423): detect the new session_id, update the session store entry,
and write compressed messages to the new session.

Also fix FakeCompressAgent test mock to include session_id attribute
and simulate the session_id change that real _compress_context performs.

Co-authored-by: MacroAnarchy <MacroAnarchy@users.noreply.github.com>

---------

Co-authored-by: MacroAnarchy <MacroAnarchy@users.noreply.github.com>
											
										
										
											2026-03-28 12:23:43 -07:00
+								            # _compress_context already calls end_session() on the old session
 								            # (preserving its full transcript in SQLite) and creates a new
 								            # session_id for the continuation.  Write the compressed messages
 								            # into the NEW session so the original history stays searchable.
 								            new_session_id = tmp_agent.session_id
 								            if new_session_id != session_entry.session_id:
 								                session_entry.session_id = new_session_id
 								                self.session_store._save()
 								            self.session_store.rewrite_transcript(new_session_id, compressed)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            # Reset stored token count — transcript changed, old value is stale
 								            self.session_store.update_session(
-												fix(gateway): preserve transcript on /compress and hygiene compression (salvage #3516) (#3556)

* fix(gateway): preserve full transcript on /compress instead of overwriting

The /compress command calls _compress_context() which correctly ends the
old session (preserving its full transcript in SQLite) and creates a new
session_id for the continuation. However, it then immediately called
rewrite_transcript() on the OLD session_id, overwriting the preserved
transcript with the compressed version — destroying searchable history.

Auto-compression (triggered by context pressure) does not have this bug
because the gateway already handles the session_id swap via the
agent.session_id != session_id check after _run_agent_sync.

Fix: after _compress_context creates the new session, write the compressed
messages into the NEW session_id and update the session store pointer.
The old session's full transcript stays intact and searchable via
session_search.

Before: /compress destroys original messages, session_search can't find
details from compressed portions.

After: /compress behaves like /new for history — full transcript preserved,
compressed context for the live session.

* fix(gateway): preserve transcript on /compress and hygiene compression

Apply session_id swap after _compress_context in both /compress handler
and hygiene pre-compression. _compress_context creates a new session
(ending the old one), but both paths were calling rewrite_transcript on
the OLD session_id — overwriting the preserved transcript and destroying
searchable history.

Now follows the same pattern as the auto-compression handler (lines
5415-5423): detect the new session_id, update the session store entry,
and write compressed messages to the new session.

Also fix FakeCompressAgent test mock to include session_id attribute
and simulate the session_id change that real _compress_context performs.

Co-authored-by: MacroAnarchy <MacroAnarchy@users.noreply.github.com>

---------

Co-authored-by: MacroAnarchy <MacroAnarchy@users.noreply.github.com>
											
										
										
											2026-03-28 12:23:43 -07:00
+								                session_entry.session_key, last_prompt_tokens=0
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            )
 								            new_count = len(compressed)
 								            new_tokens = estimate_messages_tokens_rough(compressed)
 								            return (
 								                f"🗜️ Compressed: {original_count} → {new_count} messages\n"
 								                f"~{approx_tokens:,} → ~{new_tokens:,} tokens"
 								            )
 								        except Exception as e:
 								            logger.warning("Manual compress failed: %s", e)
 								            return f"Compression failed: {e}"
 								    async def _handle_title_command(self, event: MessageEvent) -> str:
 								        """Handle /title command — set or show the current session's title."""
 								        source = event.source
 								        session_entry = self.session_store.get_or_create_session(source)
 								        session_id = session_entry.session_id
 								        if not self._session_db:
 								            return "Session database not available."
-												fix(gateway): /title command fails when session doesn't exist in SQLite yet (#2379)

The /title command would fail with 'Session not found in database.' when
used as the first command in a new session. This happened because:

1. Gateway creates session in session_store (in-memory)
2. But SQLite _session_db only gets sessions when agent flushes messages
3. set_session_title() does UPDATE which fails if row doesn't exist

Now we check if session exists in SQLite and create it if needed before
attempting to set the title.

Fixes: Session not found in database. error on /title in new chats
											
										
										
											2026-03-21 19:04:53 -04:00
+								        # Ensure session exists in SQLite DB (it may only exist in session_store
 								        # if this is the first command in a new session)
 								        existing_title = self._session_db.get_session_title(session_id)
 								        if existing_title is None:
 								            # Session doesn't exist in DB yet — create it
 								            try:
 								                self._session_db.create_session(
 								                    session_id=session_id,
 								                    source=source.platform.value if source.platform else "unknown",
 								                    user_id=source.user_id,
 								                )
 								            except Exception:
 								                pass  # Session might already exist, ignore errors
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        title_arg = event.get_command_args().strip()
 								        if title_arg:
 								            # Sanitize the title before setting
 								            try:
 								                sanitized = self._session_db.sanitize_title(title_arg)
 								            except ValueError as e:
 								                return f"⚠️ {e}"
 								            if not sanitized:
 								                return "⚠️ Title is empty after cleanup. Please use printable characters."
 								            # Set the title
 								            try:
 								                if self._session_db.set_session_title(session_id, sanitized):
 								                    return f"✏️ Session title set: **{sanitized}**"
 								                else:
 								                    return "Session not found in database."
 								            except ValueError as e:
 								                return f"⚠️ {e}"
 								        else:
-												feat: auto-generate session titles after first exchange

After the first user→assistant exchange, Hermes now generates a short
descriptive session title via the auxiliary LLM (compression task config).
Title generation runs in a background thread so it never delays the
user-facing response.

Key behaviors:
- Fires only on the first 1-2 exchanges (checks user message count)
- Skips if a title already exists (user-set titles are never overwritten)
- Uses call_llm with compression task config (cheapest/fastest model)
- Truncates long messages to keep the title generation request small
- Cleans up LLM output: strips quotes, 'Title:' prefixes, enforces 80 char max
- Works in both CLI and gateway (Telegram/Discord/etc.)

Also updates /title (no args) to show the session ID alongside the title
in both CLI and gateway.

Implements #1426

											
										
										
											2026-03-17 04:14:40 -07:00
+								            # Show the current title and session ID
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            title = self._session_db.get_session_title(session_id)
 								            if title:
-												feat: auto-generate session titles after first exchange

After the first user→assistant exchange, Hermes now generates a short
descriptive session title via the auxiliary LLM (compression task config).
Title generation runs in a background thread so it never delays the
user-facing response.

Key behaviors:
- Fires only on the first 1-2 exchanges (checks user message count)
- Skips if a title already exists (user-set titles are never overwritten)
- Uses call_llm with compression task config (cheapest/fastest model)
- Truncates long messages to keep the title generation request small
- Cleans up LLM output: strips quotes, 'Title:' prefixes, enforces 80 char max
- Works in both CLI and gateway (Telegram/Discord/etc.)

Also updates /title (no args) to show the session ID alongside the title
in both CLI and gateway.

Implements #1426

											
										
										
											2026-03-17 04:14:40 -07:00
+								                return f"📌 Session: `{session_id}`\nTitle: **{title}**"
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            else:
-												feat: auto-generate session titles after first exchange

After the first user→assistant exchange, Hermes now generates a short
descriptive session title via the auxiliary LLM (compression task config).
Title generation runs in a background thread so it never delays the
user-facing response.

Key behaviors:
- Fires only on the first 1-2 exchanges (checks user message count)
- Skips if a title already exists (user-set titles are never overwritten)
- Uses call_llm with compression task config (cheapest/fastest model)
- Truncates long messages to keep the title generation request small
- Cleans up LLM output: strips quotes, 'Title:' prefixes, enforces 80 char max
- Works in both CLI and gateway (Telegram/Discord/etc.)

Also updates /title (no args) to show the session ID alongside the title
in both CLI and gateway.

Implements #1426

											
										
										
											2026-03-17 04:14:40 -07:00
+								                return f"📌 Session: `{session_id}`\nNo title set. Usage: `/title My Session Name`"
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								    async def _handle_resume_command(self, event: MessageEvent) -> str:
 								        """Handle /resume command — switch to a previously-named session."""
 								        if not self._session_db:
 								            return "Session database not available."
 								        source = event.source
-												fix(gateway): make group session isolation configurable

default group and channel sessions to per-user isolation, allow opting back into shared room sessions via config.yaml, and document Discord gateway routing and session behavior.

											
										
										
											2026-03-16 00:22:23 -07:00
+								        session_key = self._session_key_for_source(source)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        name = event.get_command_args().strip()
 								        if not name:
 								            # List recent titled sessions for this user/platform
 								            try:
 								                user_source = source.platform.value if source.platform else None
 								                sessions = self._session_db.list_sessions_rich(
 								                    source=user_source, limit=10
 								                )
 								                titled = [s for s in sessions if s.get("title")]
 								                if not titled:
 								                    return (
 								                        "No named sessions found.\n"
 								                        "Use `/title My Session` to name your current session, "
 								                        "then `/resume My Session` to return to it later."
 								                    )
 								                lines = ["📋 **Named Sessions**\n"]
 								                for s in titled[:10]:
 								                    title = s["title"]
 								                    preview = s.get("preview", "")[:40]
 								                    preview_part = f" — _{preview}_" if preview else ""
 								                    lines.append(f"• **{title}**{preview_part}")
 								                lines.append("\nUsage: `/resume <session name>`")
 								                return "\n".join(lines)
 								            except Exception as e:
 								                logger.debug("Failed to list titled sessions: %s", e)
 								                return f"Could not list sessions: {e}"
 								        # Resolve the name to a session ID
 								        target_id = self._session_db.resolve_session_by_title(name)
 								        if not target_id:
 								            return (
 								                f"No session found matching '**{name}**'.\n"
 								                "Use `/resume` with no arguments to see available sessions."
 								            )
 								        # Check if already on that session
 								        current_entry = self.session_store.get_or_create_session(source)
 								        if current_entry.session_id == target_id:
 								            return f"📌 Already on session **{name}**."
 								        # Flush memories for current session before switching
 								        try:
-												fix(gateway): track background task references in GatewayRunner (#3254)

Asyncio tasks created with create_task() but never stored can be
garbage collected mid-execution. Add self._background_tasks set to
hold references, with add_done_callback cleanup. Tracks:
- /background command task
- session-reset memory flush task
- session-resume memory flush task
Cancel all pending tasks in stop().

Update test fixtures that construct GatewayRunner via object.__new__()
to include the new _background_tasks attribute.

Cherry-picked from PR #3167 by memosr. The original PR also deleted
the DM topic auto-skill loading code — that deletion was excluded
from this salvage as it removes a shipped feature (#2598).

Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-26 14:33:48 -07:00
+								            _flush_task = asyncio.create_task(
-												fix(honcho): isolate session routing for multi-user gateway (#1500)

Salvaged from PR #1470 by adavyas.

Core fix: Honcho tool calls in a multi-session gateway could route to
the wrong session because honcho_tools.py relied on process-global
state. Now threads session context through the call chain:
  AIAgent._invoke_tool() → handle_function_call() → registry.dispatch()
  → handler **kw → _resolve_session_context()

Changes:
- Add _resolve_session_context() to prefer per-call context over globals
- Plumb honcho_manager + honcho_session_key through handle_function_call
- Add sync_honcho=False to run_conversation() for synthetic flush turns
- Pass honcho_session_key through gateway memory flush lifecycle
- Harden gateway PID detection when /proc cmdline is unreadable
- Make interrupt test scripts import-safe for pytest-xdist
- Wrap BibTeX examples in Jekyll raw blocks for docs build
- Fix thread-order-dependent assertion in client lifecycle test
- Expand Honcho docs: session isolation, lifecycle, routing internals

Dropped from original PR:
- Indentation change in _create_request_openai_client that would move
  client creation inside the lock (causes unnecessary contention)

Co-authored-by: adavyas <adavyas@users.noreply.github.com>
											
										
										
											2026-03-16 00:23:47 -07:00
+								                self._async_flush_memories(current_entry.session_id, session_key)
 								            )
-												fix(gateway): track background task references in GatewayRunner (#3254)

Asyncio tasks created with create_task() but never stored can be
garbage collected mid-execution. Add self._background_tasks set to
hold references, with add_done_callback cleanup. Tracks:
- /background command task
- session-reset memory flush task
- session-resume memory flush task
Cancel all pending tasks in stop().

Update test fixtures that construct GatewayRunner via object.__new__()
to include the new _background_tasks attribute.

Cherry-picked from PR #3167 by memosr. The original PR also deleted
the DM topic auto-skill loading code — that deletion was excluded
from this salvage as it removes a shipped feature (#2598).

Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-26 14:33:48 -07:00
+								            self._background_tasks.add(_flush_task)
 								            _flush_task.add_done_callback(self._background_tasks.discard)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        except Exception as e:
 								            logger.debug("Memory flush on resume failed: %s", e)
 								        self._shutdown_gateway_honcho(session_key)
 								        # Clear any running agent for this session key
 								        if session_key in self._running_agents:
 								            del self._running_agents[session_key]
 								        # Switch the session entry to point at the old session
 								        new_entry = self.session_store.switch_session(session_key, target_id)
 								        if not new_entry:
 								            return "Failed to switch session."
 								        # Get the title for confirmation
 								        title = self._session_db.get_session_title(target_id) or name
 								        # Count messages for context
 								        history = self.session_store.load_transcript(target_id)
 								        msg_count = len([m for m in history if m.get("role") == "user"]) if history else 0
 								        msg_part = f" ({msg_count} message{'s' if msg_count != 1 else ''})" if msg_count else ""
 								        return f"↻ Resumed session **{title}**{msg_part}. Conversation restored."
 								    async def _handle_usage_command(self, event: MessageEvent) -> str:
 								        """Handle /usage command -- show token usage for the session's last agent run."""
 								        source = event.source
-												fix(gateway): make group session isolation configurable

default group and channel sessions to per-user isolation, allow opting back into shared room sessions via config.yaml, and document Discord gateway routing and session behavior.

											
										
										
											2026-03-16 00:22:23 -07:00
+								        session_key = self._session_key_for_source(source)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								        agent = self._running_agents.get(session_key)
 								        if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
 								            lines = [
 								                "📊 **Session Token Usage**",
 								                f"Prompt (input): {agent.session_prompt_tokens:,}",
 								                f"Completion (output): {agent.session_completion_tokens:,}",
 								                f"Total: {agent.session_total_tokens:,}",
 								                f"API calls: {agent.session_api_calls}",
 								            ]
 								            ctx = agent.context_compressor
 								            if ctx.last_prompt_tokens:
 								                pct = ctx.last_prompt_tokens / ctx.context_length * 100 if ctx.context_length else 0
 								                lines.append(f"Context: {ctx.last_prompt_tokens:,} / {ctx.context_length:,} ({pct:.0f}%)")
 								            if ctx.compression_count:
 								                lines.append(f"Compressions: {ctx.compression_count}")
 								            return "\n".join(lines)
 								        # No running agent -- check session history for a rough count
 								        session_entry = self.session_store.get_or_create_session(source)
 								        history = self.session_store.load_transcript(session_entry.session_id)
 								        if history:
 								            from agent.model_metadata import estimate_messages_tokens_rough
 								            msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")]
 								            approx = estimate_messages_tokens_rough(msgs)
 								            return (
 								                f"📊 **Session Info**\n"
 								                f"Messages: {len(msgs)}\n"
 								                f"Estimated context: ~{approx:,} tokens\n"
 								                f"_(Detailed usage available during active conversations)_"
 								            )
 								        return "No usage data available for this session."
 								    async def _handle_insights_command(self, event: MessageEvent) -> str:
 								        """Handle /insights command -- show usage insights and analytics."""
 								        import asyncio as _asyncio
 								        args = event.get_command_args().strip()
 								        days = 30
 								        source = None
 								        # Parse simple args: /insights 7  or  /insights --days 7
 								        if args:
 								            parts = args.split()
 								            i = 0
 								            while i < len(parts):
 								                if parts[i] == "--days" and i + 1 < len(parts):
 								                    try:
 								                        days = int(parts[i + 1])
 								                    except ValueError:
 								                        return f"Invalid --days value: {parts[i + 1]}"
 								                    i += 2
 								                elif parts[i] == "--source" and i + 1 < len(parts):
 								                    source = parts[i + 1]
 								                    i += 2
 								                elif parts[i].isdigit():
 								                    days = int(parts[i])
 								                    i += 1
 								                else:
 								                    i += 1
 								        try:
 								            from hermes_state import SessionDB
 								            from agent.insights import InsightsEngine
 								            loop = _asyncio.get_event_loop()
 								            def _run_insights():
 								                db = SessionDB()
 								                engine = InsightsEngine(db)
 								                report = engine.generate(days=days, source=source)
 								                result = engine.format_gateway(report)
 								                db.close()
 								                return result
 								            return await loop.run_in_executor(None, _run_insights)
 								        except Exception as e:
 								            logger.error("Insights command error: %s", e, exc_info=True)
 								            return f"Error generating insights: {e}"
 								    async def _handle_reload_mcp_command(self, event: MessageEvent) -> str:
 								        """Handle /reload-mcp command -- disconnect and reconnect all MCP servers."""
 								        loop = asyncio.get_event_loop()
 								        try:
 								            from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _load_mcp_config, _servers, _lock
 								            # Capture old server names before shutdown
 								            with _lock:
 								                old_servers = set(_servers.keys())
 								            # Read new config before shutting down, so we know what will be added/removed
 								            new_config = _load_mcp_config()
 								            new_server_names = set(new_config.keys())
 								            # Shutdown existing connections
 								            await loop.run_in_executor(None, shutdown_mcp_servers)
 								            # Reconnect by discovering tools (reads config.yaml fresh)
 								            new_tools = await loop.run_in_executor(None, discover_mcp_tools)
 								            # Compute what changed
 								            with _lock:
 								                connected_servers = set(_servers.keys())
 								            added = connected_servers - old_servers
 								            removed = old_servers - connected_servers
 								            reconnected = connected_servers & old_servers
 								            lines = ["🔄 **MCP Servers Reloaded**\n"]
 								            if reconnected:
 								                lines.append(f"♻️ Reconnected: {', '.join(sorted(reconnected))}")
 								            if added:
 								                lines.append(f"➕ Added: {', '.join(sorted(added))}")
 								            if removed:
 								                lines.append(f"➖ Removed: {', '.join(sorted(removed))}")
 								            if not connected_servers:
 								                lines.append("No MCP servers connected.")
 								            else:
 								                lines.append(f"\n🔧 {len(new_tools)} tool(s) available from {len(connected_servers)} server(s)")
 								            # Inject a message at the END of the session history so the
 								            # model knows tools changed on its next turn.  Appended after
 								            # all existing messages to preserve prompt-cache for the prefix.
 								            change_parts = []
 								            if added:
 								                change_parts.append(f"Added servers: {', '.join(sorted(added))}")
 								            if removed:
 								                change_parts.append(f"Removed servers: {', '.join(sorted(removed))}")
 								            if reconnected:
 								                change_parts.append(f"Reconnected servers: {', '.join(sorted(reconnected))}")
 								            tool_summary = f"{len(new_tools)} MCP tool(s) now available" if new_tools else "No MCP tools available"
 								            change_detail = ". ".join(change_parts) + ". " if change_parts else ""
 								            reload_msg = {
 								                "role": "user",
 								                "content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
 								            }
 								            try:
 								                session_entry = self.session_store.get_or_create_session(event.source)
 								                self.session_store.append_to_transcript(
 								                    session_entry.session_id, reload_msg
 								                )
 								            except Exception:
 								                pass  # Best-effort; don't fail the reload over a transcript write
 								            return "\n".join(lines)
 								        except Exception as e:
 								            logger.warning("MCP reload failed: %s", e)
 								            return f"❌ MCP reload failed: {e}"
-												fix(gateway): replace bare text approval with /approve and /deny commands (#2002)

The gateway approval system previously intercepted bare 'yes'/'no' text
from the user's next message to approve/deny dangerous commands. This was
fragile and dangerous — if the agent asked a clarify question and the user
said 'yes' to answer it, the gateway would execute the pending dangerous
command instead. (Fixes #1888)

Changes:
- Remove bare text matching ('yes', 'y', 'approve', 'ok', etc.) from
  _handle_message approval check
- Add /approve and /deny as gateway-only slash commands in the command
  registry
- /approve supports scoping: /approve (one-time), /approve session,
  /approve always (permanent)
- Add 5-minute timeout for stale approvals
- Gateway appends structured instructions to the agent response when a
  dangerous command is pending, telling the user exactly how to respond
- 9 tests covering approve, deny, timeout, scoping, and verification
  that bare 'yes' no longer triggers execution

Credit to @solo386 and @FlyByNight69420 for identifying and reporting
this security issue in PR #1971 and issue #1888.

Co-authored-by: Test <test@test.com>
											
										
										
											2026-03-18 16:58:20 -07:00
+								    # ------------------------------------------------------------------
 								    # /approve & /deny — explicit dangerous-command approval
 								    # ------------------------------------------------------------------
 								    _APPROVAL_TIMEOUT_SECONDS = 300  # 5 minutes
 								    async def _handle_approve_command(self, event: MessageEvent) -> str:
 								        """Handle /approve command — execute a pending dangerous command.
 								        Usage:
 								            /approve          — approve and execute the pending command
 								            /approve session  — approve and remember for this session
 								            /approve always   — approve this pattern permanently
 								        """
 								        source = event.source
 								        session_key = self._session_key_for_source(source)
 								        if session_key not in self._pending_approvals:
 								            return "No pending command to approve."
 								        import time as _time
 								        approval = self._pending_approvals[session_key]
 								        # Check for timeout
 								        ts = approval.get("timestamp", 0)
 								        if _time.time() - ts > self._APPROVAL_TIMEOUT_SECONDS:
 								            self._pending_approvals.pop(session_key, None)
 								            return "⚠️ Approval expired (timed out after 5 minutes). Ask the agent to try again."
 								        self._pending_approvals.pop(session_key)
 								        cmd = approval["command"]
 								        pattern_keys = approval.get("pattern_keys", [])
 								        if not pattern_keys:
 								            pk = approval.get("pattern_key", "")
 								            pattern_keys = [pk] if pk else []
 								        # Determine approval scope from args
 								        args = event.get_command_args().strip().lower()
 								        from tools.approval import approve_session, approve_permanent
 								        if args in ("always", "permanent", "permanently"):
 								            for pk in pattern_keys:
 								                approve_permanent(pk)
 								            scope_msg = " (pattern approved permanently)"
 								        elif args in ("session", "ses"):
 								            for pk in pattern_keys:
 								                approve_session(session_key, pk)
 								            scope_msg = " (pattern approved for this session)"
 								        else:
 								            # One-time approval — just approve for session so the immediate
 								            # replay works, but don't advertise it as session-wide
 								            for pk in pattern_keys:
 								                approve_session(session_key, pk)
 								            scope_msg = ""
 								        logger.info("User approved dangerous command via /approve: %s...%s", cmd[:60], scope_msg)
 								        from tools.terminal_tool import terminal_tool
 								        result = terminal_tool(command=cmd, force=True)
 								        return f"✅ Command approved and executed{scope_msg}.\n\n```\n{result[:3500]}\n```"
 								    async def _handle_deny_command(self, event: MessageEvent) -> str:
 								        """Handle /deny command — reject a pending dangerous command."""
 								        source = event.source
 								        session_key = self._session_key_for_source(source)
 								        if session_key not in self._pending_approvals:
 								            return "No pending command to deny."
 								        self._pending_approvals.pop(session_key)
 								        logger.info("User denied dangerous command via /deny")
 								        return "❌ Command denied."
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								    async def _handle_update_command(self, event: MessageEvent) -> str:
 								        """Handle /update command — update Hermes Agent to the latest version.
 								        Spawns ``hermes update`` in a separate systemd scope so it survives the
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								        gateway restart that ``hermes update`` may trigger at the end. Marker
 								        files are written so either the current gateway process or the next one
 								        can notify the user when the update finishes.
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        """
 								        import json
 								        import shutil
 								        import subprocess
 								        from datetime import datetime
 								        project_root = Path(__file__).parent.parent.resolve()
 								        git_dir = project_root / '.git'
 								        if not git_dir.exists():
 								            return "✗ Not a git repository — cannot update."
-												fix(gateway): fall back to sys.executable -m hermes_cli.main when hermes not on PATH

When shutil.which('hermes') returns None, _resolve_hermes_bin() now tries
sys.executable -m hermes_cli.main as a fallback. This handles setups where
Hermes is launched via a venv or module invocation and the hermes symlink is
not on PATH for the gateway process.

Fixes #1049

											
										
										
											2026-03-12 18:02:21 +03:00
+								        hermes_cmd = _resolve_hermes_bin()
 								        if not hermes_cmd:
 								            return (
 								                "✗ Could not locate the `hermes` command. "
 								                "Hermes is running, but the update command could not find the "
 								                "executable on PATH or via the current Python interpreter. "
 								                "Try running `hermes update` manually in your terminal."
 								            )
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								        pending_path = _hermes_home / ".update_pending.json"
 								        output_path = _hermes_home / ".update_output.txt"
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								        exit_code_path = _hermes_home / ".update_exit_code"
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        pending = {
 								            "platform": event.source.platform.value,
 								            "chat_id": event.source.chat_id,
 								            "user_id": event.source.user_id,
 								            "timestamp": datetime.now().isoformat(),
 								        }
 								        pending_path.write_text(json.dumps(pending))
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								        exit_code_path.unlink(missing_ok=True)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								        # Spawn `hermes update` in a separate cgroup so it survives gateway
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								        # restart. systemd-run --user --scope creates a transient scope unit.
-												fix(gateway): fall back to sys.executable -m hermes_cli.main when hermes not on PATH

When shutil.which('hermes') returns None, _resolve_hermes_bin() now tries
sys.executable -m hermes_cli.main as a fallback. This handles setups where
Hermes is launched via a venv or module invocation and the hermes symlink is
not on PATH for the gateway process.

Fixes #1049

											
										
										
											2026-03-12 18:02:21 +03:00
+								        hermes_cmd_str = " ".join(shlex.quote(part) for part in hermes_cmd)
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								        update_cmd = (
-												fix(gateway): fall back to sys.executable -m hermes_cli.main when hermes not on PATH

When shutil.which('hermes') returns None, _resolve_hermes_bin() now tries
sys.executable -m hermes_cli.main as a fallback. This handles setups where
Hermes is launched via a venv or module invocation and the hermes symlink is
not on PATH for the gateway process.

Fixes #1049

											
										
										
											2026-03-12 18:02:21 +03:00
+								            f"{hermes_cmd_str} update > {shlex.quote(str(output_path))} 2>&1; "
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								            f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}"
 								        )
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        try:
 								            systemd_run = shutil.which("systemd-run")
 								            if systemd_run:
 								                subprocess.Popen(
 								                    [systemd_run, "--user", "--scope",
 								                     "--unit=hermes-update", "--",
 								                     "bash", "-c", update_cmd],
 								                    stdout=subprocess.DEVNULL,
 								                    stderr=subprocess.DEVNULL,
 								                    start_new_session=True,
 								                )
 								            else:
 								                # Fallback: best-effort detach with start_new_session
 								                subprocess.Popen(
 								                    ["bash", "-c", f"nohup {update_cmd} &"],
 								                    stdout=subprocess.DEVNULL,
 								                    stderr=subprocess.DEVNULL,
 								                    start_new_session=True,
 								                )
 								        except Exception as e:
 								            pending_path.unlink(missing_ok=True)
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								            exit_code_path.unlink(missing_ok=True)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            return f"✗ Failed to start update: {e}"
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								        self._schedule_update_notification_watch()
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        return "⚕ Starting Hermes update… I'll notify you when it's done."
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								    def _schedule_update_notification_watch(self) -> None:
 								        """Ensure a background task is watching for update completion."""
 								        existing_task = getattr(self, "_update_notification_task", None)
 								        if existing_task and not existing_task.done():
 								            return
 								        try:
 								            self._update_notification_task = asyncio.create_task(
 								                self._watch_for_update_completion()
 								            )
 								        except RuntimeError:
 								            logger.debug("Skipping update notification watcher: no running event loop")
 								    async def _watch_for_update_completion(
 								        self,
 								        poll_interval: float = 2.0,
 								        timeout: float = 1800.0,
 								    ) -> None:
 								        """Wait for ``hermes update`` to finish, then send its notification."""
 								        pending_path = _hermes_home / ".update_pending.json"
 								        claimed_path = _hermes_home / ".update_pending.claimed.json"
 								        exit_code_path = _hermes_home / ".update_exit_code"
 								        loop = asyncio.get_running_loop()
 								        deadline = loop.time() + timeout
 								        while (pending_path.exists() or claimed_path.exists()) and loop.time() < deadline:
 								            if exit_code_path.exists():
 								                await self._send_update_notification()
 								                return
 								            await asyncio.sleep(poll_interval)
 								        if (pending_path.exists() or claimed_path.exists()) and not exit_code_path.exists():
 								            logger.warning("Update watcher timed out waiting for completion marker")
 								            exit_code_path.write_text("124")
 								            await self._send_update_notification()
 								    async def _send_update_notification(self) -> bool:
 								        """If an update finished, notify the user.
 								        Returns False when the update is still running so a caller can retry
 								        later. Returns True after a definitive send/skip decision.
 								        """
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        import json
 								        import re as _re
 								        pending_path = _hermes_home / ".update_pending.json"
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								        claimed_path = _hermes_home / ".update_pending.claimed.json"
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        output_path = _hermes_home / ".update_output.txt"
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								        exit_code_path = _hermes_home / ".update_exit_code"
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								        if not pending_path.exists() and not claimed_path.exists():
 								            return False
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								        cleanup = True
 								        active_pending_path = claimed_path
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        try:
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								            if pending_path.exists():
 								                try:
 								                    pending_path.replace(claimed_path)
 								                except FileNotFoundError:
 								                    if not claimed_path.exists():
 								                        return True
 								            elif not claimed_path.exists():
 								                return True
 								            pending = json.loads(claimed_path.read_text())
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            platform_str = pending.get("platform")
 								            chat_id = pending.get("chat_id")
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								            if not exit_code_path.exists():
 								                logger.info("Update notification deferred: update still running")
 								                cleanup = False
 								                active_pending_path = pending_path
 								                claimed_path.replace(pending_path)
 								                return False
 								            exit_code_raw = exit_code_path.read_text().strip() or "1"
 								            exit_code = int(exit_code_raw)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            # Read the captured update output
 								            output = ""
 								            if output_path.exists():
 								                output = output_path.read_text()
 								            # Resolve adapter
 								            platform = Platform(platform_str)
 								            adapter = self.adapters.get(platform)
 								            if adapter and chat_id:
 								                # Strip ANSI escape codes for clean display
 								                output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
 								                if output:
 								                    if len(output) > 3500:
 								                        output = "…" + output[-3500:]
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								                    if exit_code == 0:
 								                        msg = f"✅ Hermes update finished.\n\n```\n{output}\n```"
 								                    else:
 								                        msg = f"❌ Hermes update failed.\n\n```\n{output}\n```"
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                else:
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								                    if exit_code == 0:
 								                        msg = "✅ Hermes update finished successfully."
 								                    else:
 								                        msg = "❌ Hermes update failed. Check the gateway logs or run `hermes update` manually for details."
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                await adapter.send(chat_id, msg)
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								                logger.info(
 								                    "Sent post-update notification to %s:%s (exit=%s)",
 								                    platform_str,
 								                    chat_id,
 								                    exit_code,
 								                )
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        except Exception as e:
 								            logger.warning("Post-update notification failed: %s", e)
 								        finally:
-												fix: notify gateway users when updates finish or fail

											
										
										
											2026-03-11 20:43:31 +01:00
+								            if cleanup:
 								                active_pending_path.unlink(missing_ok=True)
 								                claimed_path.unlink(missing_ok=True)
 								                output_path.unlink(missing_ok=True)
 								                exit_code_path.unlink(missing_ok=True)
 								        return True
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								    def _set_session_env(self, context: SessionContext) -> None:
 								        """Set environment variables for the current session."""
 								        os.environ["HERMES_SESSION_PLATFORM"] = context.source.platform.value
 								        os.environ["HERMES_SESSION_CHAT_ID"] = context.source.chat_id
 								        if context.source.chat_name:
 								            os.environ["HERMES_SESSION_CHAT_NAME"] = context.source.chat_name
-												fix: preserve thread context for cronjob deliver=origin

When a cronjob is created from within a Telegram or Slack thread,
deliver=origin was posting to the parent channel instead of the thread.

Root cause: the gateway never set HERMES_SESSION_THREAD_ID in the
session environment, so cronjob_tools.py could not capture thread_id
into the job's origin metadata — even though the scheduler already
reads origin.get('thread_id').

Fix:
- gateway/run.py: set HERMES_SESSION_THREAD_ID when thread_id is
  present on the session context, and clear it in _clear_session_env
- tools/cronjob_tools.py: read HERMES_SESSION_THREAD_ID into origin

Closes #1219

											
										
										
											2026-03-14 06:06:44 +03:00
+								        if context.source.thread_id:
 								            os.environ["HERMES_SESSION_THREAD_ID"] = str(context.source.thread_id)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								    def _clear_session_env(self) -> None:
 								        """Clear session environment variables."""
-												fix: preserve thread context for cronjob deliver=origin

When a cronjob is created from within a Telegram or Slack thread,
deliver=origin was posting to the parent channel instead of the thread.

Root cause: the gateway never set HERMES_SESSION_THREAD_ID in the
session environment, so cronjob_tools.py could not capture thread_id
into the job's origin metadata — even though the scheduler already
reads origin.get('thread_id').

Fix:
- gateway/run.py: set HERMES_SESSION_THREAD_ID when thread_id is
  present on the session context, and clear it in _clear_session_env
- tools/cronjob_tools.py: read HERMES_SESSION_THREAD_ID into origin

Closes #1219

											
										
										
											2026-03-14 06:06:44 +03:00
+								        for var in ["HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME", "HERMES_SESSION_THREAD_ID"]:
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            if var in os.environ:
 								                del os.environ[var]
 								    async def _enrich_message_with_vision(
 								        self,
 								        user_text: str,
 								        image_paths: List[str],
 								    ) -> str:
 								        """
 								        Auto-analyze user-attached images with the vision tool and prepend
 								        the descriptions to the message text.
 								        Each image is analyzed with a general-purpose prompt.  The resulting
 								        description *and* the local cache path are injected so the model can:
 . Immediately understand what the user sent (no extra tool call).
 . Re-examine the image with vision_analyze if it needs more detail.
 								        Args:
-												fix(gateway): remove app-specific Athabasca references from vision enrichment (#1529)

Salvaged from PR #1428 by jplew.

Removes Athabasca-specific persistence guidance accidentally merged
in PR #1422:
- Drop Athabasca docstring and injected note from _enrich_message_with_vision
- Delete tests/gateway/test_image_enrichment.py (asserted app-specific behavior)

Co-authored-by: jplew <jplew@users.noreply.github.com>
											
										
										
											2026-03-16 05:02:58 -07:00
+								            user_text:   The user's original caption / message text.
 								            image_paths: List of local file paths to cached images.
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								        Returns:
 								            The enriched message string with vision descriptions prepended.
 								        """
 								        from tools.vision_tools import vision_analyze_tool
 								        import json as _json
 								        analysis_prompt = (
 								            "Describe everything visible in this image in thorough detail. "
 								            "Include any text, code, data, objects, people, layout, colors, "
 								            "and any other notable visual information."
 								        )
 								        enriched_parts = []
 								        for path in image_paths:
 								            try:
 								                logger.debug("Auto-analyzing user image: %s", path)
 								                result_json = await vision_analyze_tool(
 								                    image_url=path,
 								                    user_prompt=analysis_prompt,
 								                )
 								                result = _json.loads(result_json)
 								                if result.get("success"):
 								                    description = result.get("analysis", "")
 								                    enriched_parts.append(
 								                        f"[The user sent an image~ Here's what I can see:\n{description}]\n"
 								                        f"[If you need a closer look, use vision_analyze with "
 								                        f"image_url: {path} ~]"
 								                    )
 								                else:
 								                    enriched_parts.append(
 								                        "[The user sent an image but I couldn't quite see it "
 								                        "this time (>_<) You can try looking at it yourself "
 								                        f"with vision_analyze using image_url: {path}]"
 								                    )
 								            except Exception as e:
 								                logger.error("Vision auto-analysis error: %s", e)
 								                enriched_parts.append(
 								                    f"[The user sent an image but something went wrong when I "
 								                    f"tried to look at it~ You can try examining it yourself "
 								                    f"with vision_analyze using image_url: {path}]"
 								                )
 								        # Combine: vision descriptions first, then the user's original text
 								        if enriched_parts:
 								            prefix = "\n\n".join(enriched_parts)
 								            if user_text:
 								                return f"{prefix}\n\n{user_text}"
 								            return prefix
 								        return user_text
 								    async def _enrich_message_with_transcription(
 								        self,
 								        user_text: str,
 								        audio_paths: List[str],
 								    ) -> str:
 								        """
-												fix(gateway): honor stt.enabled false for voice transcription

- bridge stt.enabled from config.yaml into gateway runtime config
- preserve the flag in GatewayConfig serialization
- skip gateway voice transcription when STT is disabled
- add regression tests for config loading and disabled transcription flow

											
										
										
											2026-03-14 22:09:53 -07:00
+								        Auto-transcribe user voice/audio messages using the configured STT provider
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        and prepend the transcript to the message text.
 								        Args:
 								            user_text:   The user's original caption / message text.
 								            audio_paths: List of local file paths to cached audio files.
 								        Returns:
 								            The enriched message string with transcriptions prepended.
 								        """
-												fix(gateway): honor stt.enabled false for voice transcription

- bridge stt.enabled from config.yaml into gateway runtime config
- preserve the flag in GatewayConfig serialization
- skip gateway voice transcription when STT is disabled
- add regression tests for config loading and disabled transcription flow

											
										
										
											2026-03-14 22:09:53 -07:00
+								        if not getattr(self.config, "stt_enabled", True):
-												fix: check skill availability before hinting at hermes-agent-setup

Only mention the hermes-agent-setup skill in STT failure notes (both
the direct user message and the agent context note) when the skill is
actually installed. Uses _find_skill() from skill_manager_tool.

Also confirmed: STT is the only user-facing failure case where the
setup skill hint helps. Vision failures are transient API issues,
runtime transcription errors indicate a configured-but-broken provider,
and platform startup warnings are server logs.

											
										
										
											2026-03-18 03:17:23 -07:00
+								            disabled_note = "[The user sent voice message(s), but transcription is disabled in config."
 								            if self._has_setup_skill():
 								                disabled_note += (
 								                    " You have a skill called hermes-agent-setup that can help "
 								                    "users configure Hermes features including voice, tools, and more."
 								                )
 								            disabled_note += "]"
-												fix(gateway): honor stt.enabled false for voice transcription

- bridge stt.enabled from config.yaml into gateway runtime config
- preserve the flag in GatewayConfig serialization
- skip gateway voice transcription when STT is disabled
- add regression tests for config loading and disabled transcription flow

											
										
										
											2026-03-14 22:09:53 -07:00
+								            if user_text:
 								                return f"{disabled_note}\n\n{user_text}"
 								            return disabled_note
-												refactor: extract get_stt_model_from_config helper to eliminate DRY violation

Duplicated YAML config parsing for stt.model existed in gateway/run.py
and gateway/platforms/discord.py. Moved to a single helper in
transcription_tools.py and added 5 tests covering all edge cases.

											
										
										
											2026-03-12 00:26:40 +03:00
+								        from tools.transcription_tools import transcribe_audio, get_stt_model_from_config
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        import asyncio
-												refactor: extract get_stt_model_from_config helper to eliminate DRY violation

Duplicated YAML config parsing for stt.model existed in gateway/run.py
and gateway/platforms/discord.py. Moved to a single helper in
transcription_tools.py and added 5 tests covering all edge cases.

											
										
										
											2026-03-12 00:26:40 +03:00
+								        stt_model = get_stt_model_from_config()
-												fix: make STT config env-overridable and fix doc issues

Code fixes:
- STT model, Groq base URL, and OpenAI STT base URL are now
  configurable via env vars (STT_GROQ_MODEL, STT_OPENAI_MODEL,
  GROQ_BASE_URL, STT_OPENAI_BASE_URL) instead of hardcoded
- Gateway and Discord VC now read stt.model from config.yaml
  (previously only CLI did this — gateway always used defaults)

Doc fixes:
- voice-mode.md: move Web UI troubleshooting to web.md (was duplicated)
- voice-mode.md: simplify "How It Works" for end users (remove NaCl,
  DAVE, RTP internals)
- voice-mode.md: clarify STT priority (OpenAI used first if both keys
  set, Groq recommended for free tier)
- voice-mode.md: document new STT env overrides in config reference
- web.md: remove duplicate Quick Start / Step 1-3 sections
- web.md: add mobile HTTPS mic workarounds (moved from voice-mode.md)
- web.md: clarify STT fallback order

											
										
										
											2026-03-12 00:15:38 +03:00
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        enriched_parts = []
 								        for path in audio_paths:
 								            try:
 								                logger.debug("Transcribing user voice: %s", path)
-												fix: make STT config env-overridable and fix doc issues

Code fixes:
- STT model, Groq base URL, and OpenAI STT base URL are now
  configurable via env vars (STT_GROQ_MODEL, STT_OPENAI_MODEL,
  GROQ_BASE_URL, STT_OPENAI_BASE_URL) instead of hardcoded
- Gateway and Discord VC now read stt.model from config.yaml
  (previously only CLI did this — gateway always used defaults)

Doc fixes:
- voice-mode.md: move Web UI troubleshooting to web.md (was duplicated)
- voice-mode.md: simplify "How It Works" for end users (remove NaCl,
  DAVE, RTP internals)
- voice-mode.md: clarify STT priority (OpenAI used first if both keys
  set, Groq recommended for free tier)
- voice-mode.md: document new STT env overrides in config reference
- web.md: remove duplicate Quick Start / Step 1-3 sections
- web.md: add mobile HTTPS mic workarounds (moved from voice-mode.md)
- web.md: clarify STT fallback order

											
										
										
											2026-03-12 00:15:38 +03:00
+								                result = await asyncio.to_thread(transcribe_audio, path, model=stt_model)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                if result["success"]:
 								                    transcript = result["transcript"]
 								                    enriched_parts.append(
 								                        f'[The user sent a voice message~ '
 								                        f'Here\'s what they said: "{transcript}"]'
 								                    )
 								                else:
 								                    error = result.get("error", "unknown error")
-												fix: restore local STT fallback for gateway voice notes

Restore local STT command fallback for voice transcription, detect whisper and ffmpeg in common local install paths, and avoid bogus no-provider messaging when only a backend-specific key is missing.

											
										
										
											2026-03-15 21:51:40 -07:00
+								                    if (
 								                        "No STT provider" in error
 								                        or error.startswith("Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set")
 								                    ):
-												fix: check skill availability before hinting at hermes-agent-setup

Only mention the hermes-agent-setup skill in STT failure notes (both
the direct user message and the agent context note) when the skill is
actually installed. Uses _find_skill() from skill_manager_tool.

Also confirmed: STT is the only user-facing failure case where the
setup skill hint helps. Vision failures are transient API issues,
runtime transcription errors indicate a configured-but-broken provider,
and platform startup warnings are server logs.

											
										
										
											2026-03-18 03:17:23 -07:00
+								                        _no_stt_note = (
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                            "[The user sent a voice message but I can't listen "
-												feat: expand hermes-agent-setup skill + tell agent about it in STT notes

Skill now covers full CLI usage (hermes setup, hermes skills, hermes
tools, hermes config, session management, etc.), config file reference,
and expanded gateway commands.

Agent context notes for STT failure now mention the hermes-agent-setup
skill is available to help users configure Hermes features.

											
										
										
											2026-03-18 03:05:17 -07:00
+								                            "to it right now — no STT provider is configured. "
 								                            "A direct message has already been sent to the user "
-												fix: check skill availability before hinting at hermes-agent-setup

Only mention the hermes-agent-setup skill in STT failure notes (both
the direct user message and the agent context note) when the skill is
actually installed. Uses _find_skill() from skill_manager_tool.

Also confirmed: STT is the only user-facing failure case where the
setup skill hint helps. Vision failures are transient API issues,
runtime transcription errors indicate a configured-but-broken provider,
and platform startup warnings are server logs.

											
										
										
											2026-03-18 03:17:23 -07:00
+								                            "with setup instructions."
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                        )
-												fix: check skill availability before hinting at hermes-agent-setup

Only mention the hermes-agent-setup skill in STT failure notes (both
the direct user message and the agent context note) when the skill is
actually installed. Uses _find_skill() from skill_manager_tool.

Also confirmed: STT is the only user-facing failure case where the
setup skill hint helps. Vision failures are transient API issues,
runtime transcription errors indicate a configured-but-broken provider,
and platform startup warnings are server logs.

											
										
										
											2026-03-18 03:17:23 -07:00
+								                        if self._has_setup_skill():
 								                            _no_stt_note += (
 								                                " You have a skill called hermes-agent-setup "
 								                                "that can help users configure Hermes features "
 								                                "including voice, tools, and more."
 								                            )
 								                        _no_stt_note += "]"
 								                        enriched_parts.append(_no_stt_note)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                    else:
 								                        enriched_parts.append(
 								                            "[The user sent a voice message but I had trouble "
 								                            f"transcribing it~ ({error})]"
 								                        )
 								            except Exception as e:
 								                logger.error("Transcription error: %s", e)
 								                enriched_parts.append(
 								                    "[The user sent a voice message but something went wrong "
 								                    "when I tried to listen to it~ Let them know!]"
 								                )
 								        if enriched_parts:
 								            prefix = "\n\n".join(enriched_parts)
 								            if user_text:
 								                return f"{prefix}\n\n{user_text}"
 								            return prefix
 								        return user_text
 								    async def _run_process_watcher(self, watcher: dict) -> None:
 								        """
 								        Periodically check a background process and push updates to the user.
 								        Runs as an asyncio task. Stays silent when nothing changed.
 								        Auto-removes when the process exits or is killed.
 								        Notification mode (from ``display.background_process_notifications``):
 								          - ``all``    — running-output updates + final message
 								          - ``result`` — final completion message only
 								          - ``error``  — final message only when exit code != 0
 								          - ``off``    — no messages at all
 								        """
 								        from tools.process_registry import process_registry
 								        session_id = watcher["session_id"]
 								        interval = watcher["check_interval"]
 								        session_key = watcher.get("session_key", "")
 								        platform_name = watcher.get("platform", "")
 								        chat_id = watcher.get("chat_id", "")
-												fix: route background process watcher notifications to Telegram forum topics (#1481)

Salvaged from PR #1146 by spanishflu-est1918.

Background process progress/completion messages were sent with only
chat_id, landing in the general topic instead of the originating forum
topic. Thread the thread_id from HERMES_SESSION_THREAD_ID through the
watcher payload and pass it as metadata to adapter.send() so Telegram
routes notifications to the correct topic.

The env var export (HERMES_SESSION_THREAD_ID in _set_session_env /
_clear_session_env) already existed on main — this commit adds the
missing watcher plumbing.

Co-authored-by: spanishflu-est1918 <spanishflu-est1918@users.noreply.github.com>
											
										
										
											2026-03-15 23:01:57 -07:00
+								        thread_id = watcher.get("thread_id", "")
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        notify_mode = self._load_background_notifications_mode()
 								        logger.debug("Process watcher started: %s (every %ss, notify=%s)",
 								                      session_id, interval, notify_mode)
 								        if notify_mode == "off":
 								            # Still wait for the process to exit so we can log it, but don't
 								            # push any messages to the user.
 								            while True:
 								                await asyncio.sleep(interval)
 								                session = process_registry.get(session_id)
 								                if session is None or session.exited:
 								                    break
 								            logger.debug("Process watcher ended (silent): %s", session_id)
 								            return
 								        last_output_len = 0
 								        while True:
 								            await asyncio.sleep(interval)
 								            session = process_registry.get(session_id)
 								            if session is None:
 								                break
 								            current_output_len = len(session.output_buffer)
 								            has_new_output = current_output_len > last_output_len
 								            last_output_len = current_output_len
 								            if session.exited:
 								                # Decide whether to notify based on mode
 								                should_notify = (
 								                    notify_mode in ("all", "result")
 								                    or (notify_mode == "error" and session.exit_code not in (0, None))
 								                )
 								                if should_notify:
 								                    new_output = session.output_buffer[-1000:] if session.output_buffer else ""
 								                    message_text = (
 								                        f"[Background process {session_id} finished with exit code {session.exit_code}~ "
 								                        f"Here's the final output:\n{new_output}]"
 								                    )
 								                    adapter = None
 								                    for p, a in self.adapters.items():
 								                        if p.value == platform_name:
 								                            adapter = a
 								                            break
 								                    if adapter and chat_id:
 								                        try:
-												fix: route background process watcher notifications to Telegram forum topics (#1481)

Salvaged from PR #1146 by spanishflu-est1918.

Background process progress/completion messages were sent with only
chat_id, landing in the general topic instead of the originating forum
topic. Thread the thread_id from HERMES_SESSION_THREAD_ID through the
watcher payload and pass it as metadata to adapter.send() so Telegram
routes notifications to the correct topic.

The env var export (HERMES_SESSION_THREAD_ID in _set_session_env /
_clear_session_env) already existed on main — this commit adds the
missing watcher plumbing.

Co-authored-by: spanishflu-est1918 <spanishflu-est1918@users.noreply.github.com>
											
										
										
											2026-03-15 23:01:57 -07:00
+								                            send_meta = {"thread_id": thread_id} if thread_id else None
 								                            await adapter.send(chat_id, message_text, metadata=send_meta)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                        except Exception as e:
 								                            logger.error("Watcher delivery error: %s", e)
 								                break
 								            elif has_new_output and notify_mode == "all":
 								                # New output available -- deliver status update (only in "all" mode)
 								                new_output = session.output_buffer[-500:] if session.output_buffer else ""
 								                message_text = (
 								                    f"[Background process {session_id} is still running~ "
 								                    f"New output:\n{new_output}]"
 								                )
 								                adapter = None
 								                for p, a in self.adapters.items():
 								                    if p.value == platform_name:
 								                        adapter = a
 								                        break
 								                if adapter and chat_id:
 								                    try:
-												fix: route background process watcher notifications to Telegram forum topics (#1481)

Salvaged from PR #1146 by spanishflu-est1918.

Background process progress/completion messages were sent with only
chat_id, landing in the general topic instead of the originating forum
topic. Thread the thread_id from HERMES_SESSION_THREAD_ID through the
watcher payload and pass it as metadata to adapter.send() so Telegram
routes notifications to the correct topic.

The env var export (HERMES_SESSION_THREAD_ID in _set_session_env /
_clear_session_env) already existed on main — this commit adds the
missing watcher plumbing.

Co-authored-by: spanishflu-est1918 <spanishflu-est1918@users.noreply.github.com>
											
										
										
											2026-03-15 23:01:57 -07:00
+								                        send_meta = {"thread_id": thread_id} if thread_id else None
 								                        await adapter.send(chat_id, message_text, metadata=send_meta)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                    except Exception as e:
 								                        logger.error("Watcher delivery error: %s", e)
 								        logger.debug("Process watcher ended: %s", session_id)
-												fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)

Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.

- Apply truncate_message() chunking in _send_to_platform() before
  dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
  in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement

Cherry-picked from PR #1557 by llbn.

* fix(approval): show full command in dangerous command approval (#1553)

Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:

- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
  in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests

Cherry-picked from PR #1566 by crazywriter1.

* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)

The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.

Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.

* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)

When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.

Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.

Cherry-picked from PR #1593 by ygd58.

* fix(security): block sandbox backend creds from subprocess env (#1264)

Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.

Cherry-picked from PR #1571 by ygd58.

* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.

Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.

The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
											
										
										
											2026-03-17 02:23:07 -07:00
+								    _MAX_INTERRUPT_DEPTH = 3  # Cap recursive interrupt handling (#816)
-												feat(gateway): cache AIAgent per session for prompt caching

The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.

Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
  prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents

Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.

This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.

											
										
										
											2026-03-21 13:07:08 -07:00
+								    @staticmethod
 								    def _agent_config_signature(
 								        model: str,
 								        runtime: dict,
 								        enabled_toolsets: list,
 								        ephemeral_prompt: str,
 								    ) -> str:
 								        """Compute a stable string key from agent config values.
 								        When this signature changes between messages, the cached AIAgent is
 								        discarded and rebuilt.  When it stays the same, the cached agent is
 								        reused — preserving the frozen system prompt and tool schemas for
 								        prompt cache hits.
 								        """
 								        import hashlib, json as _j
-												fix(gateway): fingerprint full auth token in agent cache signature (#3247)

Previously _agent_config_signature() used only the first 8 characters of
the API key, which causes false cache hits for JWT/OAuth tokens that share
a common prefix (e.g. 'eyJhbGci'). This led to cross-account cache
collisions when switching OAuth accounts in multi-user gateway deployments.

Replace the 8-char prefix with a SHA-256 hash of the full key so the
signature is unique per credential while keeping secrets out of the
cache key.

Salvaged from PR #3117 by EmpireOperating.

Co-authored-by: EmpireOperating <EmpireOperating@users.noreply.github.com>
											
										
										
											2026-03-26 13:19:43 -07:00
 								        # Fingerprint the FULL credential string instead of using a short
 								        # prefix. OAuth/JWT-style tokens frequently share a common prefix
 								        # (e.g. "eyJhbGci"), which can cause false cache hits across auth
 								        # switches if only the first few characters are considered.
 								        _api_key = str(runtime.get("api_key", "") or "")
 								        _api_key_fingerprint = hashlib.sha256(_api_key.encode()).hexdigest() if _api_key else ""
-												feat(gateway): cache AIAgent per session for prompt caching

The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.

Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
  prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents

Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.

This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.

											
										
										
											2026-03-21 13:07:08 -07:00
+								        blob = _j.dumps(
 								            [
 								                model,
-												fix(gateway): fingerprint full auth token in agent cache signature (#3247)

Previously _agent_config_signature() used only the first 8 characters of
the API key, which causes false cache hits for JWT/OAuth tokens that share
a common prefix (e.g. 'eyJhbGci'). This led to cross-account cache
collisions when switching OAuth accounts in multi-user gateway deployments.

Replace the 8-char prefix with a SHA-256 hash of the full key so the
signature is unique per credential while keeping secrets out of the
cache key.

Salvaged from PR #3117 by EmpireOperating.

Co-authored-by: EmpireOperating <EmpireOperating@users.noreply.github.com>
											
										
										
											2026-03-26 13:19:43 -07:00
+								                _api_key_fingerprint,
-												feat(gateway): cache AIAgent per session for prompt caching

The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.

Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
  prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents

Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.

This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.

											
										
										
											2026-03-21 13:07:08 -07:00
+								                runtime.get("base_url", ""),
 								                runtime.get("provider", ""),
 								                runtime.get("api_mode", ""),
 								                sorted(enabled_toolsets) if enabled_toolsets else [],
 								                # reasoning_config excluded — it's set per-message on the
 								                # cached agent and doesn't affect system prompt or tools.
 								                ephemeral_prompt or "",
 								            ],
 								            sort_keys=True,
 								            default=str,
 								        )
 								        return hashlib.sha256(blob.encode()).hexdigest()[:16]
 								    def _evict_cached_agent(self, session_key: str) -> None:
 								        """Remove a cached agent for a session (called on /new, /model, etc)."""
 								        _lock = getattr(self, "_agent_cache_lock", None)
 								        if _lock:
 								            with _lock:
 								                self._agent_cache.pop(session_key, None)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								    async def _run_agent(
 								        self,
 								        message: str,
 								        context_prompt: str,
 								        history: List[Dict[str, Any]],
 								        source: SessionSource,
 								        session_id: str,
-												fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)

Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.

- Apply truncate_message() chunking in _send_to_platform() before
  dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
  in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement

Cherry-picked from PR #1557 by llbn.

* fix(approval): show full command in dangerous command approval (#1553)

Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:

- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
  in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests

Cherry-picked from PR #1566 by crazywriter1.

* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)

The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.

Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.

* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)

When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.

Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.

Cherry-picked from PR #1593 by ygd58.

* fix(security): block sandbox backend creds from subprocess env (#1264)

Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.

Cherry-picked from PR #1571 by ygd58.

* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.

Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.

The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
											
										
										
											2026-03-17 02:23:07 -07:00
+								        session_key: str = None,
 								        _interrupt_depth: int = 0,
-												fix(gateway/slack): send progress messages to correct thread (#3063)

Co-authored-by: Jneeee <jneeee@outlook.com>
											
										
										
											2026-03-25 15:51:15 -07:00
+								        event_message_id: Optional[str] = None,
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								    ) -> Dict[str, Any]:
 								        """
 								        Run the agent with the given message and context.
 								        Returns the full result dict from run_conversation, including:
 								          - "final_response": str (the text to send back)
 								          - "messages": list (full conversation including tool calls)
 								          - "api_calls": int
 								          - "completed": bool
 								        This is run in a thread pool to not block the event loop.
 								        Supports interruption via new messages.
 								        """
 								        from run_agent import AIAgent
 								        import queue
-												fix: MCP toolset resolution for runtime and config (#3252)

Gateway sessions had their own inline toolset resolution that only read
platform_toolsets from config, which never includes MCP server names.
MCP tools were discovered and registered but invisible to the model.

- Replace duplicated gateway toolset resolution in _run_agent() and
  _run_background_task() with calls to the shared _get_platform_tools()
- Extend _get_platform_tools() to include globally enabled MCP servers
  at runtime (include_default_mcp_servers=True), while config-editing
  flows use include_default_mcp_servers=False to avoid persisting
  implicit MCP defaults into platform_toolsets
- Add homeassistant to PLATFORMS dict (was missing, caused KeyError)
- Fix CLI entry point to use _get_platform_tools() as well, so MCP
  tools are visible in CLI mode too
- Remove redundant platform_key reassignment in _run_background_task

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-26 13:39:41 -07:00
+								        user_config = _load_gateway_config()
 								        platform_key = _platform_config_key(source.platform)
 								        from hermes_cli.tools_config import _get_platform_tools
 								        enabled_toolsets = sorted(_get_platform_tools(user_config, platform_key))
-												feat: add voice conversation support and futuristic UI redesign

- Auto-TTS: voice messages get spoken response (audio first, then text)
- STT: Groq Whisper fallback when VOICE_TOOLS_OPENAI_KEY not set
- Futuristic UI: glassmorphism, centered container, purple theme, glow effects
- Voice bubble: custom waveform player with seek and progress
- Invisible TTS playback via play_tts() method (no audio file in chat)
- Add hermes-web toolset with full tool access
- Register Platform.WEB in toolset/config maps
- Update docs for voice conversation feature

											
										
										
											2026-03-11 20:16:57 +03:00
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        # Tool progress mode from config.yaml: "all", "new", "verbose", "off"
-												fix: YAML boolean handling for tool_progress config (#3300)

YAML 1.1 parses bare `off` as boolean False, which is falsy in
Python's `or` chain and silently falls through to the 'all' default.
Users setting `display.tool_progress: off` in config.yaml saw no
effect — tool progress stayed on.

Normalise False → 'off' before the or chain in both affected paths:
- gateway/run.py _run_agent() tool progress reader
- cli.py HermesCLI.__init__() tool_progress_mode

Reported by @gibbsoft in #2859. Closes #2859.
											
										
										
											2026-03-26 17:58:50 -07:00
+								        # Falls back to env vars for backward compatibility.
 								        # YAML 1.1 parses bare `off` as boolean False — normalise before
 								        # the `or` chain so it doesn't silently fall through to "all".
 								        _raw_tp = user_config.get("display", {}).get("tool_progress")
 								        if _raw_tp is False:
 								            _raw_tp = "off"
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        progress_mode = (
-												fix: YAML boolean handling for tool_progress config (#3300)

YAML 1.1 parses bare `off` as boolean False, which is falsy in
Python's `or` chain and silently falls through to the 'all' default.
Users setting `display.tool_progress: off` in config.yaml saw no
effect — tool progress stayed on.

Normalise False → 'off' before the or chain in both affected paths:
- gateway/run.py _run_agent() tool progress reader
- cli.py HermesCLI.__init__() tool_progress_mode

Reported by @gibbsoft in #2859. Closes #2859.
											
										
										
											2026-03-26 17:58:50 -07:00
+								            _raw_tp
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            or os.getenv("HERMES_TOOL_PROGRESS_MODE")
 								            or "all"
 								        )
 								        tool_progress_enabled = progress_mode != "off"
 								        # Queue for progress messages (thread-safe)
 								        progress_queue = queue.Queue() if tool_progress_enabled else None
 								        last_tool = [None]  # Mutable container for tracking in closure
 								        last_progress_msg = [None]  # Track last message for dedup
 								        repeat_count = [0]  # How many times the same message repeated
 								        def progress_callback(tool_name: str, preview: str = None, args: dict = None):
 								            """Callback invoked by agent when a tool is called."""
 								            if not progress_queue:
 								                return
 								            # "new" mode: only report when tool changes
 								            if progress_mode == "new" and tool_name == last_tool[0]:
 								                return
 								            last_tool[0] = tool_name
 								            # Build progress message with primary argument preview
-												feat(tools): centralize tool emoji metadata in registry + skin integration

- Add 'emoji' field to ToolEntry and 'get_emoji()' to ToolRegistry
- Add emoji= to all 50+ registry.register() calls across tool files
- Add get_tool_emoji() helper in agent/display.py with 3-tier resolution:
  skin override → registry default → hardcoded fallback
- Replace hardcoded emoji maps in run_agent.py, delegate_tool.py, and
  gateway/run.py with centralized get_tool_emoji() calls
- Add 'tool_emojis' field to SkinConfig so skins can override per-tool
  emojis (e.g. ares skin could use swords instead of wrenches)
- Add 11 tests (5 registry emoji, 6 display/skin integration)
- Update AGENTS.md skin docs table

Based on the approach from PR #1061 by ForgingAlex (emoji centralization
in registry). This salvage fixes several issues from the original:
- Does NOT split the cronjob tool (which would crash on missing schemas)
- Does NOT change image_generate toolset/requires_env/is_async
- Does NOT delete existing tests
- Completes the centralization (gateway/run.py was missed)
- Hooks into the skin system for full customizability

											
										
										
											2026-03-15 20:21:21 -07:00
+								            from agent.display import get_tool_emoji
 								            emoji = get_tool_emoji(tool_name, default="⚙️")
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								            # Verbose mode: show detailed arguments
 								            if progress_mode == "verbose" and args:
 								                import json as _json
 								                args_str = _json.dumps(args, ensure_ascii=False, default=str)
 								                if len(args_str) > 200:
 								                    args_str = args_str[:197] + "..."
 								                msg = f"{emoji} {tool_name}({list(args.keys())})\n{args_str}"
 								                progress_queue.put(msg)
 								                return
 								            if preview:
 								                # Truncate preview to keep messages clean
 								                if len(preview) > 80:
 								                    preview = preview[:77] + "..."
 								                msg = f"{emoji} {tool_name}: \"{preview}\""
 								            else:
 								                msg = f"{emoji} {tool_name}..."
 								            # Dedup: collapse consecutive identical progress messages.
 								            # Common with execute_code where models iterate with the same
 								            # code (same boilerplate imports → identical previews).
 								            if msg == last_progress_msg[0]:
 								                repeat_count[0] += 1
 								                # Update the last line in progress_lines with a counter
 								                # via a special "dedup" queue message.
 								                progress_queue.put(("__dedup__", msg, repeat_count[0]))
 								                return
 								            last_progress_msg[0] = msg
 								            repeat_count[0] = 0
 								            progress_queue.put(msg)
 								        # Background task to send progress messages
-												fix(gateway): scope progress thread fallback to Slack only (salvage #3414) (#3488)

* test(gateway): map fixture adapter by platform in progress threading tests

* fix(gateway): scope progress thread fallback to Slack only

---------

Co-authored-by: EmpireOperating <258363005+EmpireOperating@users.noreply.github.com>
											
										
										
											2026-03-27 22:37:53 -07:00
+								        # Accumulates tool lines into a single message that gets edited.
 								        #
 								        # Threading metadata is platform-specific:
 								        # - Slack DM threading needs event_message_id fallback (reply thread)
 								        # - Telegram uses message_thread_id only for forum topics; passing a
 								        #   normal DM/group message id as thread_id causes send failures
 								        # - Other platforms should use explicit source.thread_id only
 								        if source.platform == Platform.SLACK:
 								            _progress_thread_id = source.thread_id or event_message_id
 								        else:
 								            _progress_thread_id = source.thread_id
-												fix(gateway/slack): send progress messages to correct thread (#3063)

Co-authored-by: Jneeee <jneeee@outlook.com>
											
										
										
											2026-03-25 15:51:15 -07:00
+								        _progress_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								        async def send_progress_messages():
 								            if not progress_queue:
 								                return
 								            adapter = self.adapters.get(source.platform)
 								            if not adapter:
 								                return
 								            progress_lines = []      # Accumulated tool lines
 								            progress_msg_id = None   # ID of the progress message to edit
 								            can_edit = True          # False once an edit fails (platform doesn't support it)
 								            while True:
 								                try:
 								                    raw = progress_queue.get_nowait()
 								                    # Handle dedup messages: update last line with repeat counter
 								                    if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
 								                        _, base_msg, count = raw
 								                        if progress_lines:
 								                            progress_lines[-1] = f"{base_msg} (×{count + 1})"
 								                        msg = progress_lines[-1] if progress_lines else base_msg
 								                    else:
 								                        msg = raw
 								                        progress_lines.append(msg)
 								                    if can_edit and progress_msg_id is not None:
 								                        # Try to edit the existing progress message
 								                        full_text = "\n".join(progress_lines)
 								                        result = await adapter.edit_message(
 								                            chat_id=source.chat_id,
 								                            message_id=progress_msg_id,
 								                            content=full_text,
 								                        )
 								                        if not result.success:
 								                            # Platform doesn't support editing — stop trying,
 								                            # send just this new line as a separate message
 								                            can_edit = False
 								                            await adapter.send(chat_id=source.chat_id, content=msg, metadata=_progress_metadata)
 								                    else:
 								                        if can_edit:
 								                            # First tool: send all accumulated text as new message
 								                            full_text = "\n".join(progress_lines)
 								                            result = await adapter.send(chat_id=source.chat_id, content=full_text, metadata=_progress_metadata)
 								                        else:
 								                            # Editing unsupported: send just this line
 								                            result = await adapter.send(chat_id=source.chat_id, content=msg, metadata=_progress_metadata)
 								                        if result.success and result.message_id:
 								                            progress_msg_id = result.message_id
 								                    # Restore typing indicator
 								                    await asyncio.sleep(0.3)
 								                    await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
 								                except queue.Empty:
 								                    await asyncio.sleep(0.3)
 								                except asyncio.CancelledError:
 								                    # Drain remaining queued messages
 								                    while not progress_queue.empty():
 								                        try:
 								                            raw = progress_queue.get_nowait()
 								                            if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
 								                                _, base_msg, count = raw
 								                                if progress_lines:
 								                                    progress_lines[-1] = f"{base_msg} (×{count + 1})"
 								                            else:
 								                                progress_lines.append(raw)
 								                        except Exception:
 								                            break
 								                    # Final edit with all remaining tools (only if editing works)
 								                    if can_edit and progress_lines and progress_msg_id:
 								                        full_text = "\n".join(progress_lines)
 								                        try:
 								                            await adapter.edit_message(
 								                                chat_id=source.chat_id,
 								                                message_id=progress_msg_id,
 								                                content=full_text,
 								                            )
 								                        except Exception:
 								                            pass
 								                    return
 								                except Exception as e:
 								                    logger.error("Progress message error: %s", e)
 								                    await asyncio.sleep(1)
 								        # We need to share the agent instance for interrupt support
 								        agent_holder = [None]  # Mutable container for the agent instance
 								        result_holder = [None]  # Mutable container for the result
 								        tools_holder = [None]   # Mutable container for the tool definitions
-												feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent

Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:

- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
  on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
  a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
  returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally

Config (config.yaml):
  streaming:
    enabled: true
    transport: edit      # progressive editMessageText
    edit_interval: 0.3   # seconds between edits
    buffer_threshold: 40 # chars before forcing flush
    cursor: ' ▉'

Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).

											
										
										
											2026-03-16 05:52:42 -07:00
+								        stream_consumer_holder = [None]  # Mutable container for stream consumer
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								        # Bridge sync step_callback → async hooks.emit for agent:step events
 								        _loop_for_step = asyncio.get_event_loop()
 								        _hooks_ref = self.hooks
 								        def _step_callback_sync(iteration: int, tool_names: list) -> None:
 								            try:
 								                asyncio.run_coroutine_threadsafe(
 								                    _hooks_ref.emit("agent:step", {
 								                        "platform": source.platform.value if source.platform else "",
 								                        "user_id": source.user_id,
 								                        "session_id": session_id,
 								                        "iteration": iteration,
 								                        "tool_names": tool_names,
 								                    }),
 								                    _loop_for_step,
 								                )
 								            except Exception as _e:
 								                logger.debug("agent:step hook error: %s", _e)
-												feat: context pressure warnings for CLI and gateway (#2159)

* feat: context pressure warnings for CLI and gateway

User-facing notifications as context approaches the compaction threshold.
Warnings fire at 60% and 85% of the way to compaction — relative to
the configured compression threshold, not the raw context window.

CLI: Formatted line with a progress bar showing distance to compaction.
Cyan at 60% (approaching), bold yellow at 85% (imminent).

  ◐ context ▰▰▰▰▰▰▰▰▰▰▰▰▱▱▱▱▱▱▱▱ 60% to compaction  100k threshold (50%) · approaching compaction
  ⚠ context ▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▱▱▱ 85% to compaction  100k threshold (50%) · compaction imminent

Gateway: Plain-text notification sent to the user's chat via the new
status_callback mechanism (asyncio.run_coroutine_threadsafe bridge,
same pattern as step_callback).

Does NOT inject into the message stream. The LLM never sees these
warnings. Flags reset after each compaction cycle.

Files changed:
- agent/display.py — format_context_pressure(), format_context_pressure_gateway()
- run_agent.py — status_callback param, _context_50/70_warned flags,
  _emit_context_pressure(), flag reset in _compress_context()
- gateway/run.py — _status_callback_sync bridge, wired to AIAgent
- tests/test_context_pressure.py — 23 tests

* Merge remote-tracking branch 'origin/main' into hermes/hermes-7ea545bf

---------

Co-authored-by: Test <test@test.com>
											
										
										
											2026-03-20 08:37:36 -07:00
+								        # Bridge sync status_callback → async adapter.send for context pressure
 								        _status_adapter = self.adapters.get(source.platform)
 								        _status_chat_id = source.chat_id
-												fix(gateway/slack): send progress messages to correct thread (#3063)

Co-authored-by: Jneeee <jneeee@outlook.com>
											
										
										
											2026-03-25 15:51:15 -07:00
+								        _status_thread_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None
-												feat: context pressure warnings for CLI and gateway (#2159)

* feat: context pressure warnings for CLI and gateway

User-facing notifications as context approaches the compaction threshold.
Warnings fire at 60% and 85% of the way to compaction — relative to
the configured compression threshold, not the raw context window.

CLI: Formatted line with a progress bar showing distance to compaction.
Cyan at 60% (approaching), bold yellow at 85% (imminent).

  ◐ context ▰▰▰▰▰▰▰▰▰▰▰▰▱▱▱▱▱▱▱▱ 60% to compaction  100k threshold (50%) · approaching compaction
  ⚠ context ▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▱▱▱ 85% to compaction  100k threshold (50%) · compaction imminent

Gateway: Plain-text notification sent to the user's chat via the new
status_callback mechanism (asyncio.run_coroutine_threadsafe bridge,
same pattern as step_callback).

Does NOT inject into the message stream. The LLM never sees these
warnings. Flags reset after each compaction cycle.

Files changed:
- agent/display.py — format_context_pressure(), format_context_pressure_gateway()
- run_agent.py — status_callback param, _context_50/70_warned flags,
  _emit_context_pressure(), flag reset in _compress_context()
- gateway/run.py — _status_callback_sync bridge, wired to AIAgent
- tests/test_context_pressure.py — 23 tests

* Merge remote-tracking branch 'origin/main' into hermes/hermes-7ea545bf

---------

Co-authored-by: Test <test@test.com>
											
										
										
											2026-03-20 08:37:36 -07:00
 								        def _status_callback_sync(event_type: str, message: str) -> None:
 								            if not _status_adapter:
 								                return
 								            try:
 								                asyncio.run_coroutine_threadsafe(
 								                    _status_adapter.send(
 								                        _status_chat_id,
 								                        message,
 								                        metadata=_status_thread_metadata,
 								                    ),
 								                    _loop_for_step,
 								                )
 								            except Exception as _e:
 								                logger.debug("status_callback error (%s): %s", event_type, _e)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        def run_sync():
 								            # Pass session_key to process registry via env var so background
 								            # processes can be mapped back to this gateway session
 								            os.environ["HERMES_SESSION_KEY"] = session_key or ""
 								            # Read from env var or use default (same as CLI)
 								            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
 								            # Map platform enum to the platform hint key the agent understands.
 								            # Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
 								            platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value
 								            # Combine platform context with user-configured ephemeral system prompt
 								            combined_ephemeral = context_prompt or ""
 								            if self._ephemeral_system_prompt:
 								                combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
 								            # Re-read .env and config for fresh credentials (gateway is long-lived,
 								            # keys may change without restart).
 								            try:
 								                load_dotenv(_env_path, override=True, encoding="utf-8")
 								            except UnicodeDecodeError:
 								                load_dotenv(_env_path, override=True, encoding="latin-1")
 								            except Exception:
 								                pass
-												fix: MCP toolset resolution for runtime and config (#3252)

Gateway sessions had their own inline toolset resolution that only read
platform_toolsets from config, which never includes MCP server names.
MCP tools were discovered and registered but invisible to the model.

- Replace duplicated gateway toolset resolution in _run_agent() and
  _run_background_task() with calls to the shared _get_platform_tools()
- Extend _get_platform_tools() to include globally enabled MCP servers
  at runtime (include_default_mcp_servers=True), while config-editing
  flows use include_default_mcp_servers=False to avoid persisting
  implicit MCP defaults into platform_toolsets
- Add homeassistant to PLATFORMS dict (was missing, caused KeyError)
- Fix CLI entry point to use _get_platform_tools() as well, so MCP
  tools are visible in CLI mode too
- Remove redundant platform_key reassignment in _run_background_task

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
											
										
										
											2026-03-26 13:39:41 -07:00
+								            model = _resolve_gateway_model(user_config)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								            try:
 								                runtime_kwargs = _resolve_runtime_agent_kwargs()
 								            except Exception as exc:
 								                return {
 								                    "final_response": f"⚠️ Provider authentication failed: {exc}",
 								                    "messages": [],
 								                    "api_calls": 0,
 								                    "tools": [],
 								                }
 								            pr = self._provider_routing
 								            honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key)
-												feat(gateway): add reasoning hot reload

Add a /reasoning command across gateway adapters so users can
inspect or change reasoning effort without editing config by hand.

Reload reasoning settings from config.yaml before each agent run,
including background tasks, so the next message picks up the new
value consistently.

											
										
										
											2026-03-11 22:12:11 +08:00
+								            reasoning_config = self._load_reasoning_config()
 								            self._reasoning_config = reasoning_config
-												feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent

Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:

- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
  on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
  a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
  returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally

Config (config.yaml):
  streaming:
    enabled: true
    transport: edit      # progressive editMessageText
    edit_interval: 0.3   # seconds between edits
    buffer_threshold: 40 # chars before forcing flush
    cursor: ' ▉'

Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).

											
										
										
											2026-03-16 05:52:42 -07:00
+								            # Set up streaming consumer if enabled
 								            _stream_consumer = None
 								            _stream_delta_cb = None
 								            _scfg = getattr(getattr(self, 'config', None), 'streaming', None)
 								            if _scfg is None:
 								                from gateway.config import StreamingConfig
 								                _scfg = StreamingConfig()
 								            if _scfg.enabled and _scfg.transport != "off":
 								                try:
 								                    from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
 								                    _adapter = self.adapters.get(source.platform)
 								                    if _adapter:
 								                        _consumer_cfg = StreamConsumerConfig(
 								                            edit_interval=_scfg.edit_interval,
 								                            buffer_threshold=_scfg.buffer_threshold,
 								                            cursor=_scfg.cursor,
 								                        )
 								                        _stream_consumer = GatewayStreamConsumer(
 								                            adapter=_adapter,
 								                            chat_id=source.chat_id,
 								                            config=_consumer_cfg,
-												fix(gateway/slack): send progress messages to correct thread (#3063)

Co-authored-by: Jneeee <jneeee@outlook.com>
											
										
										
											2026-03-25 15:51:15 -07:00
+								                            metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None,
-												feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent

Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:

- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
  on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
  a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
  returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally

Config (config.yaml):
  streaming:
    enabled: true
    transport: edit      # progressive editMessageText
    edit_interval: 0.3   # seconds between edits
    buffer_threshold: 40 # chars before forcing flush
    cursor: ' ▉'

Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).

											
										
										
											2026-03-16 05:52:42 -07:00
+								                        )
 								                        _stream_delta_cb = _stream_consumer.on_delta
 								                        stream_consumer_holder[0] = _stream_consumer
 								                except Exception as _sc_err:
 								                    logger.debug("Could not set up stream consumer: %s", _sc_err)
-												fix: hermes update causes dual gateways on macOS (launchd) (#1567)

* feat: add optional smart model routing

Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow.

* fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s

* fix(gateway): avoid recursive ExecStop in user systemd unit

* fix: extend ExecStop removal and TimeoutStopSec=60 to system unit

The cherry-picked PR #1448 fix only covered the user systemd unit.
The system unit had the same TimeoutStopSec=15 and could benefit
from the same 60s timeout for clean shutdown. Also adds a regression
test for the system unit.

---------

Co-authored-by: Ninja <ninja@local>

* feat(skills): add blender-mcp optional skill for 3D modeling

Control a running Blender instance from Hermes via socket connection
to the blender-mcp addon (port 9876). Supports creating 3D objects,
materials, animations, and running arbitrary bpy code.

Placed in optional-skills/ since it requires Blender 4.3+ desktop
with a third-party addon manually started each session.

* feat(acp): support slash commands in ACP adapter (#1532)

Adds /help, /model, /tools, /context, /reset, /compact, /version
to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled
directly in the server without instantiating the TUI — each command
queries agent/session state and returns plain text.

Unrecognized /commands fall through to the LLM as normal messages.

/model uses detect_provider_for_model() for auto-detection when
switching models, matching the CLI and gateway behavior.

Fixes #1402

* fix(logging): improve error logging in session search tool (#1533)

* fix(gateway): restart on retryable startup failures (#1517)

* feat(email): add skip_attachments option via config.yaml

* feat(email): add skip_attachments option via config.yaml

Adds a config.yaml-driven option to skip email attachments in the
gateway email adapter. Useful for malware protection and bandwidth
savings.

Configure in config.yaml:
  platforms:
    email:
      skip_attachments: true

Based on PR #1521 by @an420eth, changed from env var to config.yaml
(via PlatformConfig.extra) to match the project's config-first pattern.

* docs: document skip_attachments option for email adapter

* fix(telegram): retry on transient TLS failures during connect and send

Add exponential-backoff retry (3 attempts) around initialize() to
handle transient TLS resets during gateway startup. Also catches
TimedOut and OSError in addition to NetworkError.

Add exponential-backoff retry (3 attempts) around send_message() for
NetworkError during message delivery, wrapping the existing Markdown
fallback logic.

Both imports are guarded with try/except ImportError for test
environments where telegram is mocked.

Based on PR #1527 by cmd8. Closes #1526.

* feat: permissive block_anchor thresholds and unicode normalization (#1539)

Salvaged from PR #1528 by an420eth. Closes #517.

Improves _strategy_block_anchor in fuzzy_match.py:
- Add unicode normalization (smart quotes, em/en-dashes, ellipsis,
  non-breaking spaces → ASCII) so LLM-produced unicode artifacts
  don't break anchor line matching
- Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for
  multiple candidates — if first/last lines match exactly, the
  block is almost certainly correct
- Use original (non-normalized) content for offset calculation to
  preserve correct character positions

Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space
anchors, very-low-similarity unique matches), zero regressions on
all 9 existing fuzzy match tests.

Co-authored-by: an420eth <an420eth@users.noreply.github.com>

* feat(cli): add file path autocomplete in the input prompt (#1545)

When typing a path-like token (./  ../  ~/  /  or containing /),
the CLI now shows filesystem completions in the dropdown menu.
Directories show a trailing slash and 'dir' label; files show
their size. Completions are case-insensitive and capped at 30
entries.

Triggered by tokens like:
  edit ./src/ma     → shows ./src/main.py, ./src/manifest.json, ...
  check ~/doc       → shows ~/docs/, ~/documents/, ...
  read /etc/hos     → shows /etc/hosts, /etc/hostname, ...
  open tools/reg    → shows tools/registry.py

Slash command autocomplete (/help, /model, etc.) is unaffected —
it still triggers when the input starts with /.

Inspired by OpenCode PR #145 (file path completion menu).

Implementation:
- hermes_cli/commands.py: _extract_path_word() detects path-like
  tokens, _path_completions() yields filesystem Completions with
  size labels, get_completions() routes to paths vs slash commands
- tests/hermes_cli/test_path_completion.py: 26 tests covering
  path extraction, prefix filtering, directory markers, home
  expansion, case-insensitivity, integration with slash commands

* feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled

Add privacy.redact_pii config option (boolean, default false). When
enabled, the gateway redacts personally identifiable information from
the system prompt before sending it to the LLM provider:

- Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256>
- User IDs → hashed to user_<sha256>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)

Hashes are deterministic (same user → same hash) so the model can
still distinguish users in group chats. Routing and delivery use
the original values internally — redaction only affects LLM context.

Inspired by OpenClaw PR #47959.

* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)

Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM
needs the real ID to tag users. Redaction now only applies to WhatsApp,
Signal, and Telegram where IDs are pure routing metadata.

Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.

* feat: smart approvals + /stop command (inspired by OpenAI Codex)

* feat: smart approvals — LLM-based risk assessment for dangerous commands

Adds a 'smart' approval mode that uses the auxiliary LLM to assess
whether a flagged command is genuinely dangerous or a false positive,
auto-approving low-risk commands without prompting the user.

Inspired by OpenAI Codex's Smart Approvals guardian subagent
(openai/codex#13860).

Config (config.yaml):
  approvals:
    mode: manual   # manual (default), smart, off

Modes:
- manual — current behavior, always prompt the user
- smart  — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block),
           or ESCALATE (fall through to manual prompt)
- off    — skip all approval prompts (equivalent to --yolo)

When smart mode auto-approves, the pattern gets session-level approval
so subsequent uses of the same pattern don't trigger another LLM call.
When it denies, the command is blocked without user prompt. When
uncertain, it escalates to the normal manual approval flow.

The LLM prompt is carefully scoped: it sees only the command text and
the flagged reason, assesses actual risk vs false positive, and returns
a single-word verdict.

* feat: make smart approval model configurable via config.yaml

Adds auxiliary.approval section to config.yaml with the same
provider/model/base_url/api_key pattern as other aux tasks (vision,
web_extract, compression, etc.).

Config:
  auxiliary:
    approval:
      provider: auto
      model: ''        # fast/cheap model recommended
      base_url: ''
      api_key: ''

Bridged to env vars in both CLI and gateway paths so the aux client
picks them up automatically.

* feat: add /stop command to kill all background processes

Adds a /stop slash command that kills all running background processes
at once. Currently users have to process(list) then process(kill) for
each one individually.

Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current
turn) from /stop (cleans up background processes). See openai/codex#14602.

Ctrl+C continues to only interrupt the active agent turn — background
dev servers, watchers, etc. are preserved. /stop is the explicit way
to clean them all up.

* feat: first-class plugin architecture + hide status bar cost by default (#1544)

The persistent status bar now shows context %, token counts, and
duration but NOT $ cost by default. Cost display is opt-in via:

  display:
    show_cost: true

in config.yaml, or: hermes config set display.show_cost true

The /usage command still shows full cost breakdown since the user
explicitly asked for it — this only affects the always-visible bar.

Status bar without cost:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m

Status bar with show_cost: true:
  ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m

* feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex)

* feat: improve memory prioritization — user preferences over procedural knowledge

Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493)
which focus memory writes on user preferences and recurring patterns
rather than procedural task details.

Key insight: 'Optimize for reducing future user steering — the most
valuable memory prevents the user from having to repeat themselves.'

Changes:
- MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy
  and the core principle about reducing user steering
- MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put
  corrections first, added explicit PRIORITY guidance
- Memory nudge (run_agent.py): now asks specifically about preferences,
  corrections, and workflow patterns instead of generic 'anything'
- Memory flush (run_agent.py): now instructs to prioritize user
  preferences and corrections over task-specific details

* feat: more aggressive skill creation and update prompting

Press harder on skill updates — the agent should proactively patch
skills when it encounters issues during use, not wait to be asked.

Changes:
- SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction
  to patch skills immediately when found outdated/wrong
- Skills header: added instruction to update loaded skills before finishing
  if they had missing steps or wrong commands
- Skill nudge: more assertive ('save the approach' not 'consider saving'),
  now also prompts for updating existing skills used in the task
- Skill nudge interval: lowered default from 15 to 10 iterations
- skill_manage schema: added 'patch it immediately' to update triggers

* feat: first-class plugin architecture (#1555)

Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.

Core system (hermes_cli/plugins.py):
  - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
    pip entry_points (hermes_agent.plugins group)
  - PluginContext with register_tool() and register_hook()
  - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
    on_session_start/end
  - Namespace package handling for relative imports in plugins
  - Graceful error isolation — broken plugins never crash the agent

Integration (model_tools.py):
  - Plugin discovery runs after built-in + MCP tools
  - Plugin tools bypass toolset filter via get_plugin_tool_names()
  - Pre/post tool call hooks fire in handle_function_call()

CLI:
  - /plugins command shows loaded plugins, tool counts, status
  - Added to COMMANDS dict for autocomplete

Docs:
  - Getting started guide (build-a-hermes-plugin.md) — full tutorial
    building a calculator plugin step by step
  - Reference page (features/plugins.md) — quick overview + tables
  - Covers: file structure, schemas, handlers, hooks, data files,
    bundled skills, env var gating, pip distribution, common mistakes

Tests: 16 tests covering discovery, loading, hooks, tool visibility.

* fix: hermes update causes dual gateways on macOS (launchd)

Three bugs worked together to create the dual-gateway problem:

1. cmd_update only checked systemd for gateway restart, completely
   ignoring launchd on macOS. After killing the PID it would print
   'Restart it with: hermes gateway run' even when launchd was about
   to auto-respawn the process.

2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway
   after SIGTERM (non-zero exit), so the user's manual restart
   created a second instance.

3. The launchd plist lacked --replace (systemd had it), so the
   respawned gateway didn't kill stale instances on startup.

Fixes:
- Add --replace to launchd ProgramArguments (matches systemd)
- Add launchd detection to cmd_update's auto-restart logic
- Print 'auto-restart via launchd' instead of manual restart hint

* fix: add launchd plist auto-refresh + explicit restart in cmd_update

Two integration issues with the initial fix:

1. Existing macOS users with old plist (no --replace) would never
   get the fix until manual uninstall/reinstall. Added
   refresh_launchd_plist_if_needed() — mirrors the existing
   refresh_systemd_unit_if_needed(). Called from launchd_start(),
   launchd_restart(), and cmd_update.

2. cmd_update relied on KeepAlive respawn after SIGTERM rather than
   explicit launchctl stop/start. This caused races: launchd would
   respawn the old process before the PID file was cleaned up.
   Now does explicit stop+start (matching how systemd gets an
   explicit systemctl restart), with plist refresh first so the
   new --replace flag is picked up.

---------

Co-authored-by: Ninja <ninja@local>
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com>
Co-authored-by: JP Lew <polydegen@protonmail.com>
Co-authored-by: an420eth <an420eth@users.noreply.github.com>
											
										
										
											2026-03-16 12:36:29 -07:00
+								            turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs)
-												feat(gateway): cache AIAgent per session for prompt caching

The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.

Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
  prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents

Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.

This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.

											
										
										
											2026-03-21 13:07:08 -07:00
 								            # Check agent cache — reuse the AIAgent from the previous message
 								            # in this session to preserve the frozen system prompt and tool
 								            # schemas for prompt cache hits.
 								            _sig = self._agent_config_signature(
 								                turn_route["model"],
 								                turn_route["runtime"],
 								                enabled_toolsets,
 								                combined_ephemeral,
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            )
-												feat(gateway): cache AIAgent per session for prompt caching

The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.

Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
  prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents

Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.

This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.

											
										
										
											2026-03-21 13:07:08 -07:00
+								            agent = None
 								            _cache_lock = getattr(self, "_agent_cache_lock", None)
 								            _cache = getattr(self, "_agent_cache", None)
 								            if _cache_lock and _cache is not None:
 								                with _cache_lock:
 								                    cached = _cache.get(session_key)
 								                    if cached and cached[1] == _sig:
 								                        agent = cached[0]
 								                        logger.debug("Reusing cached agent for session %s", session_key)
 								            if agent is None:
 								                # Config changed or first message — create fresh agent
 								                agent = AIAgent(
 								                    model=turn_route["model"],
 								                    **turn_route["runtime"],
 								                    max_iterations=max_iterations,
 								                    quiet_mode=True,
 								                    verbose_logging=False,
 								                    enabled_toolsets=enabled_toolsets,
 								                    ephemeral_system_prompt=combined_ephemeral or None,
 								                    prefill_messages=self._prefill_messages or None,
 								                    reasoning_config=reasoning_config,
 								                    providers_allowed=pr.get("only"),
 								                    providers_ignored=pr.get("ignore"),
 								                    providers_order=pr.get("order"),
 								                    provider_sort=pr.get("sort"),
 								                    provider_require_parameters=pr.get("require_parameters", False),
 								                    provider_data_collection=pr.get("data_collection"),
 								                    session_id=session_id,
 								                    platform=platform_key,
 								                    honcho_session_key=session_key,
 								                    honcho_manager=honcho_manager,
 								                    honcho_config=honcho_config,
 								                    session_db=self._session_db,
 								                    fallback_model=self._fallback_model,
 								                )
 								                if _cache_lock and _cache is not None:
 								                    with _cache_lock:
 								                        _cache[session_key] = (agent, _sig)
 								                logger.debug("Created new agent for session %s (sig=%s)", session_key, _sig)
 								            # Per-message state — callbacks and reasoning config change every
 								            # turn and must not be baked into the cached agent constructor.
 								            agent.tool_progress_callback = progress_callback if tool_progress_enabled else None
 								            agent.step_callback = _step_callback_sync if _hooks_ref.loaded_hooks else None
 								            agent.stream_delta_callback = _stream_delta_cb
 								            agent.status_callback = _status_callback_sync
 								            agent.reasoning_config = reasoning_config
-												feat(gateway): deliver background review notifications to user chat (#3293)

The background memory/skill review (_spawn_background_review) runs
after the agent response when turn/iteration counters exceed their
thresholds. It saves memories and skills, then prints a summary like
'💾 Memory updated · User profile updated'. In CLI mode this goes to
the terminal via _safe_print. In gateway mode, _safe_print routes to
print() which goes to stdout — invisible to the user.

Add a background_review_callback attribute to AIAgent. When set, the
background review thread calls it with the summary string after saves
complete. The gateway wires this to adapter.send() via the same
run_coroutine_threadsafe bridge used by status_callback, delivering
the notification to the user's chat.
											
										
										
											2026-03-26 17:38:24 -07:00
 								            # Background review delivery — send "💾 Memory updated" etc. to user
 								            def _bg_review_send(message: str) -> None:
 								                if not _status_adapter:
 								                    return
 								                try:
 								                    asyncio.run_coroutine_threadsafe(
 								                        _status_adapter.send(
 								                            _status_chat_id,
 								                            message,
 								                            metadata=_status_thread_metadata,
 								                        ),
 								                        _loop_for_step,
 								                    )
 								                except Exception as _e:
 								                    logger.debug("background_review_callback error: %s", _e)
 								            agent.background_review_callback = _bg_review_send
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            # Store agent reference for interrupt support
 								            agent_holder[0] = agent
 								            # Capture the full tool definitions for transcript logging
 								            tools_holder[0] = agent.tools if hasattr(agent, 'tools') else None
 								            # Convert history to agent format.
 								            # Two cases:
 								            #   1. Normal path (from transcript): simple {role, content, timestamp} dicts
 								            #      - Strip timestamps, keep role+content
 								            #   2. Interrupt path (from agent result["messages"]): full agent messages
 								            #      that may include tool_calls, tool_call_id, reasoning, etc.
 								            #      - These must be passed through intact so the API sees valid
 								            #        assistant→tool sequences (dropping tool_calls causes 500 errors)
 								            agent_history = []
 								            for msg in history:
 								                role = msg.get("role")
 								                if not role:
 								                    continue
 								                # Skip metadata entries (tool definitions, session info)
 								                # -- these are for transcript logging, not for the LLM
 								                if role in ("session_meta",):
 								                    continue
 								                # Skip system messages -- the agent rebuilds its own system prompt
 								                if role == "system":
 								                    continue
 								                # Rich agent messages (tool_calls, tool results) must be passed
 								                # through intact so the API sees valid assistant→tool sequences
 								                has_tool_calls = "tool_calls" in msg
 								                has_tool_call_id = "tool_call_id" in msg
 								                is_tool_message = role == "tool"
 								                if has_tool_calls or has_tool_call_id or is_tool_message:
 								                    clean_msg = {k: v for k, v in msg.items() if k != "timestamp"}
 								                    agent_history.append(clean_msg)
 								                else:
 								                    # Simple text message - just need role and content
 								                    content = msg.get("content")
 								                    if content:
 								                        # Tag cross-platform mirror messages so the agent knows their origin
 								                        if msg.get("mirror"):
 								                            mirror_src = msg.get("mirror_source", "another session")
 								                            content = f"[Delivered from {mirror_src}] {content}"
-												feat: persist reasoning across gateway session turns (schema v6) (#2974)

feat: persist reasoning across gateway session turns (schema v6)

Tested against OpenAI Codex (direct), Anthropic (direct + OAI-compat), and OpenRouter → 6 backends. All reasoning field types (reasoning, reasoning_details, codex_reasoning_items) round-trip through the DB correctly.
											
										
										
											2026-03-25 09:47:28 -07:00
+								                        entry = {"role": role, "content": content}
 								                        # Preserve reasoning fields on assistant messages so
 								                        # multi-turn reasoning context survives session reload.
 								                        # The agent's _build_api_kwargs converts these to the
 								                        # provider-specific format (reasoning_content, etc.).
 								                        if role == "assistant":
 								                            for _rkey in ("reasoning", "reasoning_details",
 								                                          "codex_reasoning_items"):
 								                                _rval = msg.get(_rkey)
 								                                if _rval:
 								                                    entry[_rkey] = _rval
 								                        agent_history.append(entry)
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								            # Collect MEDIA paths already in history so we can exclude them
 								            # from the current turn's extraction. This is compression-safe:
 								            # even if the message list shrinks, we know which paths are old.
 								            _history_media_paths: set = set()
 								            for _hm in agent_history:
 								                if _hm.get("role") in ("tool", "function"):
 								                    _hc = _hm.get("content", "")
 								                    if "MEDIA:" in _hc:
 								                        for _match in re.finditer(r'MEDIA:(\S+)', _hc):
 								                            _p = _match.group(1).strip().rstrip('",}')
 								                            if _p:
 								                                _history_media_paths.add(_p)
 								            result = agent.run_conversation(message, conversation_history=agent_history, task_id=session_id)
 								            result_holder[0] = result
-												feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent

Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:

- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
  on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
  a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
  returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally

Config (config.yaml):
  streaming:
    enabled: true
    transport: edit      # progressive editMessageText
    edit_interval: 0.3   # seconds between edits
    buffer_threshold: 40 # chars before forcing flush
    cursor: ' ▉'

Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).

											
										
										
											2026-03-16 05:52:42 -07:00
 								            # Signal the stream consumer that the agent is done
 								            if _stream_consumer is not None:
 								                _stream_consumer.finish()
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								            # Return final response, or a message if something went wrong
 								            final_response = result.get("final_response")
-												fix(gateway): make /status report live state and tokens (#1476)
											
										
										
											2026-03-15 19:18:58 -07:00
+								            # Extract actual token counts from the agent instance used for this run
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            _last_prompt_toks = 0
-												fix(gateway): make /status report live state and tokens (#1476)
											
										
										
											2026-03-15 19:18:58 -07:00
+								            _input_toks = 0
 								            _output_toks = 0
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            _agent = agent_holder[0]
 								            if _agent and hasattr(_agent, "context_compressor"):
 								                _last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0)
-												fix(gateway): make /status report live state and tokens (#1476)
											
										
										
											2026-03-15 19:18:58 -07:00
+								                _input_toks = getattr(_agent, "session_prompt_tokens", 0)
 								                _output_toks = getattr(_agent, "session_completion_tokens", 0)
-												fix: tolerate test doubles without model attr

Use getattr() when returning model metadata from GatewayRunner._run_agent so fake agents and minimal stubs without a model attribute do not break unrelated gateway flows while preserving the session-model backfill behavior.

											
										
										
											2026-03-14 06:47:39 -07:00
+								            _resolved_model = getattr(_agent, "model", None) if _agent else None
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								            if not final_response:
 								                error_msg = f"⚠️ {result['error']}" if result.get("error") else "(No response generated)"
 								                return {
 								                    "final_response": error_msg,
 								                    "messages": result.get("messages", []),
 								                    "api_calls": result.get("api_calls", 0),
 								                    "tools": tools_holder[0] or [],
 								                    "history_offset": len(agent_history),
 								                    "last_prompt_tokens": _last_prompt_toks,
-												fix(gateway): make /status report live state and tokens (#1476)
											
										
										
											2026-03-15 19:18:58 -07:00
+								                    "input_tokens": _input_toks,
 								                    "output_tokens": _output_toks,
-												fix: tolerate test doubles without model attr

Use getattr() when returning model metadata from GatewayRunner._run_agent so fake agents and minimal stubs without a model attribute do not break unrelated gateway flows while preserving the session-model backfill behavior.

											
										
										
											2026-03-14 06:47:39 -07:00
+								                    "model": _resolved_model,
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                }
 								            # Scan tool results for MEDIA:<path> tags that need to be delivered
 								            # as native audio/file attachments.  The TTS tool embeds MEDIA: tags
 								            # in its JSON response, but the model's final text reply usually
 								            # doesn't include them.  We collect unique tags from tool results and
 								            # append any that aren't already present in the final response, so the
 								            # adapter's extract_media() can find and deliver the files exactly once.
 								            #
 								            # Uses path-based deduplication against _history_media_paths (collected
 								            # before run_conversation) instead of index slicing. This is safe even
 								            # when context compression shrinks the message list. (Fixes #160)
 								            if "MEDIA:" not in final_response:
 								                media_tags = []
 								                has_voice_directive = False
 								                for msg in result.get("messages", []):
 								                    if msg.get("role") in ("tool", "function"):
 								                        content = msg.get("content", "")
 								                        if "MEDIA:" in content:
 								                            for match in re.finditer(r'MEDIA:(\S+)', content):
 								                                path = match.group(1).strip().rstrip('",}')
 								                                if path and path not in _history_media_paths:
 								                                    media_tags.append(f"MEDIA:{path}")
 								                            if "[[audio_as_voice]]" in content:
 								                                has_voice_directive = True
 								                if media_tags:
 								                    seen = set()
 								                    unique_tags = []
 								                    for tag in media_tags:
 								                        if tag not in seen:
 								                            seen.add(tag)
 								                            unique_tags.append(tag)
 								                    if has_voice_directive:
 								                        unique_tags.insert(0, "[[audio_as_voice]]")
 								                    final_response = final_response + "\n" + "\n".join(unique_tags)
 								            # Sync session_id: the agent may have created a new session during
 								            # mid-run context compression (_compress_context splits sessions).
 								            # If so, update the session store entry so the NEXT message loads
 								            # the compressed transcript, not the stale pre-compression one.
 								            agent = agent_holder[0]
 								            if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id:
 								                logger.info(
 								                    "Session split detected: %s → %s (compression)",
 								                    session_id, agent.session_id,
 								                )
 								                entry = self.session_store._entries.get(session_key)
 								                if entry:
 								                    entry.session_id = agent.session_id
 								                    self.session_store._save()
 								            effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id
-												feat: auto-generate session titles after first exchange

After the first user→assistant exchange, Hermes now generates a short
descriptive session title via the auxiliary LLM (compression task config).
Title generation runs in a background thread so it never delays the
user-facing response.

Key behaviors:
- Fires only on the first 1-2 exchanges (checks user message count)
- Skips if a title already exists (user-set titles are never overwritten)
- Uses call_llm with compression task config (cheapest/fastest model)
- Truncates long messages to keep the title generation request small
- Cleans up LLM output: strips quotes, 'Title:' prefixes, enforces 80 char max
- Works in both CLI and gateway (Telegram/Discord/etc.)

Also updates /title (no args) to show the session ID alongside the title
in both CLI and gateway.

Implements #1426

											
										
										
											2026-03-17 04:14:40 -07:00
+								            # Auto-generate session title after first exchange (non-blocking)
 								            if final_response and self._session_db:
 								                try:
 								                    from agent.title_generator import maybe_auto_title
 								                    all_msgs = result_holder[0].get("messages", []) if result_holder[0] else []
 								                    maybe_auto_title(
 								                        self._session_db,
 								                        effective_session_id,
 								                        message,
 								                        final_response,
 								                        all_msgs,
 								                    )
 								                except Exception:
 								                    pass
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            return {
 								                "final_response": final_response,
 								                "last_reasoning": result.get("last_reasoning"),
 								                "messages": result_holder[0].get("messages", []) if result_holder[0] else [],
 								                "api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
 								                "tools": tools_holder[0] or [],
 								                "history_offset": len(agent_history),
 								                "last_prompt_tokens": _last_prompt_toks,
-												fix(gateway): make /status report live state and tokens (#1476)
											
										
										
											2026-03-15 19:18:58 -07:00
+								                "input_tokens": _input_toks,
 								                "output_tokens": _output_toks,
-												fix: tolerate test doubles without model attr

Use getattr() when returning model metadata from GatewayRunner._run_agent so fake agents and minimal stubs without a model attribute do not break unrelated gateway flows while preserving the session-model backfill behavior.

											
										
										
											2026-03-14 06:47:39 -07:00
+								                "model": _resolved_model,
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                "session_id": effective_session_id,
 								            }
 								        # Start progress message sender if enabled
 								        progress_task = None
 								        if tool_progress_enabled:
 								            progress_task = asyncio.create_task(send_progress_messages())
-												feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent

Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:

- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
  on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
  a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
  returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally

Config (config.yaml):
  streaming:
    enabled: true
    transport: edit      # progressive editMessageText
    edit_interval: 0.3   # seconds between edits
    buffer_threshold: 40 # chars before forcing flush
    cursor: ' ▉'

Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).

											
										
										
											2026-03-16 05:52:42 -07:00
-												fix: audit fixes — 5 bugs found and resolved

Thorough code review found 5 issues across run_agent.py, cli.py, and gateway/:

1. CRITICAL — Gateway stream consumer task never started: stream_consumer_holder
   was checked BEFORE run_sync populated it. Fixed with async polling pattern
   (same as track_agent).

2. MEDIUM-HIGH — Streaming fallback after partial delivery caused double-response:
   if streaming failed after some tokens were delivered, the fallback would
   re-deliver the full response. Now tracks deltas_were_sent and only falls
   back when no tokens reached consumers yet.

3. MEDIUM — Codex mode lost on_first_delta spinner callback: _run_codex_stream
   now accepts on_first_delta parameter, fires it on first text delta. Passed
   through from _interruptible_streaming_api_call via _codex_on_first_delta
   instance attribute.

4. MEDIUM — CLI close-tag after-text bypassed tag filtering: text after a
   reasoning close tag was sent directly to _emit_stream_text, skipping
   open-tag detection. Now routes through _stream_delta for full filtering.

5. LOW — Removed 140 lines of dead code: old _streaming_api_call method
   (superseded by _interruptible_streaming_api_call). Updated 13 tests in
   test_run_agent.py and test_openai_client_lifecycle.py to use the new
   method name and signature.

4573 tests passing.

											
										
										
											2026-03-16 06:35:46 -07:00
+								        # Start stream consumer task — polls for consumer creation since it
 								        # happens inside run_sync (thread pool) after the agent is constructed.
-												feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent

Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:

- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
  on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
  a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
  returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally

Config (config.yaml):
  streaming:
    enabled: true
    transport: edit      # progressive editMessageText
    edit_interval: 0.3   # seconds between edits
    buffer_threshold: 40 # chars before forcing flush
    cursor: ' ▉'

Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).

											
										
										
											2026-03-16 05:52:42 -07:00
+								        stream_task = None
-												fix: audit fixes — 5 bugs found and resolved

Thorough code review found 5 issues across run_agent.py, cli.py, and gateway/:

1. CRITICAL — Gateway stream consumer task never started: stream_consumer_holder
   was checked BEFORE run_sync populated it. Fixed with async polling pattern
   (same as track_agent).

2. MEDIUM-HIGH — Streaming fallback after partial delivery caused double-response:
   if streaming failed after some tokens were delivered, the fallback would
   re-deliver the full response. Now tracks deltas_were_sent and only falls
   back when no tokens reached consumers yet.

3. MEDIUM — Codex mode lost on_first_delta spinner callback: _run_codex_stream
   now accepts on_first_delta parameter, fires it on first text delta. Passed
   through from _interruptible_streaming_api_call via _codex_on_first_delta
   instance attribute.

4. MEDIUM — CLI close-tag after-text bypassed tag filtering: text after a
   reasoning close tag was sent directly to _emit_stream_text, skipping
   open-tag detection. Now routes through _stream_delta for full filtering.

5. LOW — Removed 140 lines of dead code: old _streaming_api_call method
   (superseded by _interruptible_streaming_api_call). Updated 13 tests in
   test_run_agent.py and test_openai_client_lifecycle.py to use the new
   method name and signature.

4573 tests passing.

											
										
										
											2026-03-16 06:35:46 -07:00
 								        async def _start_stream_consumer():
 								            """Wait for the stream consumer to be created, then run it."""
 								            for _ in range(200):  # Up to 10s wait
 								                if stream_consumer_holder[0] is not None:
 								                    await stream_consumer_holder[0].run()
 								                    return
 								                await asyncio.sleep(0.05)
 								        stream_task = asyncio.create_task(_start_stream_consumer())
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								        # Track this agent as running for this session (for interrupt support)
 								        # We do this in a callback after the agent is created
 								        async def track_agent():
 								            # Wait for agent to be created
 								            while agent_holder[0] is None:
 								                await asyncio.sleep(0.05)
 								            if session_key:
 								                self._running_agents[session_key] = agent_holder[0]
 								        tracking_task = asyncio.create_task(track_agent())
 								        # Monitor for interrupts from the adapter (new messages arriving)
 								        async def monitor_for_interrupt():
 								            adapter = self.adapters.get(source.platform)
 								            if not adapter or not session_key:
 								                return
 								            while True:
 								                await asyncio.sleep(0.2)  # Check every 200ms
 								                # Check if adapter has a pending interrupt for this session.
 								                # Must use session_key (build_session_key output) — NOT
 								                # source.chat_id — because the adapter stores interrupt events
 								                # under the full session key.
 								                if hasattr(adapter, 'has_pending_interrupt') and adapter.has_pending_interrupt(session_key):
 								                    agent = agent_holder[0]
 								                    if agent:
 								                        pending_event = adapter.get_pending_message(session_key)
 								                        pending_text = pending_event.text if pending_event else None
 								                        logger.debug("Interrupt detected from adapter, signaling agent...")
 								                        agent.interrupt(pending_text)
 								                        break
 								        interrupt_monitor = asyncio.create_task(monitor_for_interrupt())
 								        try:
 								            # Run in thread pool to not block
 								            loop = asyncio.get_event_loop()
 								            response = await loop.run_in_executor(None, run_sync)
-												fix(gateway): /model shows active fallback model instead of config default (#1615)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)

Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.

- Apply truncate_message() chunking in _send_to_platform() before
  dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
  in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement

Cherry-picked from PR #1557 by llbn.

* fix(approval): show full command in dangerous command approval (#1553)

Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:

- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
  in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests

Cherry-picked from PR #1566 by crazywriter1.

* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)

The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.

Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.

* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)

When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.

Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.

Cherry-picked from PR #1593 by ygd58.

* fix(security): block sandbox backend creds from subprocess env (#1264)

Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.

Cherry-picked from PR #1571 by ygd58.

* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.

Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.

The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).

* fix(gateway): /model shows active fallback model instead of config default (#1615)

When the agent falls back to a different model (e.g. due to rate
limiting), /model still showed the config default. Now tracks the
effective model/provider after each agent run and displays it.

Cleared when the primary model succeeds again or the user explicitly
switches via /model.

Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for
test compatibility.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
											
										
										
											2026-03-17 02:26:51 -07:00
 								            # Track fallback model state: if the agent switched to a
 								            # fallback model during this run, persist it so /model shows
 								            # the actually-active model instead of the config default.
 								            _agent = agent_holder[0]
 								            if _agent is not None and hasattr(_agent, 'model'):
 								                _cfg_model = _resolve_gateway_model()
 								                if _agent.model != _cfg_model:
 								                    self._effective_model = _agent.model
 								                    self._effective_provider = getattr(_agent, 'provider', None)
-												feat(gateway): cache AIAgent per session for prompt caching

The gateway created a fresh AIAgent per message, rebuilding the system
prompt (including memory, skills, context files) every turn. This broke
prompt prefix caching — providers like Anthropic charge ~10x more for
uncached prefixes.

Now caches AIAgent instances per session_key with a config signature.
The cached agent is reused across messages in the same session,
preserving the frozen system prompt and tool schemas. Cache is
invalidated when:
- Config changes (model, provider, toolsets, reasoning, ephemeral
  prompt) — detected via signature mismatch
- /new, /reset, /clear — explicit session reset
- /model — global model change clears all cached agents
- /reasoning — global reasoning change clears all cached agents

Per-message state (callbacks, stream consumers, progress queues) is
set on the agent instance before each run_conversation() call.

This matches CLI behavior where a single AIAgent lives across all turns
in a session, with _cached_system_prompt built once and reused.

											
										
										
											2026-03-21 13:07:08 -07:00
+								                    # Fallback activated — evict cached agent so the next
 								                    # message starts fresh and retries the primary model.
 								                    self._evict_cached_agent(session_key)
-												fix(gateway): /model shows active fallback model instead of config default (#1615)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)

Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.

- Apply truncate_message() chunking in _send_to_platform() before
  dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
  in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement

Cherry-picked from PR #1557 by llbn.

* fix(approval): show full command in dangerous command approval (#1553)

Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:

- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
  in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests

Cherry-picked from PR #1566 by crazywriter1.

* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)

The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.

Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.

* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)

When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.

Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.

Cherry-picked from PR #1593 by ygd58.

* fix(security): block sandbox backend creds from subprocess env (#1264)

Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.

Cherry-picked from PR #1571 by ygd58.

* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.

Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.

The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).

* fix(gateway): /model shows active fallback model instead of config default (#1615)

When the agent falls back to a different model (e.g. due to rate
limiting), /model still showed the config default. Now tracks the
effective model/provider after each agent run and displays it.

Cleared when the primary model succeeds again or the user explicitly
switches via /model.

Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for
test compatibility.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
											
										
										
											2026-03-17 02:26:51 -07:00
+								                else:
 								                    # Primary model worked — clear any stale fallback state
 								                    self._effective_model = None
 								                    self._effective_provider = None
-												fix(gateway): process /queue'd messages after agent completion (#2469)

* fix: respect DashScope v1 runtime mode for alibaba

Remove the hardcoded Alibaba branch from resolve_runtime_provider()
that forced api_mode='anthropic_messages' regardless of the base URL.

Alibaba now goes through the generic API-key provider path, which
auto-detects the protocol from the URL:
- /apps/anthropic → anthropic_messages (via endswith check)
- /v1 → chat_completions (default)

This fixes Alibaba setup with OpenAI-compatible DashScope endpoints
(e.g. coding-intl.dashscope.aliyuncs.com/v1) that were broken because
runtime always forced Anthropic mode even when setup saved a /v1 URL.

Based on PR #2024 by @kshitijk4poor.

* docs(skill): add split, merge, search examples to ocr-and-documents skill

Adds pymupdf examples for PDF splitting, merging, and text search
to the existing ocr-and-documents skill. No new dependencies — pymupdf
already covers all three operations natively.

* fix: replace all production print() calls with logger in rl_training_tool

Replace all bare print() calls in production code paths with proper logger calls.

- Add `import logging` and module-level `logger = logging.getLogger(__name__)`
- Replace print() in _start_training_run() with logger.info()
- Replace print() in _stop_training_run() with logger.info()
- Replace print(Warning/Note) calls with logger.warning() and logger.info()

Using the logging framework allows log level filtering, proper formatting,
and log routing instead of always printing to stdout.

* fix(gateway): process /queue'd messages after agent completion

/queue stored messages in adapter._pending_messages but never consumed
them after normal (non-interrupted) completion. The consumption path
at line 5219 only checked pending messages when result.get('interrupted')
was True — since /queue deliberately doesn't interrupt, queued messages
were silently dropped.

Now checks adapter._pending_messages after both interrupted AND normal
completion. For queued messages (non-interrupt), the first response is
delivered before recursing to process the queued follow-up. Skips the
direct send when streaming already delivered the response.

Reported by GhostMode on Discord.

---------

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-22 04:56:13 -07:00
+								            # Check if we were interrupted OR have a queued message (/queue).
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            result = result_holder[0]
 								            adapter = self.adapters.get(source.platform)
-												fix(gateway): process /queue'd messages after agent completion (#2469)

* fix: respect DashScope v1 runtime mode for alibaba

Remove the hardcoded Alibaba branch from resolve_runtime_provider()
that forced api_mode='anthropic_messages' regardless of the base URL.

Alibaba now goes through the generic API-key provider path, which
auto-detects the protocol from the URL:
- /apps/anthropic → anthropic_messages (via endswith check)
- /v1 → chat_completions (default)

This fixes Alibaba setup with OpenAI-compatible DashScope endpoints
(e.g. coding-intl.dashscope.aliyuncs.com/v1) that were broken because
runtime always forced Anthropic mode even when setup saved a /v1 URL.

Based on PR #2024 by @kshitijk4poor.

* docs(skill): add split, merge, search examples to ocr-and-documents skill

Adds pymupdf examples for PDF splitting, merging, and text search
to the existing ocr-and-documents skill. No new dependencies — pymupdf
already covers all three operations natively.

* fix: replace all production print() calls with logger in rl_training_tool

Replace all bare print() calls in production code paths with proper logger calls.

- Add `import logging` and module-level `logger = logging.getLogger(__name__)`
- Replace print() in _start_training_run() with logger.info()
- Replace print() in _stop_training_run() with logger.info()
- Replace print(Warning/Note) calls with logger.warning() and logger.info()

Using the logging framework allows log level filtering, proper formatting,
and log routing instead of always printing to stdout.

* fix(gateway): process /queue'd messages after agent completion

/queue stored messages in adapter._pending_messages but never consumed
them after normal (non-interrupted) completion. The consumption path
at line 5219 only checked pending messages when result.get('interrupted')
was True — since /queue deliberately doesn't interrupt, queued messages
were silently dropped.

Now checks adapter._pending_messages after both interrupted AND normal
completion. For queued messages (non-interrupt), the first response is
delivered before recursing to process the queued follow-up. Skips the
direct send when streaming already delivered the response.

Reported by GhostMode on Discord.

---------

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-22 04:56:13 -07:00
+								            # Get pending message from adapter.
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            # Use session_key (not source.chat_id) to match adapter's storage keys.
 								            pending = None
-												fix(gateway): process /queue'd messages after agent completion (#2469)

* fix: respect DashScope v1 runtime mode for alibaba

Remove the hardcoded Alibaba branch from resolve_runtime_provider()
that forced api_mode='anthropic_messages' regardless of the base URL.

Alibaba now goes through the generic API-key provider path, which
auto-detects the protocol from the URL:
- /apps/anthropic → anthropic_messages (via endswith check)
- /v1 → chat_completions (default)

This fixes Alibaba setup with OpenAI-compatible DashScope endpoints
(e.g. coding-intl.dashscope.aliyuncs.com/v1) that were broken because
runtime always forced Anthropic mode even when setup saved a /v1 URL.

Based on PR #2024 by @kshitijk4poor.

* docs(skill): add split, merge, search examples to ocr-and-documents skill

Adds pymupdf examples for PDF splitting, merging, and text search
to the existing ocr-and-documents skill. No new dependencies — pymupdf
already covers all three operations natively.

* fix: replace all production print() calls with logger in rl_training_tool

Replace all bare print() calls in production code paths with proper logger calls.

- Add `import logging` and module-level `logger = logging.getLogger(__name__)`
- Replace print() in _start_training_run() with logger.info()
- Replace print() in _stop_training_run() with logger.info()
- Replace print(Warning/Note) calls with logger.warning() and logger.info()

Using the logging framework allows log level filtering, proper formatting,
and log routing instead of always printing to stdout.

* fix(gateway): process /queue'd messages after agent completion

/queue stored messages in adapter._pending_messages but never consumed
them after normal (non-interrupted) completion. The consumption path
at line 5219 only checked pending messages when result.get('interrupted')
was True — since /queue deliberately doesn't interrupt, queued messages
were silently dropped.

Now checks adapter._pending_messages after both interrupted AND normal
completion. For queued messages (non-interrupt), the first response is
delivered before recursing to process the queued follow-up. Skips the
direct send when streaming already delivered the response.

Reported by GhostMode on Discord.

---------

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-22 04:56:13 -07:00
+								            if result and adapter and session_key:
 								                if result.get("interrupted"):
 								                    # Interrupted — consume the interrupt message
 								                    pending_event = adapter.get_pending_message(session_key)
 								                    if pending_event:
 								                        pending = pending_event.text
 								                    elif result.get("interrupt_message"):
 								                        pending = result.get("interrupt_message")
 								                else:
 								                    # Normal completion — check for /queue'd messages that were
 								                    # stored without triggering an interrupt.
 								                    pending_event = adapter.get_pending_message(session_key)
 								                    if pending_event:
 								                        pending = pending_event.text
 								                        logger.debug("Processing queued message after agent completion: '%s...'", pending[:40])
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								            if pending:
-												fix(gateway): process /queue'd messages after agent completion (#2469)

* fix: respect DashScope v1 runtime mode for alibaba

Remove the hardcoded Alibaba branch from resolve_runtime_provider()
that forced api_mode='anthropic_messages' regardless of the base URL.

Alibaba now goes through the generic API-key provider path, which
auto-detects the protocol from the URL:
- /apps/anthropic → anthropic_messages (via endswith check)
- /v1 → chat_completions (default)

This fixes Alibaba setup with OpenAI-compatible DashScope endpoints
(e.g. coding-intl.dashscope.aliyuncs.com/v1) that were broken because
runtime always forced Anthropic mode even when setup saved a /v1 URL.

Based on PR #2024 by @kshitijk4poor.

* docs(skill): add split, merge, search examples to ocr-and-documents skill

Adds pymupdf examples for PDF splitting, merging, and text search
to the existing ocr-and-documents skill. No new dependencies — pymupdf
already covers all three operations natively.

* fix: replace all production print() calls with logger in rl_training_tool

Replace all bare print() calls in production code paths with proper logger calls.

- Add `import logging` and module-level `logger = logging.getLogger(__name__)`
- Replace print() in _start_training_run() with logger.info()
- Replace print() in _stop_training_run() with logger.info()
- Replace print(Warning/Note) calls with logger.warning() and logger.info()

Using the logging framework allows log level filtering, proper formatting,
and log routing instead of always printing to stdout.

* fix(gateway): process /queue'd messages after agent completion

/queue stored messages in adapter._pending_messages but never consumed
them after normal (non-interrupted) completion. The consumption path
at line 5219 only checked pending messages when result.get('interrupted')
was True — since /queue deliberately doesn't interrupt, queued messages
were silently dropped.

Now checks adapter._pending_messages after both interrupted AND normal
completion. For queued messages (non-interrupt), the first response is
delivered before recursing to process the queued follow-up. Skips the
direct send when streaming already delivered the response.

Reported by GhostMode on Discord.

---------

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-22 04:56:13 -07:00
+								                logger.debug("Processing pending message: '%s...'", pending[:40])
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								                # Clear the adapter's interrupt event so the next _run_agent call
 								                # doesn't immediately re-trigger the interrupt before the new agent
 								                # even makes its first API call (this was causing an infinite loop).
 								                if adapter and hasattr(adapter, '_active_sessions') and session_key and session_key in adapter._active_sessions:
 								                    adapter._active_sessions[session_key].clear()
-												fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)

Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.

- Apply truncate_message() chunking in _send_to_platform() before
  dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
  in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement

Cherry-picked from PR #1557 by llbn.

* fix(approval): show full command in dangerous command approval (#1553)

Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:

- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
  in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests

Cherry-picked from PR #1566 by crazywriter1.

* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)

The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.

Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.

* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)

When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.

Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.

Cherry-picked from PR #1593 by ygd58.

* fix(security): block sandbox backend creds from subprocess env (#1264)

Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.

Cherry-picked from PR #1571 by ygd58.

* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.

Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.

The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
											
										
										
											2026-03-17 02:23:07 -07:00
+								                # Cap recursion depth to prevent resource exhaustion when the
 								                # user sends multiple messages while the agent keeps failing. (#816)
 								                if _interrupt_depth >= self._MAX_INTERRUPT_DEPTH:
 								                    logger.warning(
 								                        "Interrupt recursion depth %d reached for session %s — "
 								                        "queueing message instead of recursing.",
 								                        _interrupt_depth, session_key,
 								                    )
 								                    # Queue the pending message for normal processing on next turn
 								                    adapter = self.adapters.get(source.platform)
 								                    if adapter and hasattr(adapter, 'queue_message'):
 								                        adapter.queue_message(session_key, pending)
 								                    return result_holder[0] or {"final_response": response, "messages": history}
-												fix(gateway): process /queue'd messages after agent completion (#2469)

* fix: respect DashScope v1 runtime mode for alibaba

Remove the hardcoded Alibaba branch from resolve_runtime_provider()
that forced api_mode='anthropic_messages' regardless of the base URL.

Alibaba now goes through the generic API-key provider path, which
auto-detects the protocol from the URL:
- /apps/anthropic → anthropic_messages (via endswith check)
- /v1 → chat_completions (default)

This fixes Alibaba setup with OpenAI-compatible DashScope endpoints
(e.g. coding-intl.dashscope.aliyuncs.com/v1) that were broken because
runtime always forced Anthropic mode even when setup saved a /v1 URL.

Based on PR #2024 by @kshitijk4poor.

* docs(skill): add split, merge, search examples to ocr-and-documents skill

Adds pymupdf examples for PDF splitting, merging, and text search
to the existing ocr-and-documents skill. No new dependencies — pymupdf
already covers all three operations natively.

* fix: replace all production print() calls with logger in rl_training_tool

Replace all bare print() calls in production code paths with proper logger calls.

- Add `import logging` and module-level `logger = logging.getLogger(__name__)`
- Replace print() in _start_training_run() with logger.info()
- Replace print() in _stop_training_run() with logger.info()
- Replace print(Warning/Note) calls with logger.warning() and logger.info()

Using the logging framework allows log level filtering, proper formatting,
and log routing instead of always printing to stdout.

* fix(gateway): process /queue'd messages after agent completion

/queue stored messages in adapter._pending_messages but never consumed
them after normal (non-interrupted) completion. The consumption path
at line 5219 only checked pending messages when result.get('interrupted')
was True — since /queue deliberately doesn't interrupt, queued messages
were silently dropped.

Now checks adapter._pending_messages after both interrupted AND normal
completion. For queued messages (non-interrupt), the first response is
delivered before recursing to process the queued follow-up. Skips the
direct send when streaming already delivered the response.

Reported by GhostMode on Discord.

---------

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
Co-authored-by: memosr.eth <96793918+memosr@users.noreply.github.com>
											
										
										
											2026-03-22 04:56:13 -07:00
+								                was_interrupted = result.get("interrupted")
 								                if not was_interrupted:
 								                    # Queued message after normal completion — deliver the first
 								                    # response before processing the queued follow-up.
 								                    # Skip if streaming already delivered it.
 								                    _sc = stream_consumer_holder[0]
 								                    _already_streamed = _sc and getattr(_sc, "already_sent", False)
 								                    first_response = result.get("final_response", "")
 								                    if first_response and not _already_streamed:
 								                        try:
 								                            await adapter.send(source.chat_id, first_response,
 								                                               metadata=getattr(event, "metadata", None))
 								                        except Exception as e:
 								                            logger.warning("Failed to send first response before queued message: %s", e)
 								                # else: interrupted — discard the interrupted response ("Operation
 								                # interrupted." is just noise; the user already knows they sent a
 								                # new message).
 								                # Process the pending message with updated history
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                updated_history = result.get("messages", history)
 								                return await self._run_agent(
 								                    message=pending,
 								                    context_prompt=context_prompt,
 								                    history=updated_history,
 								                    source=source,
 								                    session_id=session_id,
-												fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

* fix(tools): chunk long messages in send_message_tool before dispatch (#1552)

Long messages sent via send_message tool or cron delivery silently
failed when exceeding platform limits. Gateway adapters handle this
via truncate_message(), but the standalone senders in send_message_tool
bypassed that entirely.

- Apply truncate_message() chunking in _send_to_platform() before
  dispatching to individual platform senders
- Remove naive message[i:i+2000] character split in _send_discord()
  in favor of centralized smart splitting
- Attach media files to last chunk only for Telegram
- Add regression tests for chunking and media placement

Cherry-picked from PR #1557 by llbn.

* fix(approval): show full command in dangerous command approval (#1553)

Previously the command was truncated to 80 chars in CLI (with a
[v]iew full option), 500 chars in Discord embeds, and missing entirely
in Telegram/Slack approval messages. Now the full command is always
displayed everywhere:

- CLI: removed 80-char truncation and [v]iew full menu option
- Gateway (TG/Slack): approval_required message includes full command
  in a code block
- Discord: embed shows full command up to 4096-char limit
- Windows: skip SIGALRM-based test timeout (Unix-only)
- Updated tests: replaced view-flow tests with direct approval tests

Cherry-picked from PR #1566 by crazywriter1.

* fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)

The interrupt polling loop in chat() waited on the queue without
invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy
buffer only flushed on input events, causing the CLI to appear frozen
during tool execution until the user typed a key.

Fix: call _invalidate() on each queue timeout (every ~100ms, throttled
to 150ms) to force the renderer to flush buffered agent output.

* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)

When --migrate-secrets is not passed (the default), API keys like
OPENROUTER_API_KEY are silently skipped with no warning. Users don't
realize their keys weren't migrated until the agent fails to connect.

Add a post-migration warning with actionable instructions: either
re-run with --migrate-secrets or add the key manually via
hermes config set.

Cherry-picked from PR #1593 by ygd58.

* fix(security): block sandbox backend creds from subprocess env (#1264)

Add Modal and Daytona sandbox credentials to the subprocess env
blocklist so they're not leaked to agent terminal sessions via
printenv/env.

Cherry-picked from PR #1571 by ygd58.

* fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)

When a user sends multiple messages while the agent keeps failing,
_run_agent() calls itself recursively with no depth limit. This can
exhaust stack/memory if the agent is in a failure loop.

Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is
logged and the current result is returned instead of recursing deeper.

The log handler duplication bug described in #816 was already fixed
separately (AIAgent.__init__ deduplicates handlers).

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
Co-authored-by: lbn <llbn@users.noreply.github.com>
Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
											
										
										
											2026-03-17 02:23:07 -07:00
+								                    session_key=session_key,
 								                    _interrupt_depth=_interrupt_depth + 1,
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								                )
 								        finally:
 								            # Stop progress sender and interrupt monitor
 								            if progress_task:
 								                progress_task.cancel()
 								            interrupt_monitor.cancel()
-												feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent

Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:

- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
  on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
  a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
  returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally

Config (config.yaml):
  streaming:
    enabled: true
    transport: edit      # progressive editMessageText
    edit_interval: 0.3   # seconds between edits
    buffer_threshold: 40 # chars before forcing flush
    cursor: ' ▉'

Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).

											
										
										
											2026-03-16 05:52:42 -07:00
 								            # Wait for stream consumer to finish its final edit
 								            if stream_task:
 								                try:
 								                    await asyncio.wait_for(stream_task, timeout=5.0)
 								                except (asyncio.TimeoutError, asyncio.CancelledError):
 								                    stream_task.cancel()
 								                    try:
 								                        await stream_task
 								                    except asyncio.CancelledError:
 								                        pass
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								            # Clean up tracking
 								            tracking_task.cancel()
 								            if session_key and session_key in self._running_agents:
 								                del self._running_agents[session_key]
 								            # Wait for cancelled tasks
 								            for task in [progress_task, interrupt_monitor, tracking_task]:
 								                if task:
 								                    try:
 								                        await task
 								                    except asyncio.CancelledError:
 								                        pass
-												feat(gateway): streaming token delivery — StreamingConfig, GatewayStreamConsumer, already_sent

Stage 3 of streaming support. Gateway now streams tokens to messaging platforms:

- StreamingConfig dataclass (enabled, transport, edit_interval, buffer_threshold, cursor)
  on GatewayConfig with from_dict/to_dict serialization
- GatewayStreamConsumer: async queue-based consumer that progressively edits
  a single message on the target platform (edit transport)
- on_delta() → queue → run() async task → send_or_edit() with rate limiting
- already_sent propagation: when streaming delivered the response, handler
  returns None so base adapter skips duplicate send()
- stream_delta_callback wired into AIAgent constructor in _run_agent
- Consumer lifecycle: started as asyncio task, awaited with timeout in finally

Config (config.yaml):
  streaming:
    enabled: true
    transport: edit      # progressive editMessageText
    edit_interval: 0.3   # seconds between edits
    buffer_threshold: 40 # chars before forcing flush
    cursor: ' ▉'

Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).

											
										
										
											2026-03-16 05:52:42 -07:00
 								        # If streaming already delivered the response, mark it so the
 								        # caller's send() is skipped (avoiding duplicate messages).
 								        _sc = stream_consumer_holder[0]
 								        if _sc and _sc.already_sent and isinstance(response, dict):
 								            response["already_sent"] = True
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								        return response
 								def _start_cron_ticker(stop_event: threading.Event, adapters=None, interval: int = 60):
 								    """
 								    Background thread that ticks the cron scheduler at a regular interval.
 								    Runs inside the gateway process so cronjobs fire automatically without
 								    needing a separate `hermes cron daemon` or system cron entry.
 								    Also refreshes the channel directory every 5 minutes and prunes the
 								    image/audio/document cache once per hour.
 								    """
 								    from cron.scheduler import tick as cron_tick
 								    from gateway.platforms.base import cleanup_image_cache, cleanup_document_cache
 								    IMAGE_CACHE_EVERY = 60   # ticks — once per hour at default 60s interval
 								    CHANNEL_DIR_EVERY = 5    # ticks — every 5 minutes
 								    logger.info("Cron ticker started (interval=%ds)", interval)
 								    tick_count = 0
 								    while not stop_event.is_set():
 								        try:
 								            cron_tick(verbose=False)
 								        except Exception as e:
 								            logger.debug("Cron tick error: %s", e)
 								        tick_count += 1
 								        if tick_count % CHANNEL_DIR_EVERY == 0 and adapters:
 								            try:
 								                from gateway.channel_directory import build_channel_directory
 								                build_channel_directory(adapters)
 								            except Exception as e:
 								                logger.debug("Channel directory refresh error: %s", e)
 								        if tick_count % IMAGE_CACHE_EVERY == 0:
 								            try:
 								                removed = cleanup_image_cache(max_age_hours=24)
 								                if removed:
 								                    logger.info("Image cache cleanup: removed %d stale file(s)", removed)
 								            except Exception as e:
 								                logger.debug("Image cache cleanup error: %s", e)
 								            try:
 								                removed = cleanup_document_cache(max_age_hours=24)
 								                if removed:
 								                    logger.info("Document cache cleanup: removed %d stale file(s)", removed)
 								            except Exception as e:
 								                logger.debug("Document cache cleanup error: %s", e)
 								        stop_event.wait(timeout=interval)
 								    logger.info("Cron ticker stopped")
 								async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False) -> bool:
 								    """
 								    Start the gateway and run until interrupted.
 								    This is the main entry point for running the gateway.
 								    Returns True if the gateway ran successfully, False if it failed to start.
 								    A False return causes a non-zero exit code so systemd can auto-restart.
 								    Args:
 								        config: Optional gateway configuration override.
 								        replace: If True, kill any existing gateway instance before starting.
 								                 Useful for systemd services to avoid restart-loop deadlocks
 								                 when the previous process hasn't fully exited yet.
 								    """
 								    # ── Duplicate-instance guard ──────────────────────────────────────
 								    # Prevent two gateways from running under the same HERMES_HOME.
 								    # The PID file is scoped to HERMES_HOME, so future multi-profile
 								    # setups (each profile using a distinct HERMES_HOME) will naturally
 								    # allow concurrent instances without tripping this guard.
 								    import time as _time
 								    from gateway.status import get_running_pid, remove_pid_file
 								    existing_pid = get_running_pid()
 								    if existing_pid is not None and existing_pid != os.getpid():
 								        if replace:
 								            logger.info(
 								                "Replacing existing gateway instance (PID %d) with --replace.",
 								                existing_pid,
 								            )
 								            try:
 								                os.kill(existing_pid, signal.SIGTERM)
 								            except ProcessLookupError:
 								                pass  # Already gone
 								            except PermissionError:
 								                logger.error(
 								                    "Permission denied killing PID %d. Cannot replace.",
 								                    existing_pid,
 								                )
 								                return False
 								            # Wait up to 10 seconds for the old process to exit
 								            for _ in range(20):
 								                try:
 								                    os.kill(existing_pid, 0)
 								                    _time.sleep(0.5)
 								                except (ProcessLookupError, PermissionError):
 								                    break  # Process is gone
 								            else:
 								                # Still alive after 10s — force kill
 								                logger.warning(
 								                    "Old gateway (PID %d) did not exit after SIGTERM, sending SIGKILL.",
 								                    existing_pid,
 								                )
 								                try:
 								                    os.kill(existing_pid, signal.SIGKILL)
 								                    _time.sleep(0.5)
 								                except (ProcessLookupError, PermissionError):
 								                    pass
 								            remove_pid_file()
-												fix(gateway): detect stopped processes and release stale locks on --replace

											
										
										
											2026-03-21 18:13:53 -07:00
+								            # Also release all scoped locks left by the old process.
 								            # Stopped (Ctrl+Z) processes don't release locks on exit,
 								            # leaving stale lock files that block the new gateway from starting.
 								            try:
 								                from gateway.status import release_all_scoped_locks
 								                _released = release_all_scoped_locks()
 								                if _released:
 								                    logger.info("Released %d stale scoped lock(s) from old gateway.", _released)
 								            except Exception:
 								                pass
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        else:
-												refactor: consolidate get_hermes_home() and parse_reasoning_effort() (#3062)

Centralizes two widely-duplicated patterns into hermes_constants.py:

1. get_hermes_home() — Path resolution for ~/.hermes (HERMES_HOME env var)
   - Was copy-pasted inline across 30+ files as:
     Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
   - Now defined once in hermes_constants.py (zero-dependency module)
   - hermes_cli/config.py re-exports it for backward compatibility
   - Removed local wrapper functions in honcho_integration/client.py,
     tools/website_policy.py, tools/tirith_security.py, hermes_cli/uninstall.py

2. parse_reasoning_effort() — Reasoning effort string validation
   - Was copy-pasted in cli.py, gateway/run.py, cron/scheduler.py
   - Same validation logic: check against (xhigh, high, medium, low, minimal, none)
   - Now defined once in hermes_constants.py, called from all 3 locations
   - Warning log for unknown values kept at call sites (context-specific)

31 files changed, net +31 lines (125 insertions, 94 deletions)
Full test suite: 6179 passed, 0 failed
											
										
										
											2026-03-25 15:54:28 -07:00
+								            hermes_home = str(get_hermes_home())
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								            logger.error(
 								                "Another gateway instance is already running (PID %d, HERMES_HOME=%s). "
 								                "Use 'hermes gateway restart' to replace it, or 'hermes gateway stop' first.",
 								                existing_pid, hermes_home,
 								            )
 								            print(
 								                f"\n❌ Gateway already running (PID {existing_pid}).\n"
 								                f"   Use 'hermes gateway restart' to replace it,\n"
 								                f"   or 'hermes gateway stop' to kill it first.\n"
 								                f"   Or use 'hermes gateway run --replace' to auto-replace.\n"
 								            )
 								            return False
 								    # Sync bundled skills on gateway start (fast -- skips unchanged)
 								    try:
 								        from tools.skills_sync import sync_skills
 								        sync_skills(quiet=True)
 								    except Exception:
 								        pass
 								    # Configure rotating file log so gateway output is persisted for debugging
 								    log_dir = _hermes_home / 'logs'
 								    log_dir.mkdir(parents=True, exist_ok=True)
 								    file_handler = RotatingFileHandler(
 								        log_dir / 'gateway.log',
 								        maxBytes=5 * 1024 * 1024,
 								        backupCount=3,
 								    )
 								    from agent.redact import RedactingFormatter
 								    file_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
 								    logging.getLogger().addHandler(file_handler)
 								    logging.getLogger().setLevel(logging.INFO)
 								    # Separate errors-only log for easy debugging
 								    error_handler = RotatingFileHandler(
 								        log_dir / 'errors.log',
 								        maxBytes=2 * 1024 * 1024,
 								        backupCount=2,
 								    )
 								    error_handler.setLevel(logging.WARNING)
 								    error_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
 								    logging.getLogger().addHandler(error_handler)
 								    runner = GatewayRunner(config)
 								    # Set up signal handlers
 								    def signal_handler():
 								        asyncio.create_task(runner.stop())
 								    loop = asyncio.get_event_loop()
 								    for sig in (signal.SIGINT, signal.SIGTERM):
 								        try:
 								            loop.add_signal_handler(sig, signal_handler)
 								        except NotImplementedError:
 								            pass
 								    # Start the gateway
 								    success = await runner.start()
 								    if not success:
 								        return False
-												fix: preserve current gateway update and startup behavior

Follow up on salvaged PR #1052.
Restore current-main gateway lifecycle handling after conflict resolution and
adapt the update fallback to use shell-quoted argv parts safely.

											
										
										
											2026-03-14 18:03:50 -07:00
+								    if runner.should_exit_cleanly:
 								        if runner.exit_reason:
 								            logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
 								        return True
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								    # Write PID file so CLI can detect gateway is running
 								    import atexit
 								    from gateway.status import write_pid_file, remove_pid_file
 								    write_pid_file()
 								    atexit.register(remove_pid_file)
 								    # Start background cron ticker so scheduled jobs fire automatically
 								    cron_stop = threading.Event()
 								    cron_thread = threading.Thread(
 								        target=_start_cron_ticker,
 								        args=(cron_stop,),
 								        kwargs={"adapters": runner.adapters},
 								        daemon=True,
 								        name="cron-ticker",
 								    )
 								    cron_thread.start()
 								    # Wait for shutdown
 								    await runner.wait_for_shutdown()
-												fix(gateway): restart on whatsapp bridge child exit (#2334)

Co-authored-by: Frederico Ribeiro <fr@tecompanytea.com>
											
										
										
											2026-03-21 09:38:52 -07:00
 								    if runner.should_exit_with_failure:
 								        if runner.exit_reason:
 								            logger.error("Gateway exiting with failure: %s", runner.exit_reason)
 								        return False
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
 								    # Stop cron ticker cleanly
 								    cron_stop.set()
 								    cron_thread.join(timeout=5)
 								    # Close MCP server connections
 								    try:
 								        from tools.mcp_tool import shutdown_mcp_servers
 								        shutdown_mcp_servers()
 								    except Exception:
 								        pass
 								    return True
 								def main():
 								    """CLI entry point for the gateway."""
 								    import argparse
 								    parser = argparse.ArgumentParser(description="Hermes Gateway - Multi-platform messaging")
 								    parser.add_argument("--config", "-c", help="Path to gateway config file")
 								    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
 								    args = parser.parse_args()
 								    config = None
 								    if args.config:
 								        import json
 								        with open(args.config, encoding="utf-8") as f:
 								            data = json.load(f)
 								            config = GatewayConfig.from_dict(data)
 								    # Run the gateway - exit with code 1 if no platforms connected,
 								    # so systemd Restart=on-failure will retry on transient errors (e.g. DNS)
 								    success = asyncio.run(start_gateway(config))
 								    if not success:
 								        sys.exit(1)
 								if __name__ == "__main__":
 								    main()