Files
hermes-agent/gateway/run.py

9893 lines
445 KiB
Python
Raw Normal View History

"""
Gateway runner - entry point for messaging platform integrations.
This module provides:
- start_gateway(): Start all configured platform adapters
- GatewayRunner: Main class managing the gateway lifecycle
Usage:
# Start the gateway
python -m gateway.run
# Or from CLI
python cli.py --gateway
"""
import asyncio
import json
import logging
import os
import re
import shlex
import sys
import signal
import tempfile
import threading
import time
from contextvars import copy_context
from pathlib import Path
from datetime import datetime
from typing import Dict, Optional, Any, List
# ---------------------------------------------------------------------------
# SSL certificate auto-detection for NixOS and other non-standard systems.
# Must run BEFORE any HTTP library (discord, aiohttp, etc.) is imported.
# ---------------------------------------------------------------------------
def _ensure_ssl_certs() -> None:
"""Set SSL_CERT_FILE if the system doesn't expose CA certs to Python."""
if "SSL_CERT_FILE" in os.environ:
return # user already configured it
import ssl
# 1. Python's compiled-in defaults
paths = ssl.get_default_verify_paths()
for candidate in (paths.cafile, paths.openssl_cafile):
if candidate and os.path.exists(candidate):
os.environ["SSL_CERT_FILE"] = candidate
return
# 2. certifi (ships its own Mozilla bundle)
try:
import certifi
os.environ["SSL_CERT_FILE"] = certifi.where()
return
except ImportError:
pass
# 3. Common distro / macOS locations
for candidate in (
"/etc/ssl/certs/ca-certificates.crt", # Debian/Ubuntu/Gentoo
"/etc/pki/tls/certs/ca-bundle.crt", # RHEL/CentOS 7
"/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", # RHEL/CentOS 8+
"/etc/ssl/ca-bundle.pem", # SUSE/OpenSUSE
"/etc/ssl/cert.pem", # Alpine / macOS
"/etc/pki/tls/cert.pem", # Fedora
"/usr/local/etc/openssl@1.1/cert.pem", # macOS Homebrew Intel
"/opt/homebrew/etc/openssl@1.1/cert.pem", # macOS Homebrew ARM
):
if os.path.exists(candidate):
os.environ["SSL_CERT_FILE"] = candidate
return
# Run cert detection before any HTTP-capable library gets imported below.
_ensure_ssl_certs()
# Add parent directory to path so sibling packages (gateway, hermes_cli, ...)
# resolve when this module is run directly.
sys.path.insert(0, str(Path(__file__).parent.parent))
# Resolve Hermes home directory (respects HERMES_HOME override)
from hermes_constants import get_hermes_home
from utils import atomic_yaml_write, is_truthy_value
_hermes_home = get_hermes_home()
# Load environment variables from ~/.hermes/.env first.
# User-managed env files should override stale shell exports on restart.
from dotenv import load_dotenv  # backward-compat for tests that monkeypatch this symbol
from hermes_cli.env_loader import load_hermes_dotenv
_env_path = _hermes_home / '.env'
load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')
# Bridge config.yaml values into the environment so os.getenv() picks them up.
# config.yaml is authoritative for terminal settings — overrides .env.
_config_path = _hermes_home / 'config.yaml'
if _config_path.exists():
    try:
        import yaml as _yaml
        with open(_config_path, encoding="utf-8") as _f:
            _cfg = _yaml.safe_load(_f) or {}
        # Expand ${ENV_VAR} references before bridging to env vars.
        from hermes_cli.config import _expand_env_vars
        _cfg = _expand_env_vars(_cfg)
        # Top-level simple values (fallback only — don't override .env)
        for _key, _val in _cfg.items():
            if isinstance(_val, (str, int, float, bool)) and _key not in os.environ:
                os.environ[_key] = str(_val)
        # Terminal config is nested — bridge to TERMINAL_* env vars.
        # config.yaml overrides .env for these since it's the documented config path.
        _terminal_cfg = _cfg.get("terminal", {})
        if _terminal_cfg and isinstance(_terminal_cfg, dict):
            _terminal_env_map = {
                "backend": "TERMINAL_ENV",
                "cwd": "TERMINAL_CWD",
                "timeout": "TERMINAL_TIMEOUT",
                "lifetime_seconds": "TERMINAL_LIFETIME_SECONDS",
                "docker_image": "TERMINAL_DOCKER_IMAGE",
                "docker_forward_env": "TERMINAL_DOCKER_FORWARD_ENV",
                "singularity_image": "TERMINAL_SINGULARITY_IMAGE",
                "modal_image": "TERMINAL_MODAL_IMAGE",
                "daytona_image": "TERMINAL_DAYTONA_IMAGE",
                "ssh_host": "TERMINAL_SSH_HOST",
                "ssh_user": "TERMINAL_SSH_USER",
                "ssh_port": "TERMINAL_SSH_PORT",
                "ssh_key": "TERMINAL_SSH_KEY",
                "container_cpu": "TERMINAL_CONTAINER_CPU",
                "container_memory": "TERMINAL_CONTAINER_MEMORY",
                "container_disk": "TERMINAL_CONTAINER_DISK",
                "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
                "docker_volumes": "TERMINAL_DOCKER_VOLUMES",
                "sandbox_dir": "TERMINAL_SANDBOX_DIR",
                "persistent_shell": "TERMINAL_PERSISTENT_SHELL",
            }
            for _cfg_key, _env_var in _terminal_env_map.items():
                if _cfg_key in _terminal_cfg:
                    _val = _terminal_cfg[_cfg_key]
                    # Skip cwd placeholder values (".", "auto", "cwd") — the
                    # gateway resolves these to Path.home() later (see the
                    # TERMINAL_CWD fallback below). Writing the raw
                    # placeholder here would just be noise. Only bridge
                    # explicit absolute paths from config.yaml.
                    if _cfg_key == "cwd" and str(_val) in (".", "auto", "cwd"):
                        continue
                    # Lists (e.g. docker_volumes) are serialized as JSON so
                    # the consumer can round-trip them losslessly.
                    if isinstance(_val, list):
                        os.environ[_env_var] = json.dumps(_val)
                    else:
                        os.environ[_env_var] = str(_val)
        # Compression config is read directly from config.yaml by run_agent.py
        # and auxiliary_client.py — no env var bridging needed.
        # Auxiliary model/direct-endpoint overrides (vision, web_extract, approval).
        # Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
        _auxiliary_cfg = _cfg.get("auxiliary", {})
        if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
            _aux_task_env = {
                "vision": {
                    "provider": "AUXILIARY_VISION_PROVIDER",
                    "model": "AUXILIARY_VISION_MODEL",
                    "base_url": "AUXILIARY_VISION_BASE_URL",
                    "api_key": "AUXILIARY_VISION_API_KEY",
                },
                "web_extract": {
                    "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
                    "model": "AUXILIARY_WEB_EXTRACT_MODEL",
                    "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
                    "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
                },
                "approval": {
                    "provider": "AUXILIARY_APPROVAL_PROVIDER",
                    "model": "AUXILIARY_APPROVAL_MODEL",
                    "base_url": "AUXILIARY_APPROVAL_BASE_URL",
                    "api_key": "AUXILIARY_APPROVAL_API_KEY",
                },
            }
            for _task_key, _env_map in _aux_task_env.items():
                _task_cfg = _auxiliary_cfg.get(_task_key, {})
                if not isinstance(_task_cfg, dict):
                    continue
                _prov = str(_task_cfg.get("provider", "")).strip()
                _model = str(_task_cfg.get("model", "")).strip()
                _base_url = str(_task_cfg.get("base_url", "")).strip()
                _api_key = str(_task_cfg.get("api_key", "")).strip()
                # "auto" is the default provider sentinel — only bridge
                # explicit, non-default selections.
                if _prov and _prov != "auto":
                    os.environ[_env_map["provider"]] = _prov
                if _model:
                    os.environ[_env_map["model"]] = _model
                if _base_url:
                    os.environ[_env_map["base_url"]] = _base_url
                if _api_key:
                    os.environ[_env_map["api_key"]] = _api_key
        _agent_cfg = _cfg.get("agent", {})
        if _agent_cfg and isinstance(_agent_cfg, dict):
            # NOTE(review): max_turns unconditionally overwrites any
            # HERMES_MAX_ITERATIONS already in the environment, unlike the
            # gateway_* keys below which defer to .env — confirm intended.
            if "max_turns" in _agent_cfg:
                os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
            # Bridge agent.gateway_timeout → HERMES_AGENT_TIMEOUT env var.
            # Env var from .env takes precedence (already in os.environ).
            if "gateway_timeout" in _agent_cfg and "HERMES_AGENT_TIMEOUT" not in os.environ:
                os.environ["HERMES_AGENT_TIMEOUT"] = str(_agent_cfg["gateway_timeout"])
            if "gateway_timeout_warning" in _agent_cfg and "HERMES_AGENT_TIMEOUT_WARNING" not in os.environ:
                os.environ["HERMES_AGENT_TIMEOUT_WARNING"] = str(_agent_cfg["gateway_timeout_warning"])
            if "gateway_notify_interval" in _agent_cfg and "HERMES_AGENT_NOTIFY_INTERVAL" not in os.environ:
                os.environ["HERMES_AGENT_NOTIFY_INTERVAL"] = str(_agent_cfg["gateway_notify_interval"])
            if "restart_drain_timeout" in _agent_cfg and "HERMES_RESTART_DRAIN_TIMEOUT" not in os.environ:
                os.environ["HERMES_RESTART_DRAIN_TIMEOUT"] = str(_agent_cfg["restart_drain_timeout"])
        _display_cfg = _cfg.get("display", {})
        if _display_cfg and isinstance(_display_cfg, dict):
            if "busy_input_mode" in _display_cfg and "HERMES_GATEWAY_BUSY_INPUT_MODE" not in os.environ:
                os.environ["HERMES_GATEWAY_BUSY_INPUT_MODE"] = str(_display_cfg["busy_input_mode"])
        # Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
        # HERMES_TIMEZONE from .env takes precedence (already in os.environ).
        _tz_cfg = _cfg.get("timezone", "")
        if _tz_cfg and isinstance(_tz_cfg, str) and "HERMES_TIMEZONE" not in os.environ:
            os.environ["HERMES_TIMEZONE"] = _tz_cfg.strip()
        # Security settings
        _security_cfg = _cfg.get("security", {})
        if isinstance(_security_cfg, dict):
            _redact = _security_cfg.get("redact_secrets")
            if _redact is not None:
                # Normalized to "true"/"false" for boolean parsing downstream.
                os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
    except Exception:
        pass  # Non-fatal; gateway can still run with .env values
# Apply IPv4 preference if configured (before any HTTP clients are created).
try:
    from hermes_constants import apply_ipv4_preference
    # _cfg only exists when config.yaml was found and parsed successfully
    # above — the dir() guard avoids a NameError otherwise.
    _network_cfg = (_cfg if '_cfg' in dir() else {}).get("network", {})
    if isinstance(_network_cfg, dict) and _network_cfg.get("force_ipv4"):
        apply_ipv4_preference(force=True)
except Exception:
    pass
# Validate config structure early — log warnings so gateway operators see
# problems at startup rather than mid-session. Best-effort: a failure in the
# validator itself must not block the gateway.
try:
    from hermes_cli.config import print_config_warnings
    print_config_warnings()
except Exception:
    pass
# Warn if user has deprecated MESSAGING_CWD / TERMINAL_CWD in .env
try:
    from hermes_cli.config import warn_deprecated_cwd_env_vars
    warn_deprecated_cwd_env_vars()
except Exception:
    pass
# Gateway runs in quiet mode - suppress debug output and use cwd directly (no temp dirs)
os.environ["HERMES_QUIET"] = "1"
# Enable interactive exec approval for dangerous commands on messaging platforms
os.environ["HERMES_EXEC_ASK"] = "1"
# Set terminal working directory for messaging platforms.
# config.yaml terminal.cwd is the canonical source (bridged to TERMINAL_CWD
# by the config bridge above). When it's unset or a placeholder, default
# to home directory. MESSAGING_CWD is accepted as a backward-compat
# fallback (deprecated — the warning above tells users to migrate).
_configured_cwd = os.environ.get("TERMINAL_CWD", "")
if not _configured_cwd or _configured_cwd in (".", "auto", "cwd"):
    _fallback = os.getenv("MESSAGING_CWD") or str(Path.home())
    os.environ["TERMINAL_CWD"] = _fallback
from gateway.config import (
Platform,
GatewayConfig,
load_gateway_config,
)
from gateway.session import (
SessionStore,
SessionSource,
SessionContext,
build_session_context,
build_session_context_prompt,
build_session_key,
)
from gateway.delivery import DeliveryRouter
from gateway.platforms.base import (
BasePlatformAdapter,
MessageEvent,
MessageType,
merge_pending_message_event,
)
from gateway.restart import (
DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
GATEWAY_SERVICE_RESTART_EXIT_CODE,
parse_restart_drain_timeout,
)
def _normalize_whatsapp_identifier(value: str) -> str:
"""Strip WhatsApp JID/LID syntax down to its stable numeric identifier."""
return (
str(value or "")
.strip()
.replace("+", "", 1)
.split(":", 1)[0]
.split("@", 1)[0]
)
def _expand_whatsapp_auth_aliases(identifier: str) -> set:
    """Resolve WhatsApp phone/LID aliases using bridge session mapping files.

    Computes the transitive closure of ``identifier`` under the
    ``lid-mapping-<id>.json`` / ``lid-mapping-<id>_reverse.json`` files in
    the bridge session directory. Returns the set of all reachable
    normalized identifiers (empty set for empty input).
    """
    seed = _normalize_whatsapp_identifier(identifier)
    if not seed:
        return set()
    mapping_dir = _hermes_home / "whatsapp" / "session"
    known: set = set()
    frontier = [seed]
    while frontier:
        ident = frontier.pop()
        if not ident or ident in known:
            continue
        known.add(ident)
        # Each identifier may have a forward and a reverse mapping file.
        for suffix in ("", "_reverse"):
            mapping_file = mapping_dir / f"lid-mapping-{ident}{suffix}.json"
            if not mapping_file.exists():
                continue
            try:
                raw = json.loads(mapping_file.read_text(encoding="utf-8"))
                alias = _normalize_whatsapp_identifier(raw)
            except Exception:
                # Unreadable or malformed mapping — skip, best-effort.
                continue
            if alias and alias not in known:
                frontier.append(alias)
    return known
# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)
# Sentinel placed into _running_agents immediately when a session starts
# processing, *before* any await. Prevents a second message for the same
# session from bypassing the "already running" guard during the async gap
# between the guard check and actual agent creation.
_AGENT_PENDING_SENTINEL = object()
def _resolve_runtime_agent_kwargs() -> dict:
    """Resolve provider credentials for gateway-created AIAgent instances.

    Returns:
        dict with keys api_key, base_url, provider, api_mode, command,
        args, and credential_pool, suitable for AIAgent construction.

    Raises:
        RuntimeError: when no usable inference provider can be resolved;
            the message is pre-formatted for display via
            format_runtime_provider_error.
    """
    # Imported inside the function so module import stays lightweight.
    from hermes_cli.runtime_provider import (
        resolve_runtime_provider,
        format_runtime_provider_error,
    )
    try:
        runtime = resolve_runtime_provider(
            requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
        )
    except Exception as exc:
        # Re-raise as a readable, operator-facing error with the original
        # exception chained as the cause.
        raise RuntimeError(format_runtime_provider_error(exc)) from exc
    return {
        "api_key": runtime.get("api_key"),
        "base_url": runtime.get("base_url"),
        "provider": runtime.get("provider"),
        "api_mode": runtime.get("api_mode"),
        "command": runtime.get("command"),
        # Fresh list copy so callers can mutate args without aliasing.
        "args": list(runtime.get("args") or []),
        "credential_pool": runtime.get("credential_pool"),
    }
def _build_media_placeholder(event) -> str:
"""Build a text placeholder for media-only events so they aren't dropped.
When a photo/document is queued during active processing and later
dequeued, only .text is extracted. If the event has no caption,
the media would be silently lost. This builds a placeholder that
the vision enrichment pipeline will replace with a real description.
"""
parts = []
media_urls = getattr(event, "media_urls", None) or []
media_types = getattr(event, "media_types", None) or []
for i, url in enumerate(media_urls):
mtype = media_types[i] if i < len(media_types) else ""
if mtype.startswith("image/") or getattr(event, "message_type", None) == MessageType.PHOTO:
parts.append(f"[User sent an image: {url}]")
elif mtype.startswith("audio/"):
parts.append(f"[User sent audio: {url}]")
else:
parts.append(f"[User sent a file: {url}]")
return "\n".join(parts)
def _dequeue_pending_event(adapter, session_key: str) -> MessageEvent | None:
    """Pop and return the session's queued follow-up event, if any.

    The full event object (not just its text) is returned so queued media
    keeps its metadata and can re-enter the normal image/STT/document
    preprocessing path instead of being reduced to a placeholder string.
    """
    pending = adapter.get_pending_message(session_key)
    return pending
def _check_unavailable_skill(command_name: str) -> str | None:
"""Check if a command matches a known-but-inactive skill.
Returns a helpful message if the skill exists but is disabled or only
available as an optional install. Returns None if no match found.
"""
# Normalize: command uses hyphens, skill names may use hyphens or underscores
normalized = command_name.lower().replace("_", "-")
try:
from tools.skills_tool import _get_disabled_skill_names
from agent.skill_utils import get_all_skills_dirs
feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap (#3934) * feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap Map active skills to Telegram's slash command menu so users can discover and invoke skills directly. Three changes: 1. Telegram menu now includes active skill commands alongside built-in commands, capped at 100 entries (Telegram Bot API limit). Overflow commands remain callable but hidden from the picker. Logged at startup when cap is hit. 2. New /commands [page] gateway command for paginated browsing of all commands + skills. /help now shows first 10 skill commands and points to /commands for the full list. 3. When a user types a slash command that matches a disabled or uninstalled skill, they get actionable guidance: - Disabled: 'Enable it with: hermes skills config' - Optional (not installed): 'Install with: hermes skills install official/<path>' Built on ideas from PR #3921 by @kshitijk4poor. * chore: move 21 niche skills to optional-skills Move specialized/niche skills from built-in (skills/) to optional (optional-skills/) to reduce the default skill count. Users can install them with: hermes skills install official/<category>/<name> Moved skills (21): - mlops: accelerate, chroma, faiss, flash-attention, hermes-atropos-environments, huggingface-tokenizers, instructor, lambda-labs, llava, nemo-curator, pinecone, pytorch-lightning, qdrant, saelens, simpo, slime, tensorrt-llm, torchtitan - research: domain-intel, duckduckgo-search - devops: inference-sh cli Built-in skills: 96 → 75 Optional skills: 22 → 43 * fix: only include repo built-in skills in Telegram menu, not user-installed User-installed skills (from hub or manually added) stay accessible via /skills and by typing the command directly, but don't get registered in the Telegram slash command picker. Only skills whose SKILL.md is under the repo's skills/ directory are included in the menu. 
This keeps the Telegram menu focused on the curated built-in set while user-installed skills remain discoverable through /skills and /commands.
2026-03-30 10:57:30 -07:00
disabled = _get_disabled_skill_names()
# Check disabled skills across all dirs (local + external)
for skills_dir in get_all_skills_dirs():
if not skills_dir.exists():
feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap (#3934) * feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap Map active skills to Telegram's slash command menu so users can discover and invoke skills directly. Three changes: 1. Telegram menu now includes active skill commands alongside built-in commands, capped at 100 entries (Telegram Bot API limit). Overflow commands remain callable but hidden from the picker. Logged at startup when cap is hit. 2. New /commands [page] gateway command for paginated browsing of all commands + skills. /help now shows first 10 skill commands and points to /commands for the full list. 3. When a user types a slash command that matches a disabled or uninstalled skill, they get actionable guidance: - Disabled: 'Enable it with: hermes skills config' - Optional (not installed): 'Install with: hermes skills install official/<path>' Built on ideas from PR #3921 by @kshitijk4poor. * chore: move 21 niche skills to optional-skills Move specialized/niche skills from built-in (skills/) to optional (optional-skills/) to reduce the default skill count. Users can install them with: hermes skills install official/<category>/<name> Moved skills (21): - mlops: accelerate, chroma, faiss, flash-attention, hermes-atropos-environments, huggingface-tokenizers, instructor, lambda-labs, llava, nemo-curator, pinecone, pytorch-lightning, qdrant, saelens, simpo, slime, tensorrt-llm, torchtitan - research: domain-intel, duckduckgo-search - devops: inference-sh cli Built-in skills: 96 → 75 Optional skills: 22 → 43 * fix: only include repo built-in skills in Telegram menu, not user-installed User-installed skills (from hub or manually added) stay accessible via /skills and by typing the command directly, but don't get registered in the Telegram slash command picker. Only skills whose SKILL.md is under the repo's skills/ directory are included in the menu. 
This keeps the Telegram menu focused on the curated built-in set while user-installed skills remain discoverable through /skills and /commands.
2026-03-30 10:57:30 -07:00
continue
for skill_md in skills_dir.rglob("SKILL.md"):
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
continue
name = skill_md.parent.name.lower().replace("_", "-")
if name == normalized and name in disabled:
return (
f"The **{command_name}** skill is installed but disabled.\n"
f"Enable it with: `hermes skills config`"
)
feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap (#3934) * feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap Map active skills to Telegram's slash command menu so users can discover and invoke skills directly. Three changes: 1. Telegram menu now includes active skill commands alongside built-in commands, capped at 100 entries (Telegram Bot API limit). Overflow commands remain callable but hidden from the picker. Logged at startup when cap is hit. 2. New /commands [page] gateway command for paginated browsing of all commands + skills. /help now shows first 10 skill commands and points to /commands for the full list. 3. When a user types a slash command that matches a disabled or uninstalled skill, they get actionable guidance: - Disabled: 'Enable it with: hermes skills config' - Optional (not installed): 'Install with: hermes skills install official/<path>' Built on ideas from PR #3921 by @kshitijk4poor. * chore: move 21 niche skills to optional-skills Move specialized/niche skills from built-in (skills/) to optional (optional-skills/) to reduce the default skill count. Users can install them with: hermes skills install official/<category>/<name> Moved skills (21): - mlops: accelerate, chroma, faiss, flash-attention, hermes-atropos-environments, huggingface-tokenizers, instructor, lambda-labs, llava, nemo-curator, pinecone, pytorch-lightning, qdrant, saelens, simpo, slime, tensorrt-llm, torchtitan - research: domain-intel, duckduckgo-search - devops: inference-sh cli Built-in skills: 96 → 75 Optional skills: 22 → 43 * fix: only include repo built-in skills in Telegram menu, not user-installed User-installed skills (from hub or manually added) stay accessible via /skills and by typing the command directly, but don't get registered in the Telegram slash command picker. Only skills whose SKILL.md is under the repo's skills/ directory are included in the menu. 
This keeps the Telegram menu focused on the curated built-in set while user-installed skills remain discoverable through /skills and /commands.
2026-03-30 10:57:30 -07:00
# Check optional skills (shipped with repo but not installed)
refactor: codebase-wide lint cleanup — unused imports, dead code, and inefficient patterns (#5821) Comprehensive cleanup across 80 files based on automated (ruff, pyflakes, vulture) and manual analysis of the entire codebase. Changes by category: Unused imports removed (~95 across 55 files): - Removed genuinely unused imports from all major subsystems - agent/, hermes_cli/, tools/, gateway/, plugins/, cron/ - Includes imports in try/except blocks that were truly unused (vs availability checks which were left alone) Unused variables removed (~25): - Removed dead variables: connected, inner, channels, last_exc, source, new_server_names, verify, pconfig, default_terminal, result, pending_handled, temperature, loop - Dropped unused argparse subparser assignments in hermes_cli/main.py (12 instances of add_parser() where result was never used) Dead code removed: - run_agent.py: Removed dead ternary (None if False else None) and surrounding unreachable branch in identity fallback - run_agent.py: Removed write-only attribute _last_reported_tool - hermes_cli/providers.py: Removed dead @property decorator on module-level function (decorator has no effect outside a class) - gateway/run.py: Removed unused MCP config load before reconnect - gateway/platforms/slack.py: Removed dead SessionSource construction Undefined name bugs fixed (would cause NameError at runtime): - batch_runner.py: Added missing logger = logging.getLogger(__name__) - tools/environments/daytona.py: Added missing Dict and Path imports Unnecessary global statements removed (14): - tools/terminal_tool.py: 5 functions declared global for dicts they only mutated via .pop()/[key]=value (no rebinding) - tools/browser_tool.py: cleanup thread loop only reads flag - tools/rl_training_tool.py: 4 functions only do dict mutations - tools/mcp_oauth.py: only reads the global - hermes_time.py: only reads cached values Inefficient patterns fixed: - startswith/endswith tuple form: 15 instances of x.startswith('a') or 
x.startswith('b') consolidated to x.startswith(('a', 'b')) - len(x)==0 / len(x)>0: 13 instances replaced with pythonic truthiness checks (not x / bool(x)) - in dict.keys(): 5 instances simplified to in dict - Redefined unused name: removed duplicate _strip_mdv2 import in send_message_tool.py Other fixes: - hermes_cli/doctor.py: Replaced undefined logger.debug() with pass - hermes_cli/config.py: Consolidated chained .endswith() calls Test results: 3934 passed, 17 failed (all pre-existing on main), 19 skipped. Zero regressions.
2026-04-07 10:25:31 -07:00
from hermes_constants import get_optional_skills_dir
feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap (#3934) * feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap Map active skills to Telegram's slash command menu so users can discover and invoke skills directly. Three changes: 1. Telegram menu now includes active skill commands alongside built-in commands, capped at 100 entries (Telegram Bot API limit). Overflow commands remain callable but hidden from the picker. Logged at startup when cap is hit. 2. New /commands [page] gateway command for paginated browsing of all commands + skills. /help now shows first 10 skill commands and points to /commands for the full list. 3. When a user types a slash command that matches a disabled or uninstalled skill, they get actionable guidance: - Disabled: 'Enable it with: hermes skills config' - Optional (not installed): 'Install with: hermes skills install official/<path>' Built on ideas from PR #3921 by @kshitijk4poor. * chore: move 21 niche skills to optional-skills Move specialized/niche skills from built-in (skills/) to optional (optional-skills/) to reduce the default skill count. Users can install them with: hermes skills install official/<category>/<name> Moved skills (21): - mlops: accelerate, chroma, faiss, flash-attention, hermes-atropos-environments, huggingface-tokenizers, instructor, lambda-labs, llava, nemo-curator, pinecone, pytorch-lightning, qdrant, saelens, simpo, slime, tensorrt-llm, torchtitan - research: domain-intel, duckduckgo-search - devops: inference-sh cli Built-in skills: 96 → 75 Optional skills: 22 → 43 * fix: only include repo built-in skills in Telegram menu, not user-installed User-installed skills (from hub or manually added) stay accessible via /skills and by typing the command directly, but don't get registered in the Telegram slash command picker. Only skills whose SKILL.md is under the repo's skills/ directory are included in the menu. 
This keeps the Telegram menu focused on the curated built-in set while user-installed skills remain discoverable through /skills and /commands.
2026-03-30 10:57:30 -07:00
repo_root = Path(__file__).resolve().parent.parent
optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap (#3934) * feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap Map active skills to Telegram's slash command menu so users can discover and invoke skills directly. Three changes: 1. Telegram menu now includes active skill commands alongside built-in commands, capped at 100 entries (Telegram Bot API limit). Overflow commands remain callable but hidden from the picker. Logged at startup when cap is hit. 2. New /commands [page] gateway command for paginated browsing of all commands + skills. /help now shows first 10 skill commands and points to /commands for the full list. 3. When a user types a slash command that matches a disabled or uninstalled skill, they get actionable guidance: - Disabled: 'Enable it with: hermes skills config' - Optional (not installed): 'Install with: hermes skills install official/<path>' Built on ideas from PR #3921 by @kshitijk4poor. * chore: move 21 niche skills to optional-skills Move specialized/niche skills from built-in (skills/) to optional (optional-skills/) to reduce the default skill count. Users can install them with: hermes skills install official/<category>/<name> Moved skills (21): - mlops: accelerate, chroma, faiss, flash-attention, hermes-atropos-environments, huggingface-tokenizers, instructor, lambda-labs, llava, nemo-curator, pinecone, pytorch-lightning, qdrant, saelens, simpo, slime, tensorrt-llm, torchtitan - research: domain-intel, duckduckgo-search - devops: inference-sh cli Built-in skills: 96 → 75 Optional skills: 22 → 43 * fix: only include repo built-in skills in Telegram menu, not user-installed User-installed skills (from hub or manually added) stay accessible via /skills and by typing the command directly, but don't get registered in the Telegram slash command picker. Only skills whose SKILL.md is under the repo's skills/ directory are included in the menu. 
This keeps the Telegram menu focused on the curated built-in set while user-installed skills remain discoverable through /skills and /commands.
2026-03-30 10:57:30 -07:00
if optional_dir.exists():
for skill_md in optional_dir.rglob("SKILL.md"):
name = skill_md.parent.name.lower().replace("_", "-")
if name == normalized:
# Build install path: official/<category>/<name>
rel = skill_md.parent.relative_to(optional_dir)
parts = list(rel.parts)
install_path = f"official/{'/'.join(parts)}"
return (
f"The **{command_name}** skill is available but not installed.\n"
f"Install it with: `hermes skills install {install_path}`"
)
except Exception:
pass
return None
def _platform_config_key(platform: "Platform") -> str:
    """Return the config.yaml section key for *platform*.

    The LOCAL platform is stored under the historical "cli" key; every
    other platform maps straight to its enum value.
    """
    if platform == Platform.LOCAL:
        return "cli"
    return platform.value
def _load_gateway_config() -> dict:
    """Parse ~/.hermes/config.yaml into a dict; any failure yields {}.

    The yaml import is deferred so merely importing this module does not
    require PyYAML. A missing file, unreadable file, or parse error is
    logged at debug level and treated as an empty config.
    """
    try:
        cfg_file = _hermes_home / 'config.yaml'
        if cfg_file.exists():
            import yaml
            with open(cfg_file, 'r', encoding='utf-8') as fh:
                parsed = yaml.safe_load(fh)
            # safe_load returns None for an empty file — normalize to {}.
            return parsed or {}
    except Exception:
        logger.debug("Could not load gateway config from %s", _hermes_home / 'config.yaml')
    return {}
def _resolve_gateway_model(config: dict | None = None) -> str:
"""Read model from config.yaml — single source of truth.
Without this, temporary AIAgent instances (memory flush, /compress) fall
back to the hardcoded default which fails when the active provider is
openai-codex.
"""
cfg = config if config is not None else _load_gateway_config()
model_cfg = cfg.get("model", {})
if isinstance(model_cfg, str):
return model_cfg
elif isinstance(model_cfg, dict):
return model_cfg.get("default") or model_cfg.get("model") or ""
return ""
def _resolve_hermes_bin() -> Optional[list[str]]:
"""Resolve the Hermes update command as argv parts.
Tries in order:
1. ``shutil.which("hermes")`` standard PATH lookup
2. ``sys.executable -m hermes_cli.main`` fallback when Hermes is running
from a venv/module invocation and the ``hermes`` shim is not on PATH
Returns argv parts ready for quoting/joining, or ``None`` if neither works.
"""
import shutil
hermes_bin = shutil.which("hermes")
if hermes_bin:
return [hermes_bin]
try:
import importlib.util
if importlib.util.find_spec("hermes_cli") is not None:
return [sys.executable, "-m", "hermes_cli.main"]
except Exception:
pass
return None
def _parse_session_key(session_key: str) -> "dict | None":
"""Parse a session key into its component parts.
Session keys follow the format
``agent:main:{platform}:{chat_type}:{chat_id}[:{extra}...]``.
Returns a dict with ``platform``, ``chat_type``, ``chat_id``, and
optionally ``thread_id`` keys, or None if the key doesn't match.
The 6th element is only returned as ``thread_id`` for chat types where
it is unambiguous (``dm`` and ``thread``). For group/channel sessions
the suffix may be a user_id (per-user isolation) rather than a
thread_id, so we leave ``thread_id`` out to avoid mis-routing.
"""
parts = session_key.split(":")
if len(parts) >= 5 and parts[0] == "agent" and parts[1] == "main":
result = {
"platform": parts[2],
"chat_type": parts[3],
"chat_id": parts[4],
}
if len(parts) > 5 and parts[3] in ("dm", "thread"):
result["thread_id"] = parts[5]
return result
return None
feat: background process monitoring — watch_patterns for real-time output alerts * feat: add watch_patterns to background processes for output monitoring Adds a new 'watch_patterns' parameter to terminal(background=true) that lets the agent specify strings to watch for in process output. When a matching line appears, a notification is queued and injected as a synthetic message — triggering a new agent turn, similar to notify_on_complete but mid-process. Implementation: - ProcessSession gets watch_patterns field + rate-limit state - _check_watch_patterns() in ProcessRegistry scans new output chunks from all three reader threads (local, PTY, env-poller) - Rate limited: max 8 notifications per 10s window - Sustained overload (45s) permanently disables watching for that process - watch_queue alongside completion_queue, same consumption pattern - CLI drains watch_queue in both idle loop and post-turn drain - Gateway drains after agent runs via _inject_watch_notification() - Checkpoint persistence + crash recovery includes watch_patterns - Blocked in execute_code sandbox (like other bg params) - 20 new tests covering matching, rate limiting, overload kill, checkpoint persistence, schema, and handler passthrough Usage: terminal( command='npm run dev', background=true, watch_patterns=['ERROR', 'WARN', 'listening on port'] ) * refactor: merge watch_queue into completion_queue Unified queue with 'type' field distinguishing 'completion', 'watch_match', and 'watch_disabled' events. Extracted _format_process_notification() in CLI and gateway to handle all event types in a single drain loop. Removes duplication across both CLI drain sites and the gateway.
2026-04-11 03:13:23 -07:00
def _format_gateway_process_notification(evt: dict) -> "str | None":
"""Format a watch pattern event from completion_queue into a [SYSTEM:] message."""
evt_type = evt.get("type", "completion")
_sid = evt.get("session_id", "unknown")
_cmd = evt.get("command", "unknown")
if evt_type == "watch_disabled":
return f"[SYSTEM: {evt.get('message', '')}]"
if evt_type == "watch_match":
_pat = evt.get("pattern", "?")
_out = evt.get("output", "")
_sup = evt.get("suppressed", 0)
text = (
f"[SYSTEM: Background process {_sid} matched "
f"watch pattern \"{_pat}\".\n"
f"Command: {_cmd}\n"
f"Matched output:\n{_out}"
)
if _sup:
text += f"\n({_sup} earlier matches were suppressed by rate limit)"
text += "]"
return text
return None
class GatewayRunner:
"""
Main gateway controller.
Manages the lifecycle of all platform adapters and routes
messages to/from the agent.
"""
# Class-level defaults so partial construction in tests doesn't
# blow up on attribute access.
_running_agents_ts: Dict[str, float] = {}
_busy_input_mode: str = "interrupt"
_restart_drain_timeout: float = DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
_exit_code: Optional[int] = None
_draining: bool = False
_restart_requested: bool = False
_restart_task_started: bool = False
_restart_detached: bool = False
_restart_via_service: bool = False
_stop_task: Optional[asyncio.Task] = None
_session_model_overrides: Dict[str, Dict[str, str]] = {}
def __init__(self, config: Optional[GatewayConfig] = None):
self.config = config or load_gateway_config()
self.adapters: Dict[Platform, BasePlatformAdapter] = {}
Add background process management with process tool, wait, PTY, and stdin support New process registry and tool for managing long-running background processes across all terminal backends (local, Docker, Singularity, Modal, SSH). Process Registry (tools/process_registry.py): - ProcessSession tracking with rolling 200KB output buffer - spawn_local() with optional PTY via ptyprocess for interactive CLIs - spawn_via_env() for non-local backends (runs inside sandbox, never on host) - Background reader threads per process (Popen stdout or PTY) - wait() with timeout clamping, interrupt support, and transparent limit reporting - JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery - Module-level singleton shared across agent loop, gateway, and RL Process Tool (model_tools.py): - 7 actions: list, poll, log, wait, kill, write, submit - Paired with terminal in all toolsets (CLI, messaging, RL) - Timeout clamping with transparent notes in response Terminal Tool Updates (tools/terminal_tool.py): - Replaced nohup background mode with registry spawn (returns session_id) - Added workdir parameter for per-command working directory - Added check_interval parameter for gateway auto-check watchers - Added pty parameter for interactive CLI tools (Codex, Claude Code) - Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs - Cleanup thread now respects active background processes (won't reap sandbox) Gateway Integration (gateway/run.py, session.py, config.py): - Session reset protection: sessions with active processes exempt from reset - Default idle timeout increased from 2 hours to 24 hours - from_dict fallback aligned to match (was 120, now 1440) - session_key env var propagated to process registry for session mapping - Crash recovery on gateway startup via checkpoint probe - check_interval watcher: asyncio task polls process, delivers updates to platform RL Safety (environments/): - tool_context.py cleanup() kills background processes on episode end 
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools) - Process tool safe in RL via wait() blocking the agent loop Also: - Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all]) - Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP - Updated AGENTS.md with process management docs and project structure - Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
# Load ephemeral config from config.yaml / env vars.
# Both are injected at API-call time only and never persisted.
self._prefill_messages = self._load_prefill_messages()
self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
self._reasoning_config = self._load_reasoning_config()
self._service_tier = self._load_service_tier()
fix: /reasoning command — add gateway support, fix display, persist settings (#1031) * fix: /reasoning command output ordering, display, and inline think extraction Three issues with the /reasoning command: 1. Output interleaving: The command echo used print() while feedback used _cprint(), causing them to render out-of-order under prompt_toolkit's patch_stdout. Changed echo to use _cprint() so all output renders through the same path in correct order. 2. Reasoning display not working: /reasoning show toggled a flag but reasoning never appeared for models that embed thinking in inline <think> blocks rather than structured API fields. Added fallback extraction in _build_assistant_message to capture <think> block content as reasoning when no structured reasoning fields (reasoning, reasoning_content, reasoning_details) are present. This feeds into both the reasoning callback (during tool loops) and the post-response reasoning box display. 3. Feedback clarity: Added checkmarks to confirm actions, persisted show/hide to config (was session-only before), and aligned the status display for readability. Tests: 7 new tests for inline think block extraction (41 total). * feat: add /reasoning command to gateway (Telegram/Discord/etc) The /reasoning command only existed in the CLI — messaging platforms had no way to view or change reasoning settings. This adds: 1. /reasoning command handler in the gateway: - No args: shows current effort level and display state - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh) - /reasoning show|hide: toggles reasoning display in responses - All changes saved to config.yaml immediately 2. Reasoning display in gateway responses: - When show_reasoning is enabled, prepends a 'Reasoning' block with the model's last_reasoning content before the response - Collapses long reasoning (>15 lines) to keep messages readable - Uses last_reasoning from run_conversation result dict 3. 
Plumbing: - Added _show_reasoning attribute loaded from config at startup - Propagated last_reasoning through _run_agent return dict - Added /reasoning to help text and known_commands set - Uses getattr for _show_reasoning to handle test stubs
2026-03-12 05:38:19 -07:00
self._show_reasoning = self._load_show_reasoning()
self._busy_input_mode = self._load_busy_input_mode()
self._restart_drain_timeout = self._load_restart_drain_timeout()
self._provider_routing = self._load_provider_routing()
self._fallback_model = self._load_fallback_model()
fix: hermes update causes dual gateways on macOS (launchd) (#1567) * feat: add optional smart model routing Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow. * fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s * fix(gateway): avoid recursive ExecStop in user systemd unit * fix: extend ExecStop removal and TimeoutStopSec=60 to system unit The cherry-picked PR #1448 fix only covered the user systemd unit. The system unit had the same TimeoutStopSec=15 and could benefit from the same 60s timeout for clean shutdown. Also adds a regression test for the system unit. --------- Co-authored-by: Ninja <ninja@local> * feat(skills): add blender-mcp optional skill for 3D modeling Control a running Blender instance from Hermes via socket connection to the blender-mcp addon (port 9876). Supports creating 3D objects, materials, animations, and running arbitrary bpy code. Placed in optional-skills/ since it requires Blender 4.3+ desktop with a third-party addon manually started each session. * feat(acp): support slash commands in ACP adapter (#1532) Adds /help, /model, /tools, /context, /reset, /compact, /version to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled directly in the server without instantiating the TUI — each command queries agent/session state and returns plain text. Unrecognized /commands fall through to the LLM as normal messages. /model uses detect_provider_for_model() for auto-detection when switching models, matching the CLI and gateway behavior. 
Fixes #1402 * fix(logging): improve error logging in session search tool (#1533) * fix(gateway): restart on retryable startup failures (#1517) * feat(email): add skip_attachments option via config.yaml * feat(email): add skip_attachments option via config.yaml Adds a config.yaml-driven option to skip email attachments in the gateway email adapter. Useful for malware protection and bandwidth savings. Configure in config.yaml: platforms: email: skip_attachments: true Based on PR #1521 by @an420eth, changed from env var to config.yaml (via PlatformConfig.extra) to match the project's config-first pattern. * docs: document skip_attachments option for email adapter * fix(telegram): retry on transient TLS failures during connect and send Add exponential-backoff retry (3 attempts) around initialize() to handle transient TLS resets during gateway startup. Also catches TimedOut and OSError in addition to NetworkError. Add exponential-backoff retry (3 attempts) around send_message() for NetworkError during message delivery, wrapping the existing Markdown fallback logic. Both imports are guarded with try/except ImportError for test environments where telegram is mocked. Based on PR #1527 by cmd8. Closes #1526. * feat: permissive block_anchor thresholds and unicode normalization (#1539) Salvaged from PR #1528 by an420eth. Closes #517. Improves _strategy_block_anchor in fuzzy_match.py: - Add unicode normalization (smart quotes, em/en-dashes, ellipsis, non-breaking spaces → ASCII) so LLM-produced unicode artifacts don't break anchor line matching - Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for multiple candidates — if first/last lines match exactly, the block is almost certainly correct - Use original (non-normalized) content for offset calculation to preserve correct character positions Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space anchors, very-low-similarity unique matches), zero regressions on all 9 existing fuzzy match tests. 
Co-authored-by: an420eth <an420eth@users.noreply.github.com> * feat(cli): add file path autocomplete in the input prompt (#1545) When typing a path-like token (./ ../ ~/ / or containing /), the CLI now shows filesystem completions in the dropdown menu. Directories show a trailing slash and 'dir' label; files show their size. Completions are case-insensitive and capped at 30 entries. Triggered by tokens like: edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ... check ~/doc → shows ~/docs/, ~/documents/, ... read /etc/hos → shows /etc/hosts, /etc/hostname, ... open tools/reg → shows tools/registry.py Slash command autocomplete (/help, /model, etc.) is unaffected — it still triggers when the input starts with /. Inspired by OpenCode PR #145 (file path completion menu). Implementation: - hermes_cli/commands.py: _extract_path_word() detects path-like tokens, _path_completions() yields filesystem Completions with size labels, get_completions() routes to paths vs slash commands - tests/hermes_cli/test_path_completion.py: 26 tests covering path extraction, prefix filtering, directory markers, home expansion, case-insensitivity, integration with slash commands * feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled Add privacy.redact_pii config option (boolean, default false). When enabled, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM provider: - Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256> - User IDs → hashed to user_<sha256> - Chat IDs → numeric portion hashed, platform prefix preserved - Home channel IDs → hashed - Names/usernames → NOT affected (user-chosen, publicly visible) Hashes are deterministic (same user → same hash) so the model can still distinguish users in group chats. Routing and delivery use the original values internally — redaction only affects LLM context. Inspired by OpenClaw PR #47959. 
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs) Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM needs the real ID to tag users. Redaction now only applies to WhatsApp, Signal, and Telegram where IDs are pure routing metadata. Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack. * feat: smart approvals + /stop command (inspired by OpenAI Codex) * feat: smart approvals — LLM-based risk assessment for dangerous commands Adds a 'smart' approval mode that uses the auxiliary LLM to assess whether a flagged command is genuinely dangerous or a false positive, auto-approving low-risk commands without prompting the user. Inspired by OpenAI Codex's Smart Approvals guardian subagent (openai/codex#13860). Config (config.yaml): approvals: mode: manual # manual (default), smart, off Modes: - manual — current behavior, always prompt the user - smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block), or ESCALATE (fall through to manual prompt) - off — skip all approval prompts (equivalent to --yolo) When smart mode auto-approves, the pattern gets session-level approval so subsequent uses of the same pattern don't trigger another LLM call. When it denies, the command is blocked without user prompt. When uncertain, it escalates to the normal manual approval flow. The LLM prompt is carefully scoped: it sees only the command text and the flagged reason, assesses actual risk vs false positive, and returns a single-word verdict. * feat: make smart approval model configurable via config.yaml Adds auxiliary.approval section to config.yaml with the same provider/model/base_url/api_key pattern as other aux tasks (vision, web_extract, compression, etc.). Config: auxiliary: approval: provider: auto model: '' # fast/cheap model recommended base_url: '' api_key: '' Bridged to env vars in both CLI and gateway paths so the aux client picks them up automatically. 
* feat: add /stop command to kill all background processes Adds a /stop slash command that kills all running background processes at once. Currently users have to process(list) then process(kill) for each one individually. Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current turn) from /stop (cleans up background processes). See openai/codex#14602. Ctrl+C continues to only interrupt the active agent turn — background dev servers, watchers, etc. are preserved. /stop is the explicit way to clean them all up. * feat: first-class plugin architecture + hide status bar cost by default (#1544) The persistent status bar now shows context %, token counts, and duration but NOT $ cost by default. Cost display is opt-in via: display: show_cost: true in config.yaml, or: hermes config set display.show_cost true The /usage command still shows full cost breakdown since the user explicitly asked for it — this only affects the always-visible bar. Status bar without cost: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m Status bar with show_cost: true: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m * feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex) * feat: improve memory prioritization — user preferences over procedural knowledge Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493) which focus memory writes on user preferences and recurring patterns rather than procedural task details. Key insight: 'Optimize for reducing future user steering — the most valuable memory prevents the user from having to repeat themselves.' 
Changes: - MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy and the core principle about reducing user steering - MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put corrections first, added explicit PRIORITY guidance - Memory nudge (run_agent.py): now asks specifically about preferences, corrections, and workflow patterns instead of generic 'anything' - Memory flush (run_agent.py): now instructs to prioritize user preferences and corrections over task-specific details * feat: more aggressive skill creation and update prompting Press harder on skill updates — the agent should proactively patch skills when it encounters issues during use, not wait to be asked. Changes: - SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction to patch skills immediately when found outdated/wrong - Skills header: added instruction to update loaded skills before finishing if they had missing steps or wrong commands - Skill nudge: more assertive ('save the approach' not 'consider saving'), now also prompts for updating existing skills used in the task - Skill nudge interval: lowered default from 15 to 10 iterations - skill_manage schema: added 'patch it immediately' to update triggers * feat: first-class plugin architecture (#1555) Plugin system for extending Hermes with custom tools, hooks, and integrations — no source code changes required. 
Core system (hermes_cli/plugins.py): - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and pip entry_points (hermes_agent.plugins group) - PluginContext with register_tool() and register_hook() - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call, on_session_start/end - Namespace package handling for relative imports in plugins - Graceful error isolation — broken plugins never crash the agent Integration (model_tools.py): - Plugin discovery runs after built-in + MCP tools - Plugin tools bypass toolset filter via get_plugin_tool_names() - Pre/post tool call hooks fire in handle_function_call() CLI: - /plugins command shows loaded plugins, tool counts, status - Added to COMMANDS dict for autocomplete Docs: - Getting started guide (build-a-hermes-plugin.md) — full tutorial building a calculator plugin step by step - Reference page (features/plugins.md) — quick overview + tables - Covers: file structure, schemas, handlers, hooks, data files, bundled skills, env var gating, pip distribution, common mistakes Tests: 16 tests covering discovery, loading, hooks, tool visibility. * fix: hermes update causes dual gateways on macOS (launchd) Three bugs worked together to create the dual-gateway problem: 1. cmd_update only checked systemd for gateway restart, completely ignoring launchd on macOS. After killing the PID it would print 'Restart it with: hermes gateway run' even when launchd was about to auto-respawn the process. 2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway after SIGTERM (non-zero exit), so the user's manual restart created a second instance. 3. The launchd plist lacked --replace (systemd had it), so the respawned gateway didn't kill stale instances on startup. 
Fixes: - Add --replace to launchd ProgramArguments (matches systemd) - Add launchd detection to cmd_update's auto-restart logic - Print 'auto-restart via launchd' instead of manual restart hint * fix: add launchd plist auto-refresh + explicit restart in cmd_update Two integration issues with the initial fix: 1. Existing macOS users with old plist (no --replace) would never get the fix until manual uninstall/reinstall. Added refresh_launchd_plist_if_needed() — mirrors the existing refresh_systemd_unit_if_needed(). Called from launchd_start(), launchd_restart(), and cmd_update. 2. cmd_update relied on KeepAlive respawn after SIGTERM rather than explicit launchctl stop/start. This caused races: launchd would respawn the old process before the PID file was cleaned up. Now does explicit stop+start (matching how systemd gets an explicit systemctl restart), with plist refresh first so the new --replace flag is picked up. --------- Co-authored-by: Ninja <ninja@local> Co-authored-by: alireza78a <alireza78a@users.noreply.github.com> Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com> Co-authored-by: JP Lew <polydegen@protonmail.com> Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
self._smart_model_routing = self._load_smart_model_routing()
Add background process management with process tool, wait, PTY, and stdin support New process registry and tool for managing long-running background processes across all terminal backends (local, Docker, Singularity, Modal, SSH). Process Registry (tools/process_registry.py): - ProcessSession tracking with rolling 200KB output buffer - spawn_local() with optional PTY via ptyprocess for interactive CLIs - spawn_via_env() for non-local backends (runs inside sandbox, never on host) - Background reader threads per process (Popen stdout or PTY) - wait() with timeout clamping, interrupt support, and transparent limit reporting - JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery - Module-level singleton shared across agent loop, gateway, and RL Process Tool (model_tools.py): - 7 actions: list, poll, log, wait, kill, write, submit - Paired with terminal in all toolsets (CLI, messaging, RL) - Timeout clamping with transparent notes in response Terminal Tool Updates (tools/terminal_tool.py): - Replaced nohup background mode with registry spawn (returns session_id) - Added workdir parameter for per-command working directory - Added check_interval parameter for gateway auto-check watchers - Added pty parameter for interactive CLI tools (Codex, Claude Code) - Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs - Cleanup thread now respects active background processes (won't reap sandbox) Gateway Integration (gateway/run.py, session.py, config.py): - Session reset protection: sessions with active processes exempt from reset - Default idle timeout increased from 2 hours to 24 hours - from_dict fallback aligned to match (was 120, now 1440) - session_key env var propagated to process registry for session mapping - Crash recovery on gateway startup via checkpoint probe - check_interval watcher: asyncio task polls process, delivers updates to platform RL Safety (environments/): - tool_context.py cleanup() kills background processes on episode end 
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools) - Process tool safe in RL via wait() blocking the agent loop Also: - Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all]) - Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP - Updated AGENTS.md with process management docs and project structure - Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
# Wire process registry into session store for reset protection
from tools.process_registry import process_registry
self.session_store = SessionStore(
self.config.sessions_dir, self.config,
has_active_processes_fn=lambda key: process_registry.has_active_for_session(key),
)
self.delivery_router = DeliveryRouter(self.config)
self._running = False
self._shutdown_event = asyncio.Event()
self._exit_cleanly = False
self._exit_with_failure = False
self._exit_reason: Optional[str] = None
self._exit_code: Optional[int] = None
self._draining = False
self._restart_requested = False
self._restart_task_started = False
self._restart_detached = False
self._restart_via_service = False
self._stop_task: Optional[asyncio.Task] = None
# Track running agents per session for interrupt support
# Key: session_key, Value: AIAgent instance
self._running_agents: Dict[str, Any] = {}
self._running_agents_ts: Dict[str, float] = {} # start timestamp per session
self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt
self._busy_ack_ts: Dict[str, float] = {} # last busy-ack timestamp per session (debounce)
fix(gateway): /model shows active fallback model instead of config default (#1615) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552) Long messages sent via send_message tool or cron delivery silently failed when exceeding platform limits. Gateway adapters handle this via truncate_message(), but the standalone senders in send_message_tool bypassed that entirely. - Apply truncate_message() chunking in _send_to_platform() before dispatching to individual platform senders - Remove naive message[i:i+2000] character split in _send_discord() in favor of centralized smart splitting - Attach media files to last chunk only for Telegram - Add regression tests for chunking and media placement Cherry-picked from PR #1557 by llbn. * fix(approval): show full command in dangerous command approval (#1553) Previously the command was truncated to 80 chars in CLI (with a [v]iew full option), 500 chars in Discord embeds, and missing entirely in Telegram/Slack approval messages. Now the full command is always displayed everywhere: - CLI: removed 80-char truncation and [v]iew full menu option - Gateway (TG/Slack): approval_required message includes full command in a code block - Discord: embed shows full command up to 4096-char limit - Windows: skip SIGALRM-based test timeout (Unix-only) - Updated tests: replaced view-flow tests with direct approval tests Cherry-picked from PR #1566 by crazywriter1. * fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624) The interrupt polling loop in chat() waited on the queue without invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy buffer only flushed on input events, causing the CLI to appear frozen during tool execution until the user typed a key. Fix: call _invalidate() on each queue timeout (every ~100ms, throttled to 150ms) to force the renderer to flush buffered agent output. 
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580) When --migrate-secrets is not passed (the default), API keys like OPENROUTER_API_KEY are silently skipped with no warning. Users don't realize their keys weren't migrated until the agent fails to connect. Add a post-migration warning with actionable instructions: either re-run with --migrate-secrets or add the key manually via hermes config set. Cherry-picked from PR #1593 by ygd58. * fix(security): block sandbox backend creds from subprocess env (#1264) Add Modal and Daytona sandbox credentials to the subprocess env blocklist so they're not leaked to agent terminal sessions via printenv/env. Cherry-picked from PR #1571 by ygd58. * fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816) When a user sends multiple messages while the agent keeps failing, _run_agent() calls itself recursively with no depth limit. This can exhaust stack/memory if the agent is in a failure loop. Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is logged and the current result is returned instead of recursing deeper. The log handler duplication bug described in #816 was already fixed separately (AIAgent.__init__ deduplicates handlers). * fix(gateway): /model shows active fallback model instead of config default (#1615) When the agent falls back to a different model (e.g. due to rate limiting), /model still showed the config default. Now tracks the effective model/provider after each agent run and displays it. Cleared when the primary model succeeds again or the user explicitly switches via /model. Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for test compatibility. --------- Co-authored-by: buray <ygd58@users.noreply.github.com> Co-authored-by: lbn <llbn@users.noreply.github.com> Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com> Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
2026-03-17 02:26:51 -07:00
# Cache AIAgent instances per session to preserve prompt caching.
# Without this, a new AIAgent is created per message, rebuilding the
# system prompt (including memory) every turn — breaking prefix cache
# and costing ~10x more on providers with prompt caching (Anthropic).
# Key: session_key, Value: (AIAgent, config_signature_str)
import threading as _threading
self._agent_cache: Dict[str, tuple] = {}
self._agent_cache_lock = _threading.Lock()
# Per-session model overrides from /model command.
# Key: session_key, Value: dict with model/provider/api_key/base_url/api_mode
self._session_model_overrides: Dict[str, Dict[str, str]] = {}
# Track pending exec approvals per session
# Key: session_key, Value: {"command": str, "pattern_key": str, ...}
self._pending_approvals: Dict[str, Dict[str, Any]] = {}
# Track platforms that failed to connect for background reconnection.
# Key: Platform enum, Value: {"config": platform_config, "attempts": int, "next_retry": float}
self._failed_platforms: Dict[Platform, Dict[str, Any]] = {}
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
# Track pending /update prompt responses per session.
# Key: session_key, Value: True when a prompt is waiting for user input.
self._update_prompt_pending: Dict[str, bool] = {}
# Persistent Honcho managers keyed by gateway session key.
# This preserves write_frequency="session" semantics across short-lived
# per-message AIAgent instances.
feat(security): add tirith pre-exec command scanning Integrate tirith as a pre-execution security scanner that detects homograph URLs, pipe-to-interpreter patterns, terminal injection, zero-width Unicode, and environment variable manipulation — threats the existing 50-pattern dangerous command detector doesn't cover. Architecture: gather-then-decide — both tirith and the dangerous command detector run before any approval prompt, preventing gateway force=True replay from bypassing one check when only the other was shown to the user. New files: - tools/tirith_security.py: subprocess wrapper with auto-installer, mandatory cosign provenance verification, non-blocking background download, disk-persistent failure markers with retryable-cause tracking (cosign_missing auto-clears when cosign appears on PATH) - tests/tools/test_tirith_security.py: 62 tests covering exit code mapping, fail_open, cosign verification, background install, HERMES_HOME isolation, and failure recovery - tests/tools/test_command_guards.py: 21 integration tests for the combined guard orchestration Modified files: - tools/approval.py: add check_all_command_guards() orchestrator, add allow_permanent parameter to prompt_dangerous_approval() - tools/terminal_tool.py: replace _check_dangerous_command with consolidated check_all_command_guards - cli.py: update _approval_callback for allow_permanent kwarg, call ensure_installed() at startup - gateway/run.py: iterate pattern_keys list on replay approval, call ensure_installed() at startup - hermes_cli/config.py: add security config defaults, split commented sections for independent fallback - cli-config.yaml.example: document tirith security config
2026-03-11 14:20:32 +05:30
fix: prevent context pressure warning spam after compression (#4012) * feat: add /yolo slash command to toggle dangerous command approvals Adds a /yolo command that toggles HERMES_YOLO_MODE at runtime, skipping all dangerous command approval prompts for the current session. Works in both CLI and gateway (Telegram, Discord, etc.). - /yolo -> ON: all commands auto-approved, no confirmation prompts - /yolo -> OFF: normal approval flow restored The --yolo CLI flag already existed for launch-time opt-in. This adds the ability to toggle mid-session without restarting. Session-scoped — resets when the process ends. Uses the existing HERMES_YOLO_MODE env var that check_all_command_guards() already respects. * fix: prevent context pressure warning spam (agent loop + gateway rate-limit) Two complementary fixes for repeated context pressure warnings spamming gateway users (Telegram, Discord, etc.): 1. Agent-level loop fix (run_agent.py): After compression, only reset _context_pressure_warned if the post-compression estimate is actually below the 85% warning level. Previously the flag was unconditionally reset, causing the warning to re-fire every loop iteration when compression couldn't reduce below 85% of the threshold (e.g. very low threshold like 15%, or system prompt alone exceeds the warning level). 2. Gateway-level rate-limit (gateway/run.py, salvaged from PR #3786): Per-chat_id cooldown of 1 hour on compression warning messages. Both warning paths ('still large after compression' and 'compression failed') are gated. Defense-in-depth — even if the agent-level fix has edge cases, users won't see more than one warning per hour. Co-authored-by: dlkakbs <dlkakbs@users.noreply.github.com> --------- Co-authored-by: dlkakbs <dlkakbs@users.noreply.github.com>
2026-03-30 13:18:21 -07:00
feat(security): add tirith pre-exec command scanning Integrate tirith as a pre-execution security scanner that detects homograph URLs, pipe-to-interpreter patterns, terminal injection, zero-width Unicode, and environment variable manipulation — threats the existing 50-pattern dangerous command detector doesn't cover. Architecture: gather-then-decide — both tirith and the dangerous command detector run before any approval prompt, preventing gateway force=True replay from bypassing one check when only the other was shown to the user. New files: - tools/tirith_security.py: subprocess wrapper with auto-installer, mandatory cosign provenance verification, non-blocking background download, disk-persistent failure markers with retryable-cause tracking (cosign_missing auto-clears when cosign appears on PATH) - tests/tools/test_tirith_security.py: 62 tests covering exit code mapping, fail_open, cosign verification, background install, HERMES_HOME isolation, and failure recovery - tests/tools/test_command_guards.py: 21 integration tests for the combined guard orchestration Modified files: - tools/approval.py: add check_all_command_guards() orchestrator, add allow_permanent parameter to prompt_dangerous_approval() - tools/terminal_tool.py: replace _check_dangerous_command with consolidated check_all_command_guards - cli.py: update _approval_callback for allow_permanent kwarg, call ensure_installed() at startup - gateway/run.py: iterate pattern_keys list on replay approval, call ensure_installed() at startup - hermes_cli/config.py: add security config defaults, split commented sections for independent fallback - cli-config.yaml.example: document tirith security config
2026-03-11 14:20:32 +05:30
# Ensure tirith security scanner is available (downloads if needed)
try:
from tools.tirith_security import ensure_installed
ensure_installed(log_failures=False)
feat(security): add tirith pre-exec command scanning Integrate tirith as a pre-execution security scanner that detects homograph URLs, pipe-to-interpreter patterns, terminal injection, zero-width Unicode, and environment variable manipulation — threats the existing 50-pattern dangerous command detector doesn't cover. Architecture: gather-then-decide — both tirith and the dangerous command detector run before any approval prompt, preventing gateway force=True replay from bypassing one check when only the other was shown to the user. New files: - tools/tirith_security.py: subprocess wrapper with auto-installer, mandatory cosign provenance verification, non-blocking background download, disk-persistent failure markers with retryable-cause tracking (cosign_missing auto-clears when cosign appears on PATH) - tests/tools/test_tirith_security.py: 62 tests covering exit code mapping, fail_open, cosign verification, background install, HERMES_HOME isolation, and failure recovery - tests/tools/test_command_guards.py: 21 integration tests for the combined guard orchestration Modified files: - tools/approval.py: add check_all_command_guards() orchestrator, add allow_permanent parameter to prompt_dangerous_approval() - tools/terminal_tool.py: replace _check_dangerous_command with consolidated check_all_command_guards - cli.py: update _approval_callback for allow_permanent kwarg, call ensure_installed() at startup - gateway/run.py: iterate pattern_keys list on replay approval, call ensure_installed() at startup - hermes_cli/config.py: add security config defaults, split commented sections for independent fallback - cli-config.yaml.example: document tirith security config
2026-03-11 14:20:32 +05:30
except Exception:
pass # Non-fatal — fail-open at scan time if unavailable
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image 
vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# Initialize session database for session_search tool support
self._session_db = None
try:
from hermes_state import SessionDB
self._session_db = SessionDB()
except Exception as e:
logger.debug("SQLite session store not available: %s", e)
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image 
vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# DM pairing store for code-based user authorization
from gateway.pairing import PairingStore
self.pairing_store = PairingStore()
# Event hook system
from gateway.hooks import HookRegistry
self.hooks = HookRegistry()
# Per-chat voice reply mode: "off" | "voice_only" | "all"
self._voice_mode: Dict[str, str] = self._load_voice_modes()
# Track background tasks to prevent garbage collection mid-execution
self._background_tasks: set = set()
# -- Setup skill availability ----------------------------------------
def _has_setup_skill(self) -> bool:
"""Check if the hermes-agent-setup skill is installed."""
try:
from tools.skill_manager_tool import _find_skill
return _find_skill("hermes-agent-setup") is not None
except Exception:
return False
# -- Voice mode persistence ------------------------------------------
# Class-level path where per-chat voice reply modes are persisted as JSON.
# ``_hermes_home`` is a module-level Path defined outside this chunk
# (presumably ~/.hermes — confirm against the module prologue).
_VOICE_MODE_PATH = _hermes_home / "gateway_voice_mode.json"
def _load_voice_modes(self) -> Dict[str, str]:
try:
data = json.loads(self._VOICE_MODE_PATH.read_text())
except (FileNotFoundError, json.JSONDecodeError, OSError):
return {}
if not isinstance(data, dict):
return {}
valid_modes = {"off", "voice_only", "all"}
return {
str(chat_id): mode
for chat_id, mode in data.items()
if mode in valid_modes
}
def _save_voice_modes(self) -> None:
try:
self._VOICE_MODE_PATH.parent.mkdir(parents=True, exist_ok=True)
self._VOICE_MODE_PATH.write_text(
json.dumps(self._voice_mode, indent=2)
)
except OSError as e:
logger.warning("Failed to save voice modes: %s", e)
def _set_adapter_auto_tts_disabled(self, adapter, chat_id: str, disabled: bool) -> None:
"""Update an adapter's in-memory auto-TTS suppression set if present."""
disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
if not isinstance(disabled_chats, set):
return
if disabled:
disabled_chats.add(chat_id)
else:
disabled_chats.discard(chat_id)
def _sync_voice_mode_state_to_adapter(self, adapter) -> None:
"""Restore persisted /voice off state into a live platform adapter."""
disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
if not isinstance(disabled_chats, set):
return
disabled_chats.clear()
disabled_chats.update(
chat_id for chat_id, mode in self._voice_mode.items() if mode == "off"
)
# -----------------------------------------------------------------
def _flush_memories_for_session(
    self,
    old_session_id: str,
    session_key: Optional[str] = None,
):
    """Prompt the agent to save memories/skills before context is lost.

    Spins up a throwaway, silenced agent over the session transcript and
    asks it to persist anything worth keeping (memory entries, skills)
    before the session is reset.

    Synchronous worker meant to be called via run_in_executor from
    an async context so it doesn't block the event loop.

    NOTE(review): the original text of this function was corrupted by
    embedded VCS commit-log residue splitting the AIAgent(...) call;
    reconstructed from the surviving code either side of the residue.
    """
    # Skip cron sessions — they run headless with no meaningful user
    # conversation to extract memories from.
    if old_session_id and old_session_id.startswith("cron_"):
        logger.debug("Skipping memory flush for cron session: %s", old_session_id)
        return
    try:
        history = self.session_store.load_transcript(old_session_id)
        # Very short conversations rarely contain anything worth saving.
        if not history or len(history) < 4:
            return
        from run_agent import AIAgent
        model, runtime_kwargs = self._resolve_session_agent_runtime(
            session_key=session_key,
        )
        # Without an API key the flush agent can't make any calls — bail.
        if not runtime_kwargs.get("api_key"):
            return
        tmp_agent = AIAgent(
            **runtime_kwargs,
            model=model,
            max_iterations=8,
            quiet_mode=True,
            skip_memory=True,  # Flush agent — no memory provider
            enabled_toolsets=["memory", "skills"],
            session_id=old_session_id,
        )
        try:
            # Fully silence the flush agent — quiet_mode only suppresses init
            # messages; tool call output still leaks to the terminal through
            # _safe_print → _print_fn. Set a no-op to prevent that.
            tmp_agent._print_fn = lambda *a, **kw: None
            # Build conversation history from transcript
            msgs = [
                {"role": m.get("role"), "content": m.get("content")}
                for m in history
                if m.get("role") in ("user", "assistant") and m.get("content")
            ]
            # Read live memory state from disk so the flush agent can see
            # what's already saved and avoid overwriting newer entries.
            _current_memory = ""
            try:
                from tools.memory_tool import get_memory_dir
                _mem_dir = get_memory_dir()
                for fname, label in [
                    ("MEMORY.md", "MEMORY (your personal notes)"),
                    ("USER.md", "USER PROFILE (who the user is)"),
                ]:
                    fpath = _mem_dir / fname
                    if fpath.exists():
                        content = fpath.read_text(encoding="utf-8").strip()
                        if content:
                            _current_memory += f"\n\n## Current {label}:\n{content}"
            except Exception:
                pass  # Non-fatal — flush still works, just without the guard
            # Give the agent a real turn to think about what to save
            flush_prompt = (
                "[System: This session is about to be automatically reset due to "
                "inactivity or a scheduled daily reset. The conversation context "
                "will be cleared after this turn.\n\n"
                "Review the conversation above and:\n"
                "1. Save any important facts, preferences, or decisions to memory "
                "(user profile or your notes) that would be useful in future sessions.\n"
                "2. If you discovered a reusable workflow or solved a non-trivial "
                "problem, consider saving it as a skill.\n"
                "3. If nothing is worth saving, that's fine — just skip.\n\n"
            )
            if _current_memory:
                flush_prompt += (
                    "IMPORTANT — here is the current live state of memory. Other "
                    "sessions, cron jobs, or the user may have updated it since this "
                    "conversation ended. Do NOT overwrite or remove entries unless "
                    "the conversation above reveals something that genuinely "
                    "supersedes them. Only add new information that is not already "
                    "captured below."
                    f"{_current_memory}\n\n"
                )
            flush_prompt += (
                "Do NOT respond to the user. Just use the memory and skill_manage "
                "tools if needed, then stop.]"
            )
            tmp_agent.run_conversation(
                user_message=flush_prompt,
                conversation_history=msgs,
            )
        finally:
            # Always release agent resources, even if the flush turn raised.
            self._cleanup_agent_resources(tmp_agent)
        logger.info("Pre-reset memory flush completed for session %s", old_session_id)
    except Exception as e:
        # Best-effort: a failed flush must never block the session reset.
        logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)
async def _async_flush_memories(
self,
old_session_id: str,
session_key: Optional[str] = None,
):
"""Run the sync memory flush in a thread pool so it won't block the event loop."""
loop = asyncio.get_running_loop()
await loop.run_in_executor(
None,
self._flush_memories_for_session,
old_session_id,
session_key,
)
@property
def should_exit_cleanly(self) -> bool:
    """Read-only accessor for the ``_exit_cleanly`` flag."""
    return self._exit_cleanly
@property
def should_exit_with_failure(self) -> bool:
    """Read-only accessor for the ``_exit_with_failure`` flag."""
    return self._exit_with_failure
@property
def exit_reason(self) -> Optional[str]:
    """Read-only accessor for the recorded exit reason, if any."""
    return self._exit_reason
@property
def exit_code(self) -> Optional[int]:
    """Read-only accessor for the recorded exit code, if any."""
    return self._exit_code
def _session_key_for_source(self, source: SessionSource) -> str:
    """Resolve the current session key for a source, honoring gateway config when available.

    Prefers the session store's own key generation; falls back to
    build_session_key() driven by the gateway config (or its defaults)
    when the store is absent, raises, or returns an unusable value.
    """
    store = getattr(self, "session_store", None)
    if store is not None:
        key = None
        try:
            key = store._generate_session_key(source)
        except Exception:
            pass
        if isinstance(key, str) and key:
            return key
    cfg = getattr(self, "config", None)
    return build_session_key(
        source,
        group_sessions_per_user=getattr(cfg, "group_sessions_per_user", True),
        thread_sessions_per_user=getattr(cfg, "thread_sessions_per_user", False),
    )
def _resolve_session_agent_runtime(
    self,
    *,
    source: Optional[SessionSource] = None,
    session_key: Optional[str] = None,
    user_config: Optional[dict] = None,
) -> tuple[str, dict]:
    """Resolve model/runtime for a session, honoring session-scoped /model overrides.

    If the session override already contains a complete provider bundle
    (provider/api_key/base_url/api_mode), prefer it directly instead of
    resolving fresh global runtime state first.
    """
    # Work out which session key this request maps to.
    key = session_key
    if not key and source is not None:
        try:
            key = self._session_key_for_source(source)
        except Exception:
            key = None
    model = _resolve_gateway_model(user_config)
    override = self._session_model_overrides.get(key) if key else None
    if override:
        chosen_model = override.get("model", model)
        bundle = {
            "provider": override.get("provider"),
            "api_key": override.get("api_key"),
            "base_url": override.get("base_url"),
            "api_mode": override.get("api_mode"),
        }
        if bundle.get("api_key"):
            # Complete provider bundle — short-circuit without touching
            # global runtime state.
            logger.debug(
                "Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s",
                (key or "")[:30], model, chosen_model,
                bundle.get("provider"),
            )
            return chosen_model, bundle
        # Override exists but has no api_key — fall through to env-based
        # resolution and apply model/provider from the override on top.
        logger.debug(
            "Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s",
            (key or "")[:30], model, chosen_model,
        )
    else:
        logger.debug(
            "No session model override: session=%s config_model=%s override_keys=%s",
            (key or "")[:30], model,
            list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]",
        )
    runtime_kwargs = _resolve_runtime_agent_kwargs()
    if override and key:
        model, runtime_kwargs = self._apply_session_model_override(
            key, model, runtime_kwargs
        )
    # When the config has no model.default but a provider was resolved
    # (e.g. user ran `hermes auth add openai-codex` without `hermes model`),
    # fall back to the provider's first catalog model so the API call
    # doesn't fail with "model must be a non-empty string".
    if not model and runtime_kwargs.get("provider"):
        try:
            from hermes_cli.models import get_default_model_for_provider
            model = get_default_model_for_provider(runtime_kwargs["provider"])
            if model:
                logger.info(
                    "No model configured — defaulting to %s for provider %s",
                    model, runtime_kwargs["provider"],
                )
        except Exception:
            pass
    return model, runtime_kwargs
fix: hermes update causes dual gateways on macOS (launchd) (#1567) * feat: add optional smart model routing Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow. * fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s * fix(gateway): avoid recursive ExecStop in user systemd unit * fix: extend ExecStop removal and TimeoutStopSec=60 to system unit The cherry-picked PR #1448 fix only covered the user systemd unit. The system unit had the same TimeoutStopSec=15 and could benefit from the same 60s timeout for clean shutdown. Also adds a regression test for the system unit. --------- Co-authored-by: Ninja <ninja@local> * feat(skills): add blender-mcp optional skill for 3D modeling Control a running Blender instance from Hermes via socket connection to the blender-mcp addon (port 9876). Supports creating 3D objects, materials, animations, and running arbitrary bpy code. Placed in optional-skills/ since it requires Blender 4.3+ desktop with a third-party addon manually started each session. * feat(acp): support slash commands in ACP adapter (#1532) Adds /help, /model, /tools, /context, /reset, /compact, /version to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled directly in the server without instantiating the TUI — each command queries agent/session state and returns plain text. Unrecognized /commands fall through to the LLM as normal messages. /model uses detect_provider_for_model() for auto-detection when switching models, matching the CLI and gateway behavior. 
Fixes #1402 * fix(logging): improve error logging in session search tool (#1533) * fix(gateway): restart on retryable startup failures (#1517) * feat(email): add skip_attachments option via config.yaml * feat(email): add skip_attachments option via config.yaml Adds a config.yaml-driven option to skip email attachments in the gateway email adapter. Useful for malware protection and bandwidth savings. Configure in config.yaml: platforms: email: skip_attachments: true Based on PR #1521 by @an420eth, changed from env var to config.yaml (via PlatformConfig.extra) to match the project's config-first pattern. * docs: document skip_attachments option for email adapter * fix(telegram): retry on transient TLS failures during connect and send Add exponential-backoff retry (3 attempts) around initialize() to handle transient TLS resets during gateway startup. Also catches TimedOut and OSError in addition to NetworkError. Add exponential-backoff retry (3 attempts) around send_message() for NetworkError during message delivery, wrapping the existing Markdown fallback logic. Both imports are guarded with try/except ImportError for test environments where telegram is mocked. Based on PR #1527 by cmd8. Closes #1526. * feat: permissive block_anchor thresholds and unicode normalization (#1539) Salvaged from PR #1528 by an420eth. Closes #517. Improves _strategy_block_anchor in fuzzy_match.py: - Add unicode normalization (smart quotes, em/en-dashes, ellipsis, non-breaking spaces → ASCII) so LLM-produced unicode artifacts don't break anchor line matching - Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for multiple candidates — if first/last lines match exactly, the block is almost certainly correct - Use original (non-normalized) content for offset calculation to preserve correct character positions Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space anchors, very-low-similarity unique matches), zero regressions on all 9 existing fuzzy match tests. 
Co-authored-by: an420eth <an420eth@users.noreply.github.com> * feat(cli): add file path autocomplete in the input prompt (#1545) When typing a path-like token (./ ../ ~/ / or containing /), the CLI now shows filesystem completions in the dropdown menu. Directories show a trailing slash and 'dir' label; files show their size. Completions are case-insensitive and capped at 30 entries. Triggered by tokens like: edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ... check ~/doc → shows ~/docs/, ~/documents/, ... read /etc/hos → shows /etc/hosts, /etc/hostname, ... open tools/reg → shows tools/registry.py Slash command autocomplete (/help, /model, etc.) is unaffected — it still triggers when the input starts with /. Inspired by OpenCode PR #145 (file path completion menu). Implementation: - hermes_cli/commands.py: _extract_path_word() detects path-like tokens, _path_completions() yields filesystem Completions with size labels, get_completions() routes to paths vs slash commands - tests/hermes_cli/test_path_completion.py: 26 tests covering path extraction, prefix filtering, directory markers, home expansion, case-insensitivity, integration with slash commands * feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled Add privacy.redact_pii config option (boolean, default false). When enabled, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM provider: - Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256> - User IDs → hashed to user_<sha256> - Chat IDs → numeric portion hashed, platform prefix preserved - Home channel IDs → hashed - Names/usernames → NOT affected (user-chosen, publicly visible) Hashes are deterministic (same user → same hash) so the model can still distinguish users in group chats. Routing and delivery use the original values internally — redaction only affects LLM context. Inspired by OpenClaw PR #47959. 
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs) Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM needs the real ID to tag users. Redaction now only applies to WhatsApp, Signal, and Telegram where IDs are pure routing metadata. Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack. * feat: smart approvals + /stop command (inspired by OpenAI Codex) * feat: smart approvals — LLM-based risk assessment for dangerous commands Adds a 'smart' approval mode that uses the auxiliary LLM to assess whether a flagged command is genuinely dangerous or a false positive, auto-approving low-risk commands without prompting the user. Inspired by OpenAI Codex's Smart Approvals guardian subagent (openai/codex#13860). Config (config.yaml): approvals: mode: manual # manual (default), smart, off Modes: - manual — current behavior, always prompt the user - smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block), or ESCALATE (fall through to manual prompt) - off — skip all approval prompts (equivalent to --yolo) When smart mode auto-approves, the pattern gets session-level approval so subsequent uses of the same pattern don't trigger another LLM call. When it denies, the command is blocked without user prompt. When uncertain, it escalates to the normal manual approval flow. The LLM prompt is carefully scoped: it sees only the command text and the flagged reason, assesses actual risk vs false positive, and returns a single-word verdict. * feat: make smart approval model configurable via config.yaml Adds auxiliary.approval section to config.yaml with the same provider/model/base_url/api_key pattern as other aux tasks (vision, web_extract, compression, etc.). Config: auxiliary: approval: provider: auto model: '' # fast/cheap model recommended base_url: '' api_key: '' Bridged to env vars in both CLI and gateway paths so the aux client picks them up automatically. 
* feat: add /stop command to kill all background processes Adds a /stop slash command that kills all running background processes at once. Currently users have to process(list) then process(kill) for each one individually. Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current turn) from /stop (cleans up background processes). See openai/codex#14602. Ctrl+C continues to only interrupt the active agent turn — background dev servers, watchers, etc. are preserved. /stop is the explicit way to clean them all up. * feat: first-class plugin architecture + hide status bar cost by default (#1544) The persistent status bar now shows context %, token counts, and duration but NOT $ cost by default. Cost display is opt-in via: display: show_cost: true in config.yaml, or: hermes config set display.show_cost true The /usage command still shows full cost breakdown since the user explicitly asked for it — this only affects the always-visible bar. Status bar without cost: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m Status bar with show_cost: true: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m * feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex) * feat: improve memory prioritization — user preferences over procedural knowledge Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493) which focus memory writes on user preferences and recurring patterns rather than procedural task details. Key insight: 'Optimize for reducing future user steering — the most valuable memory prevents the user from having to repeat themselves.' 
Changes: - MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy and the core principle about reducing user steering - MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put corrections first, added explicit PRIORITY guidance - Memory nudge (run_agent.py): now asks specifically about preferences, corrections, and workflow patterns instead of generic 'anything' - Memory flush (run_agent.py): now instructs to prioritize user preferences and corrections over task-specific details * feat: more aggressive skill creation and update prompting Press harder on skill updates — the agent should proactively patch skills when it encounters issues during use, not wait to be asked. Changes: - SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction to patch skills immediately when found outdated/wrong - Skills header: added instruction to update loaded skills before finishing if they had missing steps or wrong commands - Skill nudge: more assertive ('save the approach' not 'consider saving'), now also prompts for updating existing skills used in the task - Skill nudge interval: lowered default from 15 to 10 iterations - skill_manage schema: added 'patch it immediately' to update triggers * feat: first-class plugin architecture (#1555) Plugin system for extending Hermes with custom tools, hooks, and integrations — no source code changes required. 
Core system (hermes_cli/plugins.py): - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and pip entry_points (hermes_agent.plugins group) - PluginContext with register_tool() and register_hook() - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call, on_session_start/end - Namespace package handling for relative imports in plugins - Graceful error isolation — broken plugins never crash the agent Integration (model_tools.py): - Plugin discovery runs after built-in + MCP tools - Plugin tools bypass toolset filter via get_plugin_tool_names() - Pre/post tool call hooks fire in handle_function_call() CLI: - /plugins command shows loaded plugins, tool counts, status - Added to COMMANDS dict for autocomplete Docs: - Getting started guide (build-a-hermes-plugin.md) — full tutorial building a calculator plugin step by step - Reference page (features/plugins.md) — quick overview + tables - Covers: file structure, schemas, handlers, hooks, data files, bundled skills, env var gating, pip distribution, common mistakes Tests: 16 tests covering discovery, loading, hooks, tool visibility. * fix: hermes update causes dual gateways on macOS (launchd) Three bugs worked together to create the dual-gateway problem: 1. cmd_update only checked systemd for gateway restart, completely ignoring launchd on macOS. After killing the PID it would print 'Restart it with: hermes gateway run' even when launchd was about to auto-respawn the process. 2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway after SIGTERM (non-zero exit), so the user's manual restart created a second instance. 3. The launchd plist lacked --replace (systemd had it), so the respawned gateway didn't kill stale instances on startup. 
Fixes: - Add --replace to launchd ProgramArguments (matches systemd) - Add launchd detection to cmd_update's auto-restart logic - Print 'auto-restart via launchd' instead of manual restart hint * fix: add launchd plist auto-refresh + explicit restart in cmd_update Two integration issues with the initial fix: 1. Existing macOS users with old plist (no --replace) would never get the fix until manual uninstall/reinstall. Added refresh_launchd_plist_if_needed() — mirrors the existing refresh_systemd_unit_if_needed(). Called from launchd_start(), launchd_restart(), and cmd_update. 2. cmd_update relied on KeepAlive respawn after SIGTERM rather than explicit launchctl stop/start. This caused races: launchd would respawn the old process before the PID file was cleaned up. Now does explicit stop+start (matching how systemd gets an explicit systemctl restart), with plist refresh first so the new --replace flag is picked up. --------- Co-authored-by: Ninja <ninja@local> Co-authored-by: alireza78a <alireza78a@users.noreply.github.com> Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com> Co-authored-by: JP Lew <polydegen@protonmail.com> Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
    """Resolve which model/config should handle this turn via smart routing.

    Builds a descriptor for the primary model from *model* plus the
    connection details carried in *runtime_kwargs*, asks the router
    (``resolve_turn_route``) whether this turn should go to the primary
    or a cheaper model, then attaches fast-mode request overrides —
    but only when a service tier is configured on this runner.

    Returns the route dict produced by the router, with a
    ``request_overrides`` key always present (``None`` when no service
    tier is set or override resolution fails).
    """
    from agent.smart_model_routing import resolve_turn_route
    from hermes_cli.models import resolve_fast_mode_overrides

    # Assemble the primary-model descriptor. Keys are inserted in the
    # same order as before so any downstream iteration sees no change.
    primary = {"model": model}
    for field in ("api_key", "base_url", "provider", "api_mode", "command"):
        primary[field] = runtime_kwargs.get(field)
    # Copy args defensively; treat a missing/None value as an empty list.
    primary["args"] = list(runtime_kwargs.get("args") or [])
    primary["credential_pool"] = runtime_kwargs.get("credential_pool")

    routing_cfg = getattr(self, "_smart_model_routing", {})
    route = resolve_turn_route(user_message, routing_cfg, primary)

    # Fast-mode overrides only apply when a service tier is configured;
    # otherwise the route goes out with overrides explicitly cleared.
    if not getattr(self, "_service_tier", None):
        route["request_overrides"] = None
        return route

    try:
        route["request_overrides"] = resolve_fast_mode_overrides(route.get("model"))
    except Exception:
        # Overrides are best-effort: a resolution failure must never
        # block the turn, so fall back to no overrides.
        route["request_overrides"] = None
    return route
fix: hermes update causes dual gateways on macOS (launchd) (#1567) * feat: add optional smart model routing Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow. * fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s * fix(gateway): avoid recursive ExecStop in user systemd unit * fix: extend ExecStop removal and TimeoutStopSec=60 to system unit The cherry-picked PR #1448 fix only covered the user systemd unit. The system unit had the same TimeoutStopSec=15 and could benefit from the same 60s timeout for clean shutdown. Also adds a regression test for the system unit. --------- Co-authored-by: Ninja <ninja@local> * feat(skills): add blender-mcp optional skill for 3D modeling Control a running Blender instance from Hermes via socket connection to the blender-mcp addon (port 9876). Supports creating 3D objects, materials, animations, and running arbitrary bpy code. Placed in optional-skills/ since it requires Blender 4.3+ desktop with a third-party addon manually started each session. * feat(acp): support slash commands in ACP adapter (#1532) Adds /help, /model, /tools, /context, /reset, /compact, /version to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled directly in the server without instantiating the TUI — each command queries agent/session state and returns plain text. Unrecognized /commands fall through to the LLM as normal messages. /model uses detect_provider_for_model() for auto-detection when switching models, matching the CLI and gateway behavior. 
Fixes #1402 * fix(logging): improve error logging in session search tool (#1533) * fix(gateway): restart on retryable startup failures (#1517) * feat(email): add skip_attachments option via config.yaml * feat(email): add skip_attachments option via config.yaml Adds a config.yaml-driven option to skip email attachments in the gateway email adapter. Useful for malware protection and bandwidth savings. Configure in config.yaml: platforms: email: skip_attachments: true Based on PR #1521 by @an420eth, changed from env var to config.yaml (via PlatformConfig.extra) to match the project's config-first pattern. * docs: document skip_attachments option for email adapter * fix(telegram): retry on transient TLS failures during connect and send Add exponential-backoff retry (3 attempts) around initialize() to handle transient TLS resets during gateway startup. Also catches TimedOut and OSError in addition to NetworkError. Add exponential-backoff retry (3 attempts) around send_message() for NetworkError during message delivery, wrapping the existing Markdown fallback logic. Both imports are guarded with try/except ImportError for test environments where telegram is mocked. Based on PR #1527 by cmd8. Closes #1526. * feat: permissive block_anchor thresholds and unicode normalization (#1539) Salvaged from PR #1528 by an420eth. Closes #517. Improves _strategy_block_anchor in fuzzy_match.py: - Add unicode normalization (smart quotes, em/en-dashes, ellipsis, non-breaking spaces → ASCII) so LLM-produced unicode artifacts don't break anchor line matching - Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for multiple candidates — if first/last lines match exactly, the block is almost certainly correct - Use original (non-normalized) content for offset calculation to preserve correct character positions Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space anchors, very-low-similarity unique matches), zero regressions on all 9 existing fuzzy match tests. 
Co-authored-by: an420eth <an420eth@users.noreply.github.com> * feat(cli): add file path autocomplete in the input prompt (#1545) When typing a path-like token (./ ../ ~/ / or containing /), the CLI now shows filesystem completions in the dropdown menu. Directories show a trailing slash and 'dir' label; files show their size. Completions are case-insensitive and capped at 30 entries. Triggered by tokens like: edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ... check ~/doc → shows ~/docs/, ~/documents/, ... read /etc/hos → shows /etc/hosts, /etc/hostname, ... open tools/reg → shows tools/registry.py Slash command autocomplete (/help, /model, etc.) is unaffected — it still triggers when the input starts with /. Inspired by OpenCode PR #145 (file path completion menu). Implementation: - hermes_cli/commands.py: _extract_path_word() detects path-like tokens, _path_completions() yields filesystem Completions with size labels, get_completions() routes to paths vs slash commands - tests/hermes_cli/test_path_completion.py: 26 tests covering path extraction, prefix filtering, directory markers, home expansion, case-insensitivity, integration with slash commands * feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled Add privacy.redact_pii config option (boolean, default false). When enabled, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM provider: - Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256> - User IDs → hashed to user_<sha256> - Chat IDs → numeric portion hashed, platform prefix preserved - Home channel IDs → hashed - Names/usernames → NOT affected (user-chosen, publicly visible) Hashes are deterministic (same user → same hash) so the model can still distinguish users in group chats. Routing and delivery use the original values internally — redaction only affects LLM context. Inspired by OpenClaw PR #47959. 
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs) Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM needs the real ID to tag users. Redaction now only applies to WhatsApp, Signal, and Telegram where IDs are pure routing metadata. Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack. * feat: smart approvals + /stop command (inspired by OpenAI Codex) * feat: smart approvals — LLM-based risk assessment for dangerous commands Adds a 'smart' approval mode that uses the auxiliary LLM to assess whether a flagged command is genuinely dangerous or a false positive, auto-approving low-risk commands without prompting the user. Inspired by OpenAI Codex's Smart Approvals guardian subagent (openai/codex#13860). Config (config.yaml): approvals: mode: manual # manual (default), smart, off Modes: - manual — current behavior, always prompt the user - smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block), or ESCALATE (fall through to manual prompt) - off — skip all approval prompts (equivalent to --yolo) When smart mode auto-approves, the pattern gets session-level approval so subsequent uses of the same pattern don't trigger another LLM call. When it denies, the command is blocked without user prompt. When uncertain, it escalates to the normal manual approval flow. The LLM prompt is carefully scoped: it sees only the command text and the flagged reason, assesses actual risk vs false positive, and returns a single-word verdict. * feat: make smart approval model configurable via config.yaml Adds auxiliary.approval section to config.yaml with the same provider/model/base_url/api_key pattern as other aux tasks (vision, web_extract, compression, etc.). Config: auxiliary: approval: provider: auto model: '' # fast/cheap model recommended base_url: '' api_key: '' Bridged to env vars in both CLI and gateway paths so the aux client picks them up automatically. 
* feat: add /stop command to kill all background processes Adds a /stop slash command that kills all running background processes at once. Currently users have to process(list) then process(kill) for each one individually. Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current turn) from /stop (cleans up background processes). See openai/codex#14602. Ctrl+C continues to only interrupt the active agent turn — background dev servers, watchers, etc. are preserved. /stop is the explicit way to clean them all up. * feat: first-class plugin architecture + hide status bar cost by default (#1544) The persistent status bar now shows context %, token counts, and duration but NOT $ cost by default. Cost display is opt-in via: display: show_cost: true in config.yaml, or: hermes config set display.show_cost true The /usage command still shows full cost breakdown since the user explicitly asked for it — this only affects the always-visible bar. Status bar without cost: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m Status bar with show_cost: true: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m * feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex) * feat: improve memory prioritization — user preferences over procedural knowledge Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493) which focus memory writes on user preferences and recurring patterns rather than procedural task details. Key insight: 'Optimize for reducing future user steering — the most valuable memory prevents the user from having to repeat themselves.' 
Changes: - MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy and the core principle about reducing user steering - MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put corrections first, added explicit PRIORITY guidance - Memory nudge (run_agent.py): now asks specifically about preferences, corrections, and workflow patterns instead of generic 'anything' - Memory flush (run_agent.py): now instructs to prioritize user preferences and corrections over task-specific details * feat: more aggressive skill creation and update prompting Press harder on skill updates — the agent should proactively patch skills when it encounters issues during use, not wait to be asked. Changes: - SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction to patch skills immediately when found outdated/wrong - Skills header: added instruction to update loaded skills before finishing if they had missing steps or wrong commands - Skill nudge: more assertive ('save the approach' not 'consider saving'), now also prompts for updating existing skills used in the task - Skill nudge interval: lowered default from 15 to 10 iterations - skill_manage schema: added 'patch it immediately' to update triggers * feat: first-class plugin architecture (#1555) Plugin system for extending Hermes with custom tools, hooks, and integrations — no source code changes required. 
Core system (hermes_cli/plugins.py): - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and pip entry_points (hermes_agent.plugins group) - PluginContext with register_tool() and register_hook() - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call, on_session_start/end - Namespace package handling for relative imports in plugins - Graceful error isolation — broken plugins never crash the agent Integration (model_tools.py): - Plugin discovery runs after built-in + MCP tools - Plugin tools bypass toolset filter via get_plugin_tool_names() - Pre/post tool call hooks fire in handle_function_call() CLI: - /plugins command shows loaded plugins, tool counts, status - Added to COMMANDS dict for autocomplete Docs: - Getting started guide (build-a-hermes-plugin.md) — full tutorial building a calculator plugin step by step - Reference page (features/plugins.md) — quick overview + tables - Covers: file structure, schemas, handlers, hooks, data files, bundled skills, env var gating, pip distribution, common mistakes Tests: 16 tests covering discovery, loading, hooks, tool visibility. * fix: hermes update causes dual gateways on macOS (launchd) Three bugs worked together to create the dual-gateway problem: 1. cmd_update only checked systemd for gateway restart, completely ignoring launchd on macOS. After killing the PID it would print 'Restart it with: hermes gateway run' even when launchd was about to auto-respawn the process. 2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway after SIGTERM (non-zero exit), so the user's manual restart created a second instance. 3. The launchd plist lacked --replace (systemd had it), so the respawned gateway didn't kill stale instances on startup. 
Fixes: - Add --replace to launchd ProgramArguments (matches systemd) - Add launchd detection to cmd_update's auto-restart logic - Print 'auto-restart via launchd' instead of manual restart hint * fix: add launchd plist auto-refresh + explicit restart in cmd_update Two integration issues with the initial fix: 1. Existing macOS users with old plist (no --replace) would never get the fix until manual uninstall/reinstall. Added refresh_launchd_plist_if_needed() — mirrors the existing refresh_systemd_unit_if_needed(). Called from launchd_start(), launchd_restart(), and cmd_update. 2. cmd_update relied on KeepAlive respawn after SIGTERM rather than explicit launchctl stop/start. This caused races: launchd would respawn the old process before the PID file was cleaned up. Now does explicit stop+start (matching how systemd gets an explicit systemctl restart), with plist refresh first so the new --replace flag is picked up. --------- Co-authored-by: Ninja <ninja@local> Co-authored-by: alireza78a <alireza78a@users.noreply.github.com> Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com> Co-authored-by: JP Lew <polydegen@protonmail.com> Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
    async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None:
        """React to an adapter failure after startup.

        If the error is retryable (e.g. network blip, DNS failure), queue the
        platform for background reconnection instead of giving up permanently.

        Non-retryable failures drop the platform outright. Either way the
        failed adapter is disconnected and removed from the routing table, and
        if no connected platforms remain the gateway shuts down: with a
        failure exit (so the service manager restarts the process) when the
        last error was retryable, cleanly otherwise.
        """
        logger.error(
            "Fatal %s adapter error (%s): %s",
            adapter.platform.value,
            adapter.fatal_error_code or "unknown",
            adapter.fatal_error_message or "unknown error",
        )
        # Record the failure in the runtime status file so external tooling
        # can see which platform is "retrying" vs permanently "fatal".
        self._update_platform_runtime_status(
            adapter.platform.value,
            platform_state="retrying" if adapter.fatal_error_retryable else "fatal",
            error_code=adapter.fatal_error_code,
            error_message=adapter.fatal_error_message,
        )
        # Only tear the adapter down if it is still the registered one for its
        # platform (a reconnect may already have swapped in a replacement).
        existing = self.adapters.get(adapter.platform)
        if existing is adapter:
            try:
                await adapter.disconnect()
            finally:
                # Remove even if disconnect() raised, and refresh the
                # delivery router's view of live adapters.
                self.adapters.pop(adapter.platform, None)
                self.delivery_router.adapters = self.adapters
        # Queue retryable failures for background reconnection
        if adapter.fatal_error_retryable:
            platform_config = self.config.platforms.get(adapter.platform)
            if platform_config and adapter.platform not in self._failed_platforms:
                self._failed_platforms[adapter.platform] = {
                    "config": platform_config,
                    "attempts": 0,
                    # First reconnection attempt ~30s from now.
                    "next_retry": time.monotonic() + 30,
                }
                logger.info(
                    "%s queued for background reconnection",
                    adapter.platform.value,
                )
        if not self.adapters and not self._failed_platforms:
            # Nothing connected and nothing queued for retry: shut down.
            self._exit_reason = adapter.fatal_error_message or "All messaging adapters disconnected"
            if adapter.fatal_error_retryable:
                # Exit non-zero so systemd Restart=on-failure restarts us.
                self._exit_with_failure = True
                logger.error("No connected messaging platforms remain. Shutting down gateway for service restart.")
            else:
                logger.error("No connected messaging platforms remain. Shutting down gateway cleanly.")
            await self.stop()
        elif not self.adapters and self._failed_platforms:
            # All platforms are down and queued for background reconnection.
            # If the error is retryable, exit with failure so systemd Restart=on-failure
            # can restart the process. Otherwise stay alive and keep retrying in background.
            if adapter.fatal_error_retryable:
                self._exit_reason = adapter.fatal_error_message or "All messaging platforms failed with retryable errors"
                self._exit_with_failure = True
                logger.error(
                    "All messaging platforms failed with retryable errors. "
                    "Shutting down gateway for service restart (systemd will retry)."
                )
                await self.stop()
            else:
                logger.warning(
                    "No connected messaging platforms remain, but %d platform(s) queued for reconnection",
                    len(self._failed_platforms),
                )
def _request_clean_exit(self, reason: str) -> None:
self._exit_cleanly = True
self._exit_reason = reason
self._shutdown_event.set()
def _running_agent_count(self) -> int:
return len(self._running_agents)
def _status_action_label(self) -> str:
return "restart" if self._restart_requested else "shutdown"
def _status_action_gerund(self) -> str:
return "restarting" if self._restart_requested else "shutting down"
def _queue_during_drain_enabled(self) -> bool:
return self._restart_requested and self._busy_input_mode == "queue"
def _update_runtime_status(self, gateway_state: Optional[str] = None, exit_reason: Optional[str] = None) -> None:
try:
from gateway.status import write_runtime_status
write_runtime_status(
gateway_state=gateway_state,
exit_reason=exit_reason,
restart_requested=self._restart_requested,
active_agents=self._running_agent_count(),
)
except Exception:
pass
def _update_platform_runtime_status(
self,
platform: str,
*,
platform_state: Optional[str] = None,
error_code: Optional[str] = None,
error_message: Optional[str] = None,
) -> None:
try:
from gateway.status import write_runtime_status
write_runtime_status(
platform=platform,
platform_state=platform_state,
error_code=error_code,
error_message=error_message,
)
except Exception:
pass
@staticmethod
def _load_prefill_messages() -> List[Dict[str, Any]]:
"""Load ephemeral prefill messages from config or env var.
Checks HERMES_PREFILL_MESSAGES_FILE env var first, then falls back to
the prefill_messages_file key in ~/.hermes/config.yaml.
Relative paths are resolved from ~/.hermes/.
"""
import json as _json
file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
if not file_path:
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
file_path = cfg.get("prefill_messages_file", "")
except Exception:
pass
if not file_path:
return []
path = Path(file_path).expanduser()
if not path.is_absolute():
path = _hermes_home / path
if not path.exists():
logger.warning("Prefill messages file not found: %s", path)
return []
try:
with open(path, "r", encoding="utf-8") as f:
data = _json.load(f)
if not isinstance(data, list):
logger.warning("Prefill messages file must contain a JSON array: %s", path)
return []
return data
except Exception as e:
logger.warning("Failed to load prefill messages from %s: %s", path, e)
return []
@staticmethod
def _load_ephemeral_system_prompt() -> str:
"""Load ephemeral system prompt from config or env var.
Checks HERMES_EPHEMERAL_SYSTEM_PROMPT env var first, then falls back to
agent.system_prompt in ~/.hermes/config.yaml.
"""
prompt = os.getenv("HERMES_EPHEMERAL_SYSTEM_PROMPT", "")
if prompt:
return prompt
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
return (cfg.get("agent", {}).get("system_prompt", "") or "").strip()
except Exception:
pass
return ""
@staticmethod
def _load_reasoning_config() -> dict | None:
"""Load reasoning effort from config.yaml.
Reads agent.reasoning_effort from config.yaml. Valid: "none",
"minimal", "low", "medium", "high", "xhigh". Returns None to use
default (medium).
"""
from hermes_constants import parse_reasoning_effort
effort = ""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
except Exception:
pass
result = parse_reasoning_effort(effort)
if effort and effort.strip() and result is None:
logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
return result
@staticmethod
def _load_service_tier() -> str | None:
"""Load Priority Processing setting from config.yaml.
Reads agent.service_tier from config.yaml. Accepted values mirror the CLI:
"fast"/"priority"/"on" => "priority", while "normal"/"off" disables it.
Returns None when unset or unsupported.
"""
raw = ""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
raw = str(cfg.get("agent", {}).get("service_tier", "") or "").strip()
except Exception:
pass
value = raw.lower()
if not value or value in {"normal", "default", "standard", "off", "none"}:
return None
if value in {"fast", "priority", "on"}:
return "priority"
logger.warning("Unknown service_tier '%s', ignoring", raw)
return None
fix: /reasoning command — add gateway support, fix display, persist settings (#1031) * fix: /reasoning command output ordering, display, and inline think extraction Three issues with the /reasoning command: 1. Output interleaving: The command echo used print() while feedback used _cprint(), causing them to render out-of-order under prompt_toolkit's patch_stdout. Changed echo to use _cprint() so all output renders through the same path in correct order. 2. Reasoning display not working: /reasoning show toggled a flag but reasoning never appeared for models that embed thinking in inline <think> blocks rather than structured API fields. Added fallback extraction in _build_assistant_message to capture <think> block content as reasoning when no structured reasoning fields (reasoning, reasoning_content, reasoning_details) are present. This feeds into both the reasoning callback (during tool loops) and the post-response reasoning box display. 3. Feedback clarity: Added checkmarks to confirm actions, persisted show/hide to config (was session-only before), and aligned the status display for readability. Tests: 7 new tests for inline think block extraction (41 total). * feat: add /reasoning command to gateway (Telegram/Discord/etc) The /reasoning command only existed in the CLI — messaging platforms had no way to view or change reasoning settings. This adds: 1. /reasoning command handler in the gateway: - No args: shows current effort level and display state - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh) - /reasoning show|hide: toggles reasoning display in responses - All changes saved to config.yaml immediately 2. Reasoning display in gateway responses: - When show_reasoning is enabled, prepends a 'Reasoning' block with the model's last_reasoning content before the response - Collapses long reasoning (>15 lines) to keep messages readable - Uses last_reasoning from run_conversation result dict 3. 
Plumbing: - Added _show_reasoning attribute loaded from config at startup - Propagated last_reasoning through _run_agent return dict - Added /reasoning to help text and known_commands set - Uses getattr for _show_reasoning to handle test stubs
2026-03-12 05:38:19 -07:00
@staticmethod
def _load_show_reasoning() -> bool:
"""Load show_reasoning toggle from config.yaml display section."""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
return bool(cfg.get("display", {}).get("show_reasoning", False))
except Exception:
pass
return False
@staticmethod
def _load_busy_input_mode() -> str:
"""Load gateway drain-time busy-input behavior from config/env."""
mode = os.getenv("HERMES_GATEWAY_BUSY_INPUT_MODE", "").strip().lower()
if not mode:
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
mode = str(cfg.get("display", {}).get("busy_input_mode", "") or "").strip().lower()
except Exception:
pass
return "queue" if mode == "queue" else "interrupt"
@staticmethod
def _load_restart_drain_timeout() -> float:
"""Load graceful gateway restart/stop drain timeout in seconds."""
raw = os.getenv("HERMES_RESTART_DRAIN_TIMEOUT", "").strip()
if not raw:
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
raw = str(cfg.get("agent", {}).get("restart_drain_timeout", "") or "").strip()
except Exception:
pass
value = parse_restart_drain_timeout(raw)
if raw and value == DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT:
try:
float(raw)
except (TypeError, ValueError):
logger.warning(
"Invalid restart_drain_timeout '%s', using default %.0fs",
raw,
DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
)
return value
@staticmethod
def _load_background_notifications_mode() -> str:
"""Load background process notification mode from config or env var.
Modes:
- ``all`` push running-output updates *and* the final message (default)
- ``result`` only the final completion message (regardless of exit code)
- ``error`` only the final message when exit code is non-zero
- ``off`` no watcher messages at all
"""
mode = os.getenv("HERMES_BACKGROUND_NOTIFICATIONS", "")
if not mode:
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
raw = cfg.get("display", {}).get("background_process_notifications")
if raw is False:
mode = "off"
elif raw not in (None, ""):
mode = str(raw)
except Exception:
pass
mode = (mode or "all").strip().lower()
valid = {"all", "result", "error", "off"}
if mode not in valid:
logger.warning(
"Unknown background_process_notifications '%s', defaulting to 'all'",
mode,
)
return "all"
return mode
@staticmethod
def _load_provider_routing() -> dict:
"""Load OpenRouter provider routing preferences from config.yaml."""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
return cfg.get("provider_routing", {}) or {}
except Exception:
pass
return {}
@staticmethod
def _load_fallback_model() -> list | dict | None:
"""Load fallback provider chain from config.yaml.
Returns a list of provider dicts (``fallback_providers``), a single
dict (legacy ``fallback_model``), or None if not configured.
AIAgent.__init__ normalizes both formats into a chain.
"""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
fb = cfg.get("fallback_providers") or cfg.get("fallback_model") or None
if fb:
return fb
except Exception:
pass
return None
fix: hermes update causes dual gateways on macOS (launchd) (#1567) * feat: add optional smart model routing Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow. * fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s * fix(gateway): avoid recursive ExecStop in user systemd unit * fix: extend ExecStop removal and TimeoutStopSec=60 to system unit The cherry-picked PR #1448 fix only covered the user systemd unit. The system unit had the same TimeoutStopSec=15 and could benefit from the same 60s timeout for clean shutdown. Also adds a regression test for the system unit. --------- Co-authored-by: Ninja <ninja@local> * feat(skills): add blender-mcp optional skill for 3D modeling Control a running Blender instance from Hermes via socket connection to the blender-mcp addon (port 9876). Supports creating 3D objects, materials, animations, and running arbitrary bpy code. Placed in optional-skills/ since it requires Blender 4.3+ desktop with a third-party addon manually started each session. * feat(acp): support slash commands in ACP adapter (#1532) Adds /help, /model, /tools, /context, /reset, /compact, /version to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled directly in the server without instantiating the TUI — each command queries agent/session state and returns plain text. Unrecognized /commands fall through to the LLM as normal messages. /model uses detect_provider_for_model() for auto-detection when switching models, matching the CLI and gateway behavior. 
Fixes #1402 * fix(logging): improve error logging in session search tool (#1533) * fix(gateway): restart on retryable startup failures (#1517) * feat(email): add skip_attachments option via config.yaml * feat(email): add skip_attachments option via config.yaml Adds a config.yaml-driven option to skip email attachments in the gateway email adapter. Useful for malware protection and bandwidth savings. Configure in config.yaml: platforms: email: skip_attachments: true Based on PR #1521 by @an420eth, changed from env var to config.yaml (via PlatformConfig.extra) to match the project's config-first pattern. * docs: document skip_attachments option for email adapter * fix(telegram): retry on transient TLS failures during connect and send Add exponential-backoff retry (3 attempts) around initialize() to handle transient TLS resets during gateway startup. Also catches TimedOut and OSError in addition to NetworkError. Add exponential-backoff retry (3 attempts) around send_message() for NetworkError during message delivery, wrapping the existing Markdown fallback logic. Both imports are guarded with try/except ImportError for test environments where telegram is mocked. Based on PR #1527 by cmd8. Closes #1526. * feat: permissive block_anchor thresholds and unicode normalization (#1539) Salvaged from PR #1528 by an420eth. Closes #517. Improves _strategy_block_anchor in fuzzy_match.py: - Add unicode normalization (smart quotes, em/en-dashes, ellipsis, non-breaking spaces → ASCII) so LLM-produced unicode artifacts don't break anchor line matching - Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for multiple candidates — if first/last lines match exactly, the block is almost certainly correct - Use original (non-normalized) content for offset calculation to preserve correct character positions Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space anchors, very-low-similarity unique matches), zero regressions on all 9 existing fuzzy match tests. 
Co-authored-by: an420eth <an420eth@users.noreply.github.com> * feat(cli): add file path autocomplete in the input prompt (#1545) When typing a path-like token (./ ../ ~/ / or containing /), the CLI now shows filesystem completions in the dropdown menu. Directories show a trailing slash and 'dir' label; files show their size. Completions are case-insensitive and capped at 30 entries. Triggered by tokens like: edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ... check ~/doc → shows ~/docs/, ~/documents/, ... read /etc/hos → shows /etc/hosts, /etc/hostname, ... open tools/reg → shows tools/registry.py Slash command autocomplete (/help, /model, etc.) is unaffected — it still triggers when the input starts with /. Inspired by OpenCode PR #145 (file path completion menu). Implementation: - hermes_cli/commands.py: _extract_path_word() detects path-like tokens, _path_completions() yields filesystem Completions with size labels, get_completions() routes to paths vs slash commands - tests/hermes_cli/test_path_completion.py: 26 tests covering path extraction, prefix filtering, directory markers, home expansion, case-insensitivity, integration with slash commands * feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled Add privacy.redact_pii config option (boolean, default false). When enabled, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM provider: - Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256> - User IDs → hashed to user_<sha256> - Chat IDs → numeric portion hashed, platform prefix preserved - Home channel IDs → hashed - Names/usernames → NOT affected (user-chosen, publicly visible) Hashes are deterministic (same user → same hash) so the model can still distinguish users in group chats. Routing and delivery use the original values internally — redaction only affects LLM context. Inspired by OpenClaw PR #47959. 
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs) Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM needs the real ID to tag users. Redaction now only applies to WhatsApp, Signal, and Telegram where IDs are pure routing metadata. Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack. * feat: smart approvals + /stop command (inspired by OpenAI Codex) * feat: smart approvals — LLM-based risk assessment for dangerous commands Adds a 'smart' approval mode that uses the auxiliary LLM to assess whether a flagged command is genuinely dangerous or a false positive, auto-approving low-risk commands without prompting the user. Inspired by OpenAI Codex's Smart Approvals guardian subagent (openai/codex#13860). Config (config.yaml): approvals: mode: manual # manual (default), smart, off Modes: - manual — current behavior, always prompt the user - smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block), or ESCALATE (fall through to manual prompt) - off — skip all approval prompts (equivalent to --yolo) When smart mode auto-approves, the pattern gets session-level approval so subsequent uses of the same pattern don't trigger another LLM call. When it denies, the command is blocked without user prompt. When uncertain, it escalates to the normal manual approval flow. The LLM prompt is carefully scoped: it sees only the command text and the flagged reason, assesses actual risk vs false positive, and returns a single-word verdict. * feat: make smart approval model configurable via config.yaml Adds auxiliary.approval section to config.yaml with the same provider/model/base_url/api_key pattern as other aux tasks (vision, web_extract, compression, etc.). Config: auxiliary: approval: provider: auto model: '' # fast/cheap model recommended base_url: '' api_key: '' Bridged to env vars in both CLI and gateway paths so the aux client picks them up automatically. 
* feat: add /stop command to kill all background processes Adds a /stop slash command that kills all running background processes at once. Currently users have to process(list) then process(kill) for each one individually. Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current turn) from /stop (cleans up background processes). See openai/codex#14602. Ctrl+C continues to only interrupt the active agent turn — background dev servers, watchers, etc. are preserved. /stop is the explicit way to clean them all up. * feat: first-class plugin architecture + hide status bar cost by default (#1544) The persistent status bar now shows context %, token counts, and duration but NOT $ cost by default. Cost display is opt-in via: display: show_cost: true in config.yaml, or: hermes config set display.show_cost true The /usage command still shows full cost breakdown since the user explicitly asked for it — this only affects the always-visible bar. Status bar without cost: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m Status bar with show_cost: true: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m * feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex) * feat: improve memory prioritization — user preferences over procedural knowledge Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493) which focus memory writes on user preferences and recurring patterns rather than procedural task details. Key insight: 'Optimize for reducing future user steering — the most valuable memory prevents the user from having to repeat themselves.' 
Changes: - MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy and the core principle about reducing user steering - MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put corrections first, added explicit PRIORITY guidance - Memory nudge (run_agent.py): now asks specifically about preferences, corrections, and workflow patterns instead of generic 'anything' - Memory flush (run_agent.py): now instructs to prioritize user preferences and corrections over task-specific details * feat: more aggressive skill creation and update prompting Press harder on skill updates — the agent should proactively patch skills when it encounters issues during use, not wait to be asked. Changes: - SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction to patch skills immediately when found outdated/wrong - Skills header: added instruction to update loaded skills before finishing if they had missing steps or wrong commands - Skill nudge: more assertive ('save the approach' not 'consider saving'), now also prompts for updating existing skills used in the task - Skill nudge interval: lowered default from 15 to 10 iterations - skill_manage schema: added 'patch it immediately' to update triggers * feat: first-class plugin architecture (#1555) Plugin system for extending Hermes with custom tools, hooks, and integrations — no source code changes required. 
Core system (hermes_cli/plugins.py): - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and pip entry_points (hermes_agent.plugins group) - PluginContext with register_tool() and register_hook() - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call, on_session_start/end - Namespace package handling for relative imports in plugins - Graceful error isolation — broken plugins never crash the agent Integration (model_tools.py): - Plugin discovery runs after built-in + MCP tools - Plugin tools bypass toolset filter via get_plugin_tool_names() - Pre/post tool call hooks fire in handle_function_call() CLI: - /plugins command shows loaded plugins, tool counts, status - Added to COMMANDS dict for autocomplete Docs: - Getting started guide (build-a-hermes-plugin.md) — full tutorial building a calculator plugin step by step - Reference page (features/plugins.md) — quick overview + tables - Covers: file structure, schemas, handlers, hooks, data files, bundled skills, env var gating, pip distribution, common mistakes Tests: 16 tests covering discovery, loading, hooks, tool visibility. * fix: hermes update causes dual gateways on macOS (launchd) Three bugs worked together to create the dual-gateway problem: 1. cmd_update only checked systemd for gateway restart, completely ignoring launchd on macOS. After killing the PID it would print 'Restart it with: hermes gateway run' even when launchd was about to auto-respawn the process. 2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway after SIGTERM (non-zero exit), so the user's manual restart created a second instance. 3. The launchd plist lacked --replace (systemd had it), so the respawned gateway didn't kill stale instances on startup. 
Fixes: - Add --replace to launchd ProgramArguments (matches systemd) - Add launchd detection to cmd_update's auto-restart logic - Print 'auto-restart via launchd' instead of manual restart hint * fix: add launchd plist auto-refresh + explicit restart in cmd_update Two integration issues with the initial fix: 1. Existing macOS users with old plist (no --replace) would never get the fix until manual uninstall/reinstall. Added refresh_launchd_plist_if_needed() — mirrors the existing refresh_systemd_unit_if_needed(). Called from launchd_start(), launchd_restart(), and cmd_update. 2. cmd_update relied on KeepAlive respawn after SIGTERM rather than explicit launchctl stop/start. This caused races: launchd would respawn the old process before the PID file was cleaned up. Now does explicit stop+start (matching how systemd gets an explicit systemctl restart), with plist refresh first so the new --replace flag is picked up. --------- Co-authored-by: Ninja <ninja@local> Co-authored-by: alireza78a <alireza78a@users.noreply.github.com> Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com> Co-authored-by: JP Lew <polydegen@protonmail.com> Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
@staticmethod
def _load_smart_model_routing() -> dict:
"""Load optional smart cheap-vs-strong model routing config."""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
return cfg.get("smart_model_routing", {}) or {}
except Exception:
pass
return {}
def _snapshot_running_agents(self) -> Dict[str, Any]:
return {
session_key: agent
for session_key, agent in self._running_agents.items()
if agent is not _AGENT_PENDING_SENTINEL
}
def _queue_or_replace_pending_event(self, session_key: str, event: MessageEvent) -> None:
adapter = self.adapters.get(event.source.platform)
if not adapter:
return
merge_pending_message_event(adapter._pending_messages, session_key, event)
    async def _handle_active_session_busy_message(self, event: MessageEvent, session_key: str) -> bool:
        """Handle a message that arrives while this session's agent is busy.

        Returns True when the event was fully handled here (queued and/or
        acknowledged) and False to let the caller's default path process it.
        """
        # --- Draining case (gateway restarting/stopping) ---
        if self._draining:
            adapter = self.adapters.get(event.source.platform)
            if not adapter:
                # No adapter to reply through — treat the event as handled.
                return True
            thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
            if self._queue_during_drain_enabled():
                self._queue_or_replace_pending_event(session_key, event)
                message = f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back."
            else:
                message = f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
            await adapter._send_with_retry(
                chat_id=event.source.chat_id,
                content=message,
                reply_to=event.message_id,
                metadata=thread_meta,
            )
            return True
        # --- Normal busy case (agent actively running a task) ---
        # The user sent a message while the agent is working. Interrupt the
        # agent immediately so it stops the current tool-calling loop and
        # processes the new message. The pending message is stored in the
        # adapter so the base adapter picks it up once the interrupted run
        # returns. A brief ack tells the user what's happening (debounced
        # to avoid spam when they fire multiple messages quickly).
        adapter = self.adapters.get(event.source.platform)
        if not adapter:
            return False  # let default path handle it
        # Store the message so it's processed as the next turn after the
        # interrupt causes the current run to exit.
        from gateway.platforms.base import merge_pending_message_event
        merge_pending_message_event(adapter._pending_messages, session_key, event)
        # Interrupt the running agent — this aborts in-flight tool calls and
        # causes the agent loop to exit at the next check point.
        running_agent = self._running_agents.get(session_key)
        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
            try:
                running_agent.interrupt(event.text)
            except Exception:
                pass  # don't let interrupt failure block the ack
        # Debounce: only send an acknowledgment once every 30 seconds per session
        # to avoid spamming the user when they send multiple messages quickly
        _BUSY_ACK_COOLDOWN = 30
        now = time.time()
        last_ack = self._busy_ack_ts.get(session_key, 0)
        if now - last_ack < _BUSY_ACK_COOLDOWN:
            return True  # interrupt sent, ack already delivered recently
        self._busy_ack_ts[session_key] = now
        # Build a status-rich acknowledgment
        status_parts = []
        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
            try:
                summary = running_agent.get_activity_summary()
                iteration = summary.get("api_call_count", 0)
                max_iter = summary.get("max_iterations", 0)
                current_tool = summary.get("current_tool")
                start_ts = self._running_agents_ts.get(session_key, 0)
                if start_ts:
                    # Only mention elapsed time once it rounds to >= 1 minute.
                    elapsed_min = int((now - start_ts) / 60)
                    if elapsed_min > 0:
                        status_parts.append(f"{elapsed_min} min elapsed")
                if max_iter:
                    status_parts.append(f"iteration {iteration}/{max_iter}")
                if current_tool:
                    status_parts.append(f"running: {current_tool}")
            except Exception:
                pass  # status is best-effort; fall back to a plain ack
        status_detail = f" ({', '.join(status_parts)})" if status_parts else ""
        message = (
            f"⚡ Interrupting current task{status_detail}. "
            f"I'll respond to your message shortly."
        )
        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
        try:
            await adapter._send_with_retry(
                chat_id=event.source.chat_id,
                content=message,
                reply_to=event.message_id,
                metadata=thread_meta,
            )
        except Exception as e:
            logger.debug("Failed to send busy-ack: %s", e)
        return True
async def _drain_active_agents(self, timeout: float) -> tuple[Dict[str, Any], bool]:
snapshot = self._snapshot_running_agents()
last_active_count = self._running_agent_count()
last_status_at = 0.0
def _maybe_update_status(force: bool = False) -> None:
nonlocal last_active_count, last_status_at
now = asyncio.get_running_loop().time()
active_count = self._running_agent_count()
if force or active_count != last_active_count or (now - last_status_at) >= 1.0:
self._update_runtime_status("draining")
last_active_count = active_count
last_status_at = now
if not self._running_agents:
_maybe_update_status(force=True)
return snapshot, False
_maybe_update_status(force=True)
if timeout <= 0:
return snapshot, True
deadline = asyncio.get_running_loop().time() + timeout
while self._running_agents and asyncio.get_running_loop().time() < deadline:
_maybe_update_status()
await asyncio.sleep(0.1)
timed_out = bool(self._running_agents)
_maybe_update_status(force=True)
return snapshot, timed_out
def _interrupt_running_agents(self, reason: str) -> None:
for session_key, agent in list(self._running_agents.items()):
if agent is _AGENT_PENDING_SENTINEL:
continue
try:
agent.interrupt(reason)
logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20])
except Exception as e:
logger.debug("Failed interrupting agent during shutdown: %s", e)
fix: notify active sessions on gateway shutdown + update health check Three fixes for gateway lifecycle stability: 1. Notify active sessions before shutdown (#new) When the gateway receives SIGTERM or /restart, it now sends a notification to every chat with an active agent BEFORE starting the drain. Users see: - Shutdown: 'Gateway shutting down — your task will be interrupted.' - Restart: 'Gateway restarting — use /retry after restart to continue.' Deduplicates per-chat so group sessions with multiple users get one notification. Best-effort: send failures are logged and swallowed. 2. Skip .clean_shutdown marker when drain timed out Previously, a graceful SIGTERM always wrote .clean_shutdown, even if agents were force-interrupted when the drain timed out. This meant the next startup skipped session suspension, leaving interrupted sessions in a broken state (trailing tool response, no final message). Now the marker is only written if the drain completed without timeout, so interrupted sessions get properly suspended on next startup. 3. Post-restart health check for hermes update (#6631) cmd_update() now verifies the gateway actually survived after systemctl restart (sleep 3s + is-active check). If the service crashed immediately, it retries once. If still dead, prints actionable diagnostics (journalctl command, manual restart hint). Also closes #8104 — already fixed on main (the /restart handler correctly detects systemd via INVOCATION_ID and uses via_service=True). Test plan: - 6 new tests for shutdown notifications (dedup, restart vs shutdown messaging, sentinel filtering, send failure resilience) - Existing restart drain + update tests pass (47 total)
2026-04-14 12:44:46 -07:00
async def _notify_active_sessions_of_shutdown(self) -> None:
"""Send a notification to every chat with an active agent.
Called at the very start of stop() adapters are still connected so
messages can be delivered. Best-effort: individual send failures are
logged and swallowed so they never block the shutdown sequence.
"""
active = self._snapshot_running_agents()
if not active:
return
action = "restarting" if self._restart_requested else "shutting down"
hint = (
"Your current task will be interrupted. "
feat: auto-continue interrupted agent work after gateway restart (#4493) When the gateway restarts mid-agent-work, the session transcript ends on a tool result the agent never processed. Previously, the user had to type 'continue' or use /retry (which replays from scratch, losing all prior work). Now, when the next user message arrives and the loaded history ends with role='tool', a system note is prepended: [System note: Your previous turn was interrupted before you could process the last tool result(s). Please finish processing those results and summarize what was accomplished, then address the user's new message below.] This is injected in _run_agent()'s run_sync closure, right before calling agent.run_conversation(). The agent sees the full history (including the pending tool results) and the system note, so it can summarize what was accomplished and then handle the user's new input. Design decisions: - No new session flags or schema changes — purely detects trailing tool messages in the loaded history - Works for any restart scenario (clean, crash, SIGTERM, drain timeout) as long as the session wasn't suspended (suspended = fresh start) - The user's actual message is preserved after the note - If the session WAS suspended (unclean shutdown), the old history is abandoned and the user starts fresh — no false auto-continue Also updates the shutdown notification message from 'Use /retry after restart to continue' to 'Send any message after restart to resume where it left off' — which is now accurate. Test plan: - 6 new auto-continue tests (trailing tool detection, no false positives for assistant/user/empty history, multi-tool, message preservation) - All 13 restart drain tests pass (updated /retry assertion)
2026-04-14 16:55:30 -07:00
"Send any message after restart to resume where it left off."
fix: notify active sessions on gateway shutdown + update health check Three fixes for gateway lifecycle stability: 1. Notify active sessions before shutdown (#new) When the gateway receives SIGTERM or /restart, it now sends a notification to every chat with an active agent BEFORE starting the drain. Users see: - Shutdown: 'Gateway shutting down — your task will be interrupted.' - Restart: 'Gateway restarting — use /retry after restart to continue.' Deduplicates per-chat so group sessions with multiple users get one notification. Best-effort: send failures are logged and swallowed. 2. Skip .clean_shutdown marker when drain timed out Previously, a graceful SIGTERM always wrote .clean_shutdown, even if agents were force-interrupted when the drain timed out. This meant the next startup skipped session suspension, leaving interrupted sessions in a broken state (trailing tool response, no final message). Now the marker is only written if the drain completed without timeout, so interrupted sessions get properly suspended on next startup. 3. Post-restart health check for hermes update (#6631) cmd_update() now verifies the gateway actually survived after systemctl restart (sleep 3s + is-active check). If the service crashed immediately, it retries once. If still dead, prints actionable diagnostics (journalctl command, manual restart hint). Also closes #8104 — already fixed on main (the /restart handler correctly detects systemd via INVOCATION_ID and uses via_service=True). Test plan: - 6 new tests for shutdown notifications (dedup, restart vs shutdown messaging, sentinel filtering, send failure resilience) - Existing restart drain + update tests pass (47 total)
2026-04-14 12:44:46 -07:00
if self._restart_requested
else "Your current task will be interrupted."
)
msg = f"⚠️ Gateway {action}{hint}"
notified: set = set()
for session_key in active:
# Parse platform + chat_id from the session key.
_parsed = _parse_session_key(session_key)
if not _parsed:
fix: notify active sessions on gateway shutdown + update health check Three fixes for gateway lifecycle stability: 1. Notify active sessions before shutdown (#new) When the gateway receives SIGTERM or /restart, it now sends a notification to every chat with an active agent BEFORE starting the drain. Users see: - Shutdown: 'Gateway shutting down — your task will be interrupted.' - Restart: 'Gateway restarting — use /retry after restart to continue.' Deduplicates per-chat so group sessions with multiple users get one notification. Best-effort: send failures are logged and swallowed. 2. Skip .clean_shutdown marker when drain timed out Previously, a graceful SIGTERM always wrote .clean_shutdown, even if agents were force-interrupted when the drain timed out. This meant the next startup skipped session suspension, leaving interrupted sessions in a broken state (trailing tool response, no final message). Now the marker is only written if the drain completed without timeout, so interrupted sessions get properly suspended on next startup. 3. Post-restart health check for hermes update (#6631) cmd_update() now verifies the gateway actually survived after systemctl restart (sleep 3s + is-active check). If the service crashed immediately, it retries once. If still dead, prints actionable diagnostics (journalctl command, manual restart hint). Also closes #8104 — already fixed on main (the /restart handler correctly detects systemd via INVOCATION_ID and uses via_service=True). Test plan: - 6 new tests for shutdown notifications (dedup, restart vs shutdown messaging, sentinel filtering, send failure resilience) - Existing restart drain + update tests pass (47 total)
2026-04-14 12:44:46 -07:00
continue
platform_str = _parsed["platform"]
chat_id = _parsed["chat_id"]
fix: notify active sessions on gateway shutdown + update health check Three fixes for gateway lifecycle stability: 1. Notify active sessions before shutdown (#new) When the gateway receives SIGTERM or /restart, it now sends a notification to every chat with an active agent BEFORE starting the drain. Users see: - Shutdown: 'Gateway shutting down — your task will be interrupted.' - Restart: 'Gateway restarting — use /retry after restart to continue.' Deduplicates per-chat so group sessions with multiple users get one notification. Best-effort: send failures are logged and swallowed. 2. Skip .clean_shutdown marker when drain timed out Previously, a graceful SIGTERM always wrote .clean_shutdown, even if agents were force-interrupted when the drain timed out. This meant the next startup skipped session suspension, leaving interrupted sessions in a broken state (trailing tool response, no final message). Now the marker is only written if the drain completed without timeout, so interrupted sessions get properly suspended on next startup. 3. Post-restart health check for hermes update (#6631) cmd_update() now verifies the gateway actually survived after systemctl restart (sleep 3s + is-active check). If the service crashed immediately, it retries once. If still dead, prints actionable diagnostics (journalctl command, manual restart hint). Also closes #8104 — already fixed on main (the /restart handler correctly detects systemd via INVOCATION_ID and uses via_service=True). Test plan: - 6 new tests for shutdown notifications (dedup, restart vs shutdown messaging, sentinel filtering, send failure resilience) - Existing restart drain + update tests pass (47 total)
2026-04-14 12:44:46 -07:00
# Deduplicate: one notification per chat, even if multiple
# sessions (different users/threads) share the same chat.
dedup_key = (platform_str, chat_id)
if dedup_key in notified:
continue
try:
platform = Platform(platform_str)
adapter = self.adapters.get(platform)
if not adapter:
continue
# Include thread_id if present so the message lands in the
# correct forum topic / thread.
thread_id = _parsed.get("thread_id")
fix: notify active sessions on gateway shutdown + update health check Three fixes for gateway lifecycle stability: 1. Notify active sessions before shutdown (#new) When the gateway receives SIGTERM or /restart, it now sends a notification to every chat with an active agent BEFORE starting the drain. Users see: - Shutdown: 'Gateway shutting down — your task will be interrupted.' - Restart: 'Gateway restarting — use /retry after restart to continue.' Deduplicates per-chat so group sessions with multiple users get one notification. Best-effort: send failures are logged and swallowed. 2. Skip .clean_shutdown marker when drain timed out Previously, a graceful SIGTERM always wrote .clean_shutdown, even if agents were force-interrupted when the drain timed out. This meant the next startup skipped session suspension, leaving interrupted sessions in a broken state (trailing tool response, no final message). Now the marker is only written if the drain completed without timeout, so interrupted sessions get properly suspended on next startup. 3. Post-restart health check for hermes update (#6631) cmd_update() now verifies the gateway actually survived after systemctl restart (sleep 3s + is-active check). If the service crashed immediately, it retries once. If still dead, prints actionable diagnostics (journalctl command, manual restart hint). Also closes #8104 — already fixed on main (the /restart handler correctly detects systemd via INVOCATION_ID and uses via_service=True). Test plan: - 6 new tests for shutdown notifications (dedup, restart vs shutdown messaging, sentinel filtering, send failure resilience) - Existing restart drain + update tests pass (47 total)
2026-04-14 12:44:46 -07:00
metadata = {"thread_id": thread_id} if thread_id else None
await adapter.send(chat_id, msg, metadata=metadata)
notified.add(dedup_key)
logger.info(
"Sent shutdown notification to %s:%s",
platform_str, chat_id,
)
except Exception as e:
logger.debug(
"Failed to send shutdown notification to %s:%s: %s",
platform_str, chat_id, e,
)
def _finalize_shutdown_agents(self, active_agents: Dict[str, Any]) -> None:
for agent in active_agents.values():
try:
from hermes_cli.plugins import invoke_hook as _invoke_hook
_invoke_hook(
"on_session_finalize",
session_id=getattr(agent, "session_id", None),
platform="gateway",
)
except Exception:
pass
self._cleanup_agent_resources(agent)
def _cleanup_agent_resources(self, agent: Any) -> None:
"""Best-effort cleanup for temporary or cached agent instances."""
if agent is None:
return
try:
if hasattr(agent, "shutdown_memory_provider"):
agent.shutdown_memory_provider()
except Exception:
pass
# Close tool resources (terminal sandboxes, browser daemons,
# background processes, httpx clients) to prevent zombie
# process accumulation.
try:
if hasattr(agent, "close"):
agent.close()
except Exception:
pass
fix: break stuck session resume loops after repeated restarts (#7536) When a session gets stuck (hung terminal, runaway tool loop) and the user restarts the gateway, the same session history loads and puts the agent right back in the stuck state. The user is trapped in a loop: restart → stuck → restart → stuck. Fix: track restart-failure counts per session using a simple JSON file (.restart_failure_counts). On each shutdown with active agents, the counter increments for those sessions. On startup, if any session has been active across 3+ consecutive restarts, it's auto-suspended — giving the user a clean slate on their next message. The counter resets to 0 when a session completes a turn successfully (response delivered), so normal sessions that happen to be active during planned restarts (/restart, hermes update) won't accumulate false counts. Implementation: - _increment_restart_failure_counts(): called during stop() when agents are active. Writes {session_key: count} to JSON file. Sessions NOT active are dropped (loop broken). - _suspend_stuck_loop_sessions(): called on startup. Reads the file, suspends sessions at threshold (3), clears the file. - _clear_restart_failure_count(): called after successful response delivery. Removes the session from the counter file. No SessionEntry schema changes. No database migration. Pure file-based tracking that naturally cleans up. Test plan: - 9 new stuck-loop tests (increment, accumulate, threshold, clear, suspend, file cleanup, edge cases) - All 28 gateway lifecycle tests pass (restart drain + auto-continue + stuck loop)
2026-04-14 17:03:47 -07:00
_STUCK_LOOP_THRESHOLD = 3 # restarts while active before auto-suspend
_STUCK_LOOP_FILE = ".restart_failure_counts"
def _increment_restart_failure_counts(self, active_session_keys: set) -> None:
"""Increment restart-failure counters for sessions active at shutdown.
Persists to a JSON file so counters survive across restarts.
Sessions NOT in active_session_keys are removed (they completed
successfully, so the loop is broken).
"""
import json
path = _hermes_home / self._STUCK_LOOP_FILE
try:
counts = json.loads(path.read_text()) if path.exists() else {}
except Exception:
counts = {}
# Increment active sessions, remove inactive ones (loop broken)
new_counts = {}
for key in active_session_keys:
new_counts[key] = counts.get(key, 0) + 1
# Keep any entries that are still above 0 even if not active now
# (they might become active again next restart)
try:
path.write_text(json.dumps(new_counts))
except Exception:
pass
def _suspend_stuck_loop_sessions(self) -> int:
"""Suspend sessions that have been active across too many restarts.
Returns the number of sessions suspended. Called on gateway startup
AFTER suspend_recently_active() to catch the stuck-loop pattern:
session loads agent gets stuck gateway restarts repeat.
"""
import json
path = _hermes_home / self._STUCK_LOOP_FILE
if not path.exists():
return 0
try:
counts = json.loads(path.read_text())
except Exception:
return 0
suspended = 0
stuck_keys = [k for k, v in counts.items() if v >= self._STUCK_LOOP_THRESHOLD]
for session_key in stuck_keys:
try:
entry = self.session_store._entries.get(session_key)
if entry and not entry.suspended:
entry.suspended = True
suspended += 1
logger.warning(
"Auto-suspended stuck session %s (active across %d "
"consecutive restarts — likely a stuck loop)",
session_key[:30], counts[session_key],
)
except Exception:
pass
if suspended:
try:
self.session_store._save()
except Exception:
pass
# Clear the file — counters start fresh after suspension
try:
path.unlink(missing_ok=True)
except Exception:
pass
return suspended
def _clear_restart_failure_count(self, session_key: str) -> None:
"""Clear the restart-failure counter for a session that completed OK.
Called after a successful agent turn to signal the loop is broken.
"""
import json
path = _hermes_home / self._STUCK_LOOP_FILE
if not path.exists():
return
try:
counts = json.loads(path.read_text())
if session_key in counts:
del counts[session_key]
if counts:
path.write_text(json.dumps(counts))
else:
path.unlink(missing_ok=True)
except Exception:
pass
async def _launch_detached_restart_command(self) -> None:
import shutil
import subprocess
hermes_cmd = _resolve_hermes_bin()
if not hermes_cmd:
logger.error("Could not locate hermes binary for detached /restart")
return
current_pid = os.getpid()
cmd = " ".join(shlex.quote(part) for part in hermes_cmd)
shell_cmd = (
f"while kill -0 {current_pid} 2>/dev/null; do sleep 0.2; done; "
f"{cmd} gateway restart"
)
setsid_bin = shutil.which("setsid")
if setsid_bin:
subprocess.Popen(
[setsid_bin, "bash", "-lc", shell_cmd],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
start_new_session=True,
)
else:
subprocess.Popen(
["bash", "-lc", shell_cmd],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
start_new_session=True,
)
def request_restart(self, *, detached: bool = False, via_service: bool = False) -> bool:
if self._restart_task_started:
return False
self._restart_requested = True
self._restart_detached = detached
self._restart_via_service = via_service
self._restart_task_started = True
async def _run_restart() -> None:
await asyncio.sleep(0.05)
await self.stop(restart=True, detached_restart=detached, service_restart=via_service)
task = asyncio.create_task(_run_restart())
self._background_tasks.add(task)
task.add_done_callback(self._background_tasks.discard)
return True
async def start(self) -> bool:
"""
Start the gateway and all configured platform adapters.
Returns True if at least one adapter connected successfully.
"""
logger.info("Starting Hermes Gateway...")
logger.info("Session storage: %s", self.config.sessions_dir)
feat: add profiles — run multiple isolated Hermes instances (#3681) Each profile is a fully independent HERMES_HOME with its own config, API keys, memory, sessions, skills, gateway, cron, and state.db. Core module: hermes_cli/profiles.py (~900 lines) - Profile CRUD: create, delete, list, show, rename - Three clone levels: blank, --clone (config), --clone-all (everything) - Export/import: tar.gz archive for backup and migration - Wrapper alias scripts (~/.local/bin/<name>) - Collision detection for alias names - Sticky default via ~/.hermes/active_profile - Skill seeding via subprocess (handles module-level caching) - Auto-stop gateway on delete with disable-before-stop for services - Tab completion generation for bash and zsh CLI integration (hermes_cli/main.py): - _apply_profile_override(): pre-import -p/--profile flag + sticky default - Full 'hermes profile' subcommand: list, use, create, delete, show, alias, rename, export, import - 'hermes completion bash/zsh' command - Multi-profile skill sync in hermes update Display (cli.py, banner.py, gateway/run.py): - CLI prompt: 'coder ❯' when using a non-default profile - Banner shows profile name - Gateway startup log includes profile name Gateway safety: - Token locks: Discord, Slack, WhatsApp, Signal (extends Telegram pattern) - Port conflict detection: API server, webhook adapter Diagnostics (hermes_cli/doctor.py): - Profile health section: lists profiles, checks config, .env, aliases - Orphan alias detection: warns when wrapper points to deleted profile Tests (tests/hermes_cli/test_profiles.py): - 71 automated tests covering: validation, CRUD, clone levels, rename, export/import, active profile, isolation, alias collision, completion - Full suite: 6760 passed, 0 new failures Documentation: - website/docs/user-guide/profiles.md: full user guide (12 sections) - website/docs/reference/profile-commands.md: command reference (12 commands) - website/docs/reference/faq.md: 6 profile FAQ entries - website/sidebars.ts: 
navigation updated
2026-03-29 10:41:20 -07:00
try:
from hermes_cli.profiles import get_active_profile_name
_profile = get_active_profile_name()
if _profile and _profile != "default":
logger.info("Active profile: %s", _profile)
except Exception:
pass
try:
from gateway.status import write_runtime_status
write_runtime_status(gateway_state="starting", exit_reason=None)
except Exception:
pass
# Warn if no user allowlists are configured and open access is not opted in
_any_allowlist = any(
os.getenv(v)
for v in ("TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS",
"WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS",
"SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
"EMAIL_ALLOWED_USERS",
"SMS_ALLOWED_USERS", "MATTERMOST_ALLOWED_USERS",
"MATRIX_ALLOWED_USERS", "DINGTALK_ALLOWED_USERS",
"FEISHU_ALLOWED_USERS",
"WECOM_ALLOWED_USERS",
"WECOM_CALLBACK_ALLOWED_USERS",
"WEIXIN_ALLOWED_USERS",
"BLUEBUBBLES_ALLOWED_USERS",
"QQ_ALLOWED_USERS",
"GATEWAY_ALLOWED_USERS")
)
_allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes") or any(
os.getenv(v, "").lower() in ("true", "1", "yes")
for v in ("TELEGRAM_ALLOW_ALL_USERS", "DISCORD_ALLOW_ALL_USERS",
"WHATSAPP_ALLOW_ALL_USERS", "SLACK_ALLOW_ALL_USERS",
"SIGNAL_ALLOW_ALL_USERS", "EMAIL_ALLOW_ALL_USERS",
"SMS_ALLOW_ALL_USERS", "MATTERMOST_ALLOW_ALL_USERS",
"MATRIX_ALLOW_ALL_USERS", "DINGTALK_ALLOW_ALL_USERS",
"FEISHU_ALLOW_ALL_USERS",
"WECOM_ALLOW_ALL_USERS",
"WECOM_CALLBACK_ALLOW_ALL_USERS",
"WEIXIN_ALLOW_ALL_USERS",
"BLUEBUBBLES_ALLOW_ALL_USERS",
"QQ_ALLOW_ALL_USERS")
)
if not _any_allowlist and not _allow_all:
logger.warning(
"No user allowlists configured. All unauthorized users will be denied. "
"Set GATEWAY_ALLOW_ALL_USERS=true in ~/.hermes/.env to allow open access, "
"or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)."
)
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image 
vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# Discover and load event hooks
self.hooks.discover_and_load()
Add background process management with process tool, wait, PTY, and stdin support New process registry and tool for managing long-running background processes across all terminal backends (local, Docker, Singularity, Modal, SSH). Process Registry (tools/process_registry.py): - ProcessSession tracking with rolling 200KB output buffer - spawn_local() with optional PTY via ptyprocess for interactive CLIs - spawn_via_env() for non-local backends (runs inside sandbox, never on host) - Background reader threads per process (Popen stdout or PTY) - wait() with timeout clamping, interrupt support, and transparent limit reporting - JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery - Module-level singleton shared across agent loop, gateway, and RL Process Tool (model_tools.py): - 7 actions: list, poll, log, wait, kill, write, submit - Paired with terminal in all toolsets (CLI, messaging, RL) - Timeout clamping with transparent notes in response Terminal Tool Updates (tools/terminal_tool.py): - Replaced nohup background mode with registry spawn (returns session_id) - Added workdir parameter for per-command working directory - Added check_interval parameter for gateway auto-check watchers - Added pty parameter for interactive CLI tools (Codex, Claude Code) - Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs - Cleanup thread now respects active background processes (won't reap sandbox) Gateway Integration (gateway/run.py, session.py, config.py): - Session reset protection: sessions with active processes exempt from reset - Default idle timeout increased from 2 hours to 24 hours - from_dict fallback aligned to match (was 120, now 1440) - session_key env var propagated to process registry for session mapping - Crash recovery on gateway startup via checkpoint probe - check_interval watcher: asyncio task polls process, delivers updates to platform RL Safety (environments/): - tool_context.py cleanup() kills background processes on episode end 
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools) - Process tool safe in RL via wait() blocking the agent loop Also: - Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all]) - Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP - Updated AGENTS.md with process management docs and project structure - Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
# Recover background processes from checkpoint (crash recovery)
try:
from tools.process_registry import process_registry
recovered = process_registry.recover_from_checkpoint()
if recovered:
logger.info("Recovered %s background process(es) from previous run", recovered)
# (history: process-registry checkpoint recovery wired into gateway startup)
except Exception as e:
logger.warning("Process checkpoint recovery: %s", e)
# Suspend sessions that were active when the gateway last exited.
# This prevents stuck sessions from being blindly resumed on restart,
# which can create an unrecoverable loop (#7536). Suspended sessions
# auto-reset on the next incoming message, giving the user a clean start.
#
# SKIP suspension after a clean (graceful) shutdown — the previous
# process already drained active agents, so sessions aren't stuck.
# This prevents unwanted auto-resets after `hermes update`,
# `hermes gateway restart`, or `/restart`.
_clean_marker = _hermes_home / ".clean_shutdown"
if _clean_marker.exists():
logger.info("Previous gateway exited cleanly — skipping session suspension")
try:
_clean_marker.unlink()
except Exception:
pass
else:
try:
suspended = self.session_store.suspend_recently_active()
if suspended:
logger.info("Suspended %d in-flight session(s) from previous run", suspended)
except Exception as e:
logger.warning("Session suspension on startup failed: %s", e)
# (history: stuck-session restart-loop fix, #7536 — per-session restart-failure
#  counts persisted to .restart_failure_counts; sessions auto-suspended at 3+)
# Stuck-loop detection (#7536): if a session has been active across
# 3+ consecutive restarts, it's probably stuck in a loop (the same
# history keeps causing the agent to hang). Auto-suspend it so the
# user gets a clean slate on the next message.
try:
stuck = self._suspend_stuck_loop_sessions()
if stuck:
logger.warning("Auto-suspended %d stuck-loop session(s)", stuck)
except Exception as e:
logger.debug("Stuck-loop detection failed: %s", e)
connected_count = 0
enabled_platform_count = 0
startup_nonretryable_errors: list[str] = []
startup_retryable_errors: list[str] = []
# Initialize and connect each configured platform
for platform, platform_config in self.config.platforms.items():
if not platform_config.enabled:
continue
enabled_platform_count += 1
adapter = self._create_adapter(platform, platform_config)
if not adapter:
logger.warning("No adapter available for %s", platform.value)
continue
# Set up message + fatal error handlers
adapter.set_message_handler(self._handle_message)
adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
adapter.set_session_store(self.session_store)
adapter.set_busy_session_handler(self._handle_active_session_busy_message)
# Try to connect
logger.info("Connecting to %s...", platform.value)
self._update_platform_runtime_status(
platform.value,
platform_state="connecting",
error_code=None,
error_message=None,
)
try:
success = await adapter.connect()
if success:
self.adapters[platform] = adapter
self._sync_voice_mode_state_to_adapter(adapter)
connected_count += 1
self._update_platform_runtime_status(
platform.value,
platform_state="connected",
error_code=None,
error_message=None,
)
logger.info("%s connected", platform.value)
else:
logger.warning("%s failed to connect", platform.value)
if adapter.has_fatal_error:
self._update_platform_runtime_status(
platform.value,
platform_state="retrying" if adapter.fatal_error_retryable else "fatal",
error_code=adapter.fatal_error_code,
error_message=adapter.fatal_error_message,
)
target = (
startup_retryable_errors
if adapter.fatal_error_retryable
else startup_nonretryable_errors
)
target.append(
f"{platform.value}: {adapter.fatal_error_message}"
)
# Queue for reconnection if the error is retryable
if adapter.fatal_error_retryable:
self._failed_platforms[platform] = {
"config": platform_config,
"attempts": 1,
"next_retry": time.monotonic() + 30,
}
else:
self._update_platform_runtime_status(
platform.value,
platform_state="retrying",
error_code=None,
error_message="failed to connect",
)
startup_retryable_errors.append(
f"{platform.value}: failed to connect"
)
# No fatal error info means likely a transient issue — queue for retry
self._failed_platforms[platform] = {
"config": platform_config,
"attempts": 1,
"next_retry": time.monotonic() + 30,
}
except Exception as e:
logger.error("%s error: %s", platform.value, e)
self._update_platform_runtime_status(
platform.value,
platform_state="retrying",
error_code=None,
error_message=str(e),
)
startup_retryable_errors.append(f"{platform.value}: {e}")
# Unexpected exceptions are typically transient — queue for retry
self._failed_platforms[platform] = {
"config": platform_config,
"attempts": 1,
"next_retry": time.monotonic() + 30,
}
if connected_count == 0:
if startup_nonretryable_errors:
reason = "; ".join(startup_nonretryable_errors)
logger.error("Gateway hit a non-retryable startup conflict: %s", reason)
try:
from gateway.status import write_runtime_status
write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
except Exception:
pass
self._request_clean_exit(reason)
return True
if enabled_platform_count > 0:
reason = "; ".join(startup_retryable_errors) or "all configured messaging platforms failed to connect"
logger.error("Gateway failed to connect any configured messaging platform: %s", reason)
try:
from gateway.status import write_runtime_status
write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
except Exception:
pass
return False
logger.warning("No messaging platforms enabled.")
logger.info("Gateway will continue running for cron job execution.")
# Update delivery router with adapters
self.delivery_router.adapters = self.adapters
self._running = True
self._update_runtime_status("running")
# (history: messaging enhancements added — voice STT, Telegram stickers,
#  Discord slash commands/approvals, Slack adapter, DM pairing, event hooks)
# Emit gateway:startup hook
hook_count = len(self.hooks.loaded_hooks)
if hook_count:
logger.info("%s hook(s) loaded", hook_count)
# (history: event hook system added — file-based hooks from ~/.hermes/hooks/)
await self.hooks.emit("gateway:startup", {
"platforms": [p.value for p in self.adapters.keys()],
})
if connected_count > 0:
logger.info("Gateway running with %s platform(s)", connected_count)
# Build initial channel directory for send_message name resolution
try:
from gateway.channel_directory import build_channel_directory
directory = build_channel_directory(self.adapters)
ch_count = sum(len(chs) for chs in directory.get("platforms", {}).values())
logger.info("Channel directory built: %d target(s)", ch_count)
except Exception as e:
logger.warning("Channel directory build failed: %s", e)
# Check if we're restarting after a /update command. If the update is
# still running, keep watching so we notify once it actually finishes.
notified = await self._send_update_notification()
if not notified and any(
path.exists()
for path in (
_hermes_home / ".update_pending.json",
_hermes_home / ".update_pending.claimed.json",
)
):
self._schedule_update_notification_watch()
# Notify the chat that initiated /restart that the gateway is back.
await self._send_restart_notification()
# Drain any recovered process watchers (from crash recovery checkpoint)
try:
from tools.process_registry import process_registry
while process_registry.pending_watchers:
watcher = process_registry.pending_watchers.pop(0)
asyncio.create_task(self._run_process_watcher(watcher))
logger.info("Resumed watcher for recovered process %s", watcher.get("session_id"))
except Exception as e:
logger.error("Recovered watcher setup error: %s", e)
# Start background session expiry watcher for proactive memory flushing
asyncio.create_task(self._session_expiry_watcher())
# Start background reconnection watcher for platforms that failed at startup
if self._failed_platforms:
logger.info(
"Starting reconnection watcher for %d failed platform(s): %s",
len(self._failed_platforms),
", ".join(p.value for p in self._failed_platforms),
)
asyncio.create_task(self._platform_reconnect_watcher())
logger.info("Press Ctrl+C to stop")
return True
async def _session_expiry_watcher(self, interval: int = 300):
    """Background task that proactively flushes memories for expired sessions.

    Runs every ``interval`` seconds (default 5 min). For each session that
    has expired according to its reset policy, flushes memories via
    ``_async_flush_memories`` and marks the session (``memory_flushed``,
    persisted to disk) so it won't be flushed again — the flag survives
    gateway restarts.

    This means memories are already saved by the time the user sends their
    next message, so there's no blocking delay.

    Args:
        interval: Seconds between expiry scans. The sleep is chunked into
            1-second steps so the loop exits promptly when the gateway stops.
    """
    await asyncio.sleep(60)  # initial delay — let the gateway fully start

    # session_id -> consecutive failure count; reset on success, and after
    # _MAX_FLUSH_RETRIES failures the session is force-marked as flushed to
    # prevent an infinite retry loop burning API credits.
    _flush_failures: dict[str, int] = {}
    _MAX_FLUSH_RETRIES = 3

    while self._running:
        try:
            self.session_store._ensure_loaded()

            # Collect expired sessions first, then log a single summary.
            _expired_entries = []
            for key, entry in list(self.session_store._entries.items()):
                if entry.memory_flushed:
                    continue  # already flushed (possibly before a restart)
                if not self.session_store._is_session_expired(entry):
                    continue
                _expired_entries.append((key, entry))

            if _expired_entries:
                # Extract platform names from session keys for a compact summary.
                # Keys look like "agent:main:telegram:dm:12345" — platform is field [2].
                _platforms: dict[str, int] = {}
                for _k, _e in _expired_entries:
                    _parts = _k.split(":")
                    _plat = _parts[2] if len(_parts) > 2 else "unknown"
                    _platforms[_plat] = _platforms.get(_plat, 0) + 1
                _plat_summary = ", ".join(
                    f"{p}:{c}" for p, c in sorted(_platforms.items())
                )
                logger.info(
                    "Session expiry: %d sessions to flush (%s)",
                    len(_expired_entries), _plat_summary,
                )

            for key, entry in _expired_entries:
                try:
                    await self._async_flush_memories(entry.session_id, key)

                    # Shut down memory provider and close tool resources
                    # on the cached agent. Idle agents live in
                    # _agent_cache (not _running_agents), so look there.
                    _cached_agent = None
                    _cache_lock = getattr(self, "_agent_cache_lock", None)
                    if _cache_lock is not None:
                        with _cache_lock:
                            _cached = self._agent_cache.get(key)
                            # Cache entries may be (agent, meta) tuples or bare agents.
                            _cached_agent = _cached[0] if isinstance(_cached, tuple) else _cached if _cached else None
                    # Fall back to _running_agents in case the agent is
                    # still mid-turn when the expiry fires.
                    if _cached_agent is None:
                        _cached_agent = self._running_agents.get(key)
                    if _cached_agent and _cached_agent is not _AGENT_PENDING_SENTINEL:
                        self._cleanup_agent_resources(_cached_agent)

                    # Mark as flushed and persist to disk so the flag
                    # survives gateway restarts.
                    with self.session_store._lock:
                        entry.memory_flushed = True
                        self.session_store._save()
                    logger.debug(
                        "Memory flush completed for session %s",
                        entry.session_id,
                    )
                    _flush_failures.pop(entry.session_id, None)
                except Exception as e:
                    failures = _flush_failures.get(entry.session_id, 0) + 1
                    _flush_failures[entry.session_id] = failures
                    if failures >= _MAX_FLUSH_RETRIES:
                        logger.warning(
                            "Memory flush gave up after %d attempts for %s: %s. "
                            "Marking as flushed to prevent infinite retry loop.",
                            failures, entry.session_id, e,
                        )
                        with self.session_store._lock:
                            entry.memory_flushed = True
                            self.session_store._save()
                        _flush_failures.pop(entry.session_id, None)
                    else:
                        logger.debug(
                            "Memory flush failed (%d/%d) for %s: %s",
                            failures, _MAX_FLUSH_RETRIES, entry.session_id, e,
                        )

            if _expired_entries:
                _flushed = sum(
                    1 for _, e in _expired_entries if e.memory_flushed
                )
                _failed = len(_expired_entries) - _flushed
                if _failed:
                    logger.info(
                        "Session expiry done: %d flushed, %d pending retry",
                        _flushed, _failed,
                    )
                else:
                    logger.info(
                        "Session expiry done: %d flushed", _flushed,
                    )
        except Exception as e:
            # Watcher must never die; a scan failure just waits for the next cycle.
            logger.debug("Session expiry watcher error: %s", e)

        # Sleep in small increments so we can stop quickly
        for _ in range(interval):
            if not self._running:
                break
            await asyncio.sleep(1)
async def _platform_reconnect_watcher(self) -> None:
    """Background task that periodically retries connecting failed platforms.

    Uses exponential backoff: 30s 60s 120s 240s 300s (cap).
    Stops retrying a platform after 20 failed attempts or if the error
    is non-retryable (e.g. bad auth token).

    State lives in ``self._failed_platforms``: a dict mapping platform ->
    {"config", "attempts", "next_retry"} populated at startup. Entries are
    removed on success, on give-up, on non-retryable errors, or when the
    adapter factory returns None.
    """
    _MAX_ATTEMPTS = 20
    _BACKOFF_CAP = 300  # 5 minutes max between retries
    await asyncio.sleep(10)  # initial delay — let startup finish
    while self._running:
        if not self._failed_platforms:
            # Nothing to reconnect — sleep and check again
            # (1-second steps so shutdown is noticed quickly).
            for _ in range(30):
                if not self._running:
                    return
                await asyncio.sleep(1)
            continue
        now = time.monotonic()
        # Snapshot the keys — entries are deleted from the dict mid-loop.
        for platform in list(self._failed_platforms.keys()):
            if not self._running:
                return
            info = self._failed_platforms[platform]
            if now < info["next_retry"]:
                continue  # not time yet
            if info["attempts"] >= _MAX_ATTEMPTS:
                logger.warning(
                    "Giving up reconnecting %s after %d attempts",
                    platform.value, info["attempts"],
                )
                del self._failed_platforms[platform]
                continue
            platform_config = info["config"]
            attempt = info["attempts"] + 1
            logger.info(
                "Reconnecting %s (attempt %d/%d)...",
                platform.value, attempt, _MAX_ATTEMPTS,
            )
            try:
                # Build a fresh adapter each attempt — the previous one may
                # hold dead connections or partial state.
                adapter = self._create_adapter(platform, platform_config)
                if not adapter:
                    logger.warning(
                        "Reconnect %s: adapter creation returned None, removing from retry queue",
                        platform.value,
                    )
                    del self._failed_platforms[platform]
                    continue
                # Same handler wiring as the startup connect path.
                adapter.set_message_handler(self._handle_message)
                adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
                adapter.set_session_store(self.session_store)
                adapter.set_busy_session_handler(self._handle_active_session_busy_message)
                success = await adapter.connect()
                if success:
                    self.adapters[platform] = adapter
                    self._sync_voice_mode_state_to_adapter(adapter)
                    # Refresh the router's adapter map so deliveries can
                    # reach the newly reconnected platform.
                    self.delivery_router.adapters = self.adapters
                    del self._failed_platforms[platform]
                    self._update_platform_runtime_status(
                        platform.value,
                        platform_state="connected",
                        error_code=None,
                        error_message=None,
                    )
                    logger.info("%s reconnected successfully", platform.value)
                    # Rebuild channel directory with the new adapter
                    # (best-effort — reconnect already succeeded).
                    try:
                        from gateway.channel_directory import build_channel_directory
                        build_channel_directory(self.adapters)
                    except Exception:
                        pass
                else:
                    # Check if the failure is non-retryable
                    if adapter.has_fatal_error and not adapter.fatal_error_retryable:
                        self._update_platform_runtime_status(
                            platform.value,
                            platform_state="fatal",
                            error_code=adapter.fatal_error_code,
                            error_message=adapter.fatal_error_message,
                        )
                        logger.warning(
                            "Reconnect %s: non-retryable error (%s), removing from retry queue",
                            platform.value, adapter.fatal_error_message,
                        )
                        del self._failed_platforms[platform]
                    else:
                        self._update_platform_runtime_status(
                            platform.value,
                            platform_state="retrying",
                            error_code=adapter.fatal_error_code,
                            error_message=adapter.fatal_error_message or "failed to reconnect",
                        )
                        # Exponential backoff: 30s * 2^(attempt-1), capped.
                        backoff = min(30 * (2 ** (attempt - 1)), _BACKOFF_CAP)
                        info["attempts"] = attempt
                        info["next_retry"] = time.monotonic() + backoff
                        logger.info(
                            "Reconnect %s failed, next retry in %ds",
                            platform.value, backoff,
                        )
            except Exception as e:
                # Unexpected error during create/connect — treat as retryable
                # with the same backoff schedule as a failed connect.
                self._update_platform_runtime_status(
                    platform.value,
                    platform_state="retrying",
                    error_code=None,
                    error_message=str(e),
                )
                backoff = min(30 * (2 ** (attempt - 1)), _BACKOFF_CAP)
                info["attempts"] = attempt
                info["next_retry"] = time.monotonic() + backoff
                logger.warning(
                    "Reconnect %s error: %s, next retry in %ds",
                    platform.value, e, backoff,
                )
        # Check every 10 seconds for platforms that need reconnection
        for _ in range(10):
            if not self._running:
                return
            await asyncio.sleep(1)
async def stop(
self,
*,
restart: bool = False,
detached_restart: bool = False,
service_restart: bool = False,
) -> None:
"""Stop the gateway and disconnect all adapters."""
if restart:
self._restart_requested = True
self._restart_detached = detached_restart
self._restart_via_service = service_restart
if self._stop_task is not None:
await self._stop_task
return
async def _stop_impl() -> None:
logger.info(
"Stopping gateway%s...",
" for restart" if self._restart_requested else "",
)
self._running = False
self._draining = True
fix: notify active sessions on gateway shutdown + update health check Three fixes for gateway lifecycle stability: 1. Notify active sessions before shutdown (#new) When the gateway receives SIGTERM or /restart, it now sends a notification to every chat with an active agent BEFORE starting the drain. Users see: - Shutdown: 'Gateway shutting down — your task will be interrupted.' - Restart: 'Gateway restarting — use /retry after restart to continue.' Deduplicates per-chat so group sessions with multiple users get one notification. Best-effort: send failures are logged and swallowed. 2. Skip .clean_shutdown marker when drain timed out Previously, a graceful SIGTERM always wrote .clean_shutdown, even if agents were force-interrupted when the drain timed out. This meant the next startup skipped session suspension, leaving interrupted sessions in a broken state (trailing tool response, no final message). Now the marker is only written if the drain completed without timeout, so interrupted sessions get properly suspended on next startup. 3. Post-restart health check for hermes update (#6631) cmd_update() now verifies the gateway actually survived after systemctl restart (sleep 3s + is-active check). If the service crashed immediately, it retries once. If still dead, prints actionable diagnostics (journalctl command, manual restart hint). Also closes #8104 — already fixed on main (the /restart handler correctly detects systemd via INVOCATION_ID and uses via_service=True). Test plan: - 6 new tests for shutdown notifications (dedup, restart vs shutdown messaging, sentinel filtering, send failure resilience) - Existing restart drain + update tests pass (47 total)
2026-04-14 12:44:46 -07:00
# Notify all chats with active agents BEFORE draining.
# Adapters are still connected here, so messages can be sent.
await self._notify_active_sessions_of_shutdown()
timeout = self._restart_drain_timeout
active_agents, timed_out = await self._drain_active_agents(timeout)
if timed_out:
logger.warning(
"Gateway drain timed out after %.1fs with %d active agent(s); interrupting remaining work.",
timeout,
self._running_agent_count(),
)
self._interrupt_running_agents(
"Gateway restarting" if self._restart_requested else "Gateway shutting down"
)
interrupt_deadline = asyncio.get_running_loop().time() + 5.0
while self._running_agents and asyncio.get_running_loop().time() < interrupt_deadline:
self._update_runtime_status("draining")
await asyncio.sleep(0.1)
if self._restart_requested and self._restart_detached:
try:
await self._launch_detached_restart_command()
except Exception as e:
logger.error("Failed to launch detached gateway restart: %s", e)
self._finalize_shutdown_agents(active_agents)
for platform, adapter in list(self.adapters.items()):
try:
await adapter.cancel_background_tasks()
except Exception as e:
logger.debug("%s background-task cancel error: %s", platform.value, e)
try:
await adapter.disconnect()
logger.info("%s disconnected", platform.value)
except Exception as e:
logger.error("%s disconnect error: %s", platform.value, e)
for _task in list(self._background_tasks):
if _task is self._stop_task:
continue
_task.cancel()
self._background_tasks.clear()
self.adapters.clear()
self._running_agents.clear()
self._pending_messages.clear()
self._pending_approvals.clear()
if hasattr(self, '_busy_ack_ts'):
self._busy_ack_ts.clear()
self._shutdown_event.set()
# Global cleanup: kill any remaining tool subprocesses not tied
# to a specific agent (catch-all for zombie prevention).
try:
from tools.process_registry import process_registry
process_registry.kill_all()
except Exception:
pass
feat(memory): pluggable memory provider interface with profile isolation, review fixes, and honcho CLI restoration (#4623) * feat(memory): add pluggable memory provider interface with profile isolation Introduces a pluggable MemoryProvider ABC so external memory backends can integrate with Hermes without modifying core files. Each backend becomes a plugin implementing a standard interface, orchestrated by MemoryManager. Key architecture: - agent/memory_provider.py — ABC with core + optional lifecycle hooks - agent/memory_manager.py — single integration point in the agent loop - agent/builtin_memory_provider.py — wraps existing MEMORY.md/USER.md Profile isolation fixes applied to all 6 shipped plugins: - Cognitive Memory: use get_hermes_home() instead of raw env var - Hindsight Memory: check $HERMES_HOME/hindsight/config.json first, fall back to legacy ~/.hindsight/ for backward compat - Hermes Memory Store: replace hardcoded ~/.hermes paths with get_hermes_home() for config loading and DB path defaults - Mem0 Memory: use get_hermes_home() instead of raw env var - RetainDB Memory: auto-derive profile-scoped project name from hermes_home path (hermes-<profile>), explicit env var overrides - OpenViking Memory: read-only, no local state, isolation via .env MemoryManager.initialize_all() now injects hermes_home into kwargs so every provider can resolve profile-scoped storage without importing get_hermes_home() themselves. Plugin system: adds register_memory_provider() to PluginContext and get_plugin_memory_providers() accessor. Based on PR #3825. 46 tests (37 unit + 5 E2E + 4 plugin registration). * refactor(memory): drop cognitive plugin, rewrite OpenViking as full provider Remove cognitive-memory plugin (#727) — core mechanics are broken: decay runs 24x too fast (hourly not daily), prefetch uses row ID as timestamp, search limited by importance not similarity. 
Rewrite openviking-memory plugin from a read-only search wrapper into a full bidirectional memory provider using the complete OpenViking session lifecycle API: - sync_turn: records user/assistant messages to OpenViking session (threaded, non-blocking) - on_session_end: commits session to trigger automatic memory extraction into 6 categories (profile, preferences, entities, events, cases, patterns) - prefetch: background semantic search via find() endpoint - on_memory_write: mirrors built-in memory writes to the session - is_available: checks env var only, no network calls (ABC compliance) Tools expanded from 3 to 5: - viking_search: semantic search with mode/scope/limit - viking_read: tiered content (abstract ~100tok / overview ~2k / full) - viking_browse: filesystem-style navigation (list/tree/stat) - viking_remember: explicit memory storage via session - viking_add_resource: ingest URLs/docs into knowledge base Uses direct HTTP via httpx (no openviking SDK dependency needed). Response truncation on viking_read to prevent context flooding. * fix(memory): harden Mem0 plugin — thread safety, non-blocking sync, circuit breaker - Remove redundant mem0_context tool (identical to mem0_search with rerank=true, top_k=5 — wastes a tool slot and confuses the model) - Thread sync_turn so it's non-blocking — Mem0's server-side LLM extraction can take 5-10s, was stalling the agent after every turn - Add threading.Lock around _get_client() for thread-safe lazy init (prefetch and sync threads could race on first client creation) - Add circuit breaker: after 5 consecutive API failures, pause calls for 120s instead of hammering a down server every turn. Auto-resets after cooldown. Logs a warning when tripped. 
- Track success/failure in prefetch, sync_turn, and all tool calls - Wait for previous sync to finish before starting a new one (prevents unbounded thread accumulation on rapid turns) - Clean up shutdown to join both prefetch and sync threads * fix(memory): enforce single external memory provider limit MemoryManager now rejects a second non-builtin provider with a warning. Built-in memory (MEMORY.md/USER.md) is always accepted. Only ONE external plugin provider is allowed at a time. This prevents tool schema bloat (some providers add 3-5 tools each) and conflicting memory backends. The warning message directs users to configure memory.provider in config.yaml to select which provider to activate. Updated all 47 tests to use builtin + one external pattern instead of multiple externals. Added test_second_external_rejected to verify the enforcement. * feat(memory): add ByteRover memory provider plugin Implements the ByteRover integration (from PR #3499 by hieuntg81) as a MemoryProvider plugin instead of direct run_agent.py modifications. ByteRover provides persistent memory via the brv CLI — a hierarchical knowledge tree with tiered retrieval (fuzzy text then LLM-driven search). Local-first with optional cloud sync. Plugin capabilities: - prefetch: background brv query for relevant context - sync_turn: curate conversation turns (threaded, non-blocking) - on_memory_write: mirror built-in memory writes to brv - on_pre_compress: extract insights before context compression Tools (3): - brv_query: search the knowledge tree - brv_curate: store facts/decisions/patterns - brv_status: check CLI version and context tree state Profile isolation: working directory at $HERMES_HOME/byterover/ (scoped per profile). Binary resolution cached with thread-safe double-checked locking. All write operations threaded to avoid blocking the agent (curate can take 120s with LLM processing). 
* fix(memory): thread remaining sync_turns, fix holographic, add config key Plugin fixes: - Hindsight: thread sync_turn (was blocking up to 30s via _run_in_thread) - RetainDB: thread sync_turn (was blocking on HTTP POST) - Both: shutdown now joins sync threads alongside prefetch threads Holographic retrieval fixes: - reason(): removed dead intersection_key computation (bundled but never used in scoring). Now reuses pre-computed entity_residuals directly, moved role_content encoding outside the inner loop. - contradict(): added _MAX_CONTRADICT_FACTS=500 scaling guard. Above 500 facts, only checks the most recently updated ones to avoid O(n^2) explosion (~125K comparisons at 500 is acceptable). Config: - Added memory.provider key to DEFAULT_CONFIG ("" = builtin only). No version bump needed (deep_merge handles new keys automatically). * feat(memory): extract Honcho as a MemoryProvider plugin Creates plugins/honcho-memory/ as a thin adapter over the existing honcho_integration/ package. All 4 Honcho tools (profile, search, context, conclude) move from the normal tool registry to the MemoryProvider interface. The plugin delegates all work to HonchoSessionManager — no Honcho logic is reimplemented. It uses the existing config chain: $HERMES_HOME/honcho.json -> ~/.honcho/config.json -> env vars. Lifecycle hooks: - initialize: creates HonchoSessionManager via existing client factory - prefetch: background dialectic query - sync_turn: records messages + flushes to API (threaded) - on_memory_write: mirrors user profile writes as conclusions - on_session_end: flushes all pending messages This is a prerequisite for the MemoryManager wiring in run_agent.py. Once wired, Honcho goes through the same provider interface as all other memory plugins, and the scattered Honcho code in run_agent.py can be consolidated into the single MemoryManager integration point. 
* feat(memory): wire MemoryManager into run_agent.py Adds 8 integration points for the external memory provider plugin, all purely additive (zero existing code modified): 1. Init (~L1130): Create MemoryManager, find matching plugin provider from memory.provider config, initialize with session context 2. Tool injection (~L1160): Append provider tool schemas to self.tools and self.valid_tool_names after memory_manager init 3. System prompt (~L2705): Add external provider's system_prompt_block alongside existing MEMORY.md/USER.md blocks 4. Tool routing (~L5362): Route provider tool calls through memory_manager.handle_tool_call() before the catchall handler 5. Memory write bridge (~L5353): Notify external provider via on_memory_write() when the built-in memory tool writes 6. Pre-compress (~L5233): Call on_pre_compress() before context compression discards messages 7. Prefetch (~L6421): Inject provider prefetch results into the current-turn user message (same pattern as Honcho turn context) 8. Turn sync + session end (~L8161, ~L8172): sync_all() after each completed turn, queue_prefetch_all() for next turn, on_session_end() + shutdown_all() at conversation end All hooks are wrapped in try/except — a failing provider never breaks the agent. The existing memory system, Honcho integration, and all other code paths are completely untouched. Full suite: 7222 passed, 4 pre-existing failures. * refactor(memory): remove legacy Honcho integration from core Extracts all Honcho-specific code from run_agent.py, model_tools.py, toolsets.py, and gateway/run.py. Honcho is now exclusively available as a memory provider plugin (plugins/honcho-memory/). 
Removed from run_agent.py (-457 lines): - Honcho init block (session manager creation, activation, config) - 8 Honcho methods: _honcho_should_activate, _strip_honcho_tools, _activate_honcho, _register_honcho_exit_hook, _queue_honcho_prefetch, _honcho_prefetch, _honcho_save_user_observation, _honcho_sync - _inject_honcho_turn_context module-level function - Honcho system prompt block (tool descriptions, CLI commands) - Honcho context injection in api_messages building - Honcho params from __init__ (honcho_session_key, honcho_manager, honcho_config) - HONCHO_TOOL_NAMES constant - All honcho-specific tool dispatch forwarding Removed from other files: - model_tools.py: honcho_tools import, honcho params from handle_function_call - toolsets.py: honcho toolset definition, honcho tools from core tools list - gateway/run.py: honcho params from AIAgent constructor calls Removed tests (-339 lines): - 9 Honcho-specific test methods from test_run_agent.py - TestHonchoAtexitFlush class from test_exit_cleanup_interrupt.py Restored two regex constants (_SURROGATE_RE, _BUDGET_WARNING_RE) that were accidentally removed during the honcho function extraction. The honcho_integration/ package is kept intact — the plugin delegates to it. tools/honcho_tools.py registry entries are now dead code (import commented out in model_tools.py) but the file is preserved for reference. Full suite: 7207 passed, 4 pre-existing failures. Zero regressions. 
* refactor(memory): restructure plugins, add CLI, clean gateway, migration notice Plugin restructure: - Move all memory plugins from plugins/<name>-memory/ to plugins/memory/<name>/ (byterover, hindsight, holographic, honcho, mem0, openviking, retaindb) - New plugins/memory/__init__.py discovery module that scans the directory directly, loading providers by name without the general plugin system - run_agent.py uses load_memory_provider() instead of get_plugin_memory_providers() CLI wiring: - hermes memory setup — interactive curses picker + config wizard - hermes memory status — show active provider, config, availability - hermes memory off — disable external provider (built-in only) - hermes honcho — now shows migration notice pointing to hermes memory setup Gateway cleanup: - Remove _get_or_create_gateway_honcho (already removed in prev commit) - Remove _shutdown_gateway_honcho and _shutdown_all_gateway_honcho methods - Remove all calls to shutdown methods (4 call sites) - Remove _honcho_managers/_honcho_configs dict references Dead code removal: - Delete tools/honcho_tools.py (279 lines, import was already commented out) - Delete tests/gateway/test_honcho_lifecycle.py (131 lines, tested removed methods) - Remove if False placeholder from run_agent.py Migration: - Honcho migration notice on startup: detects existing honcho.json or ~/.honcho/config.json, prints guidance to run hermes memory setup. Only fires when memory.provider is not set and not in quiet mode. Full suite: 7203 passed, 4 pre-existing failures. Zero regressions. 
* feat(memory): standardize plugin config + add per-plugin documentation Config architecture: - Add save_config(values, hermes_home) to MemoryProvider ABC - Honcho: writes to $HERMES_HOME/honcho.json (SDK native) - Mem0: writes to $HERMES_HOME/mem0.json - Hindsight: writes to $HERMES_HOME/hindsight/config.json - Holographic: writes to config.yaml under plugins.hermes-memory-store - OpenViking/RetainDB/ByteRover: env-var only (default no-op) Setup wizard (hermes memory setup): - Now calls provider.save_config() for non-secret config - Secrets still go to .env via env vars - Only memory.provider activation key goes to config.yaml Documentation: - README.md for each of the 7 providers in plugins/memory/<name>/ - Requirements, setup (wizard + manual), config reference, tools table - Consistent format across all providers The contract for new memory plugins: - get_config_schema() declares all fields (REQUIRED) - save_config() writes native config (REQUIRED if not env-var-only) - Secrets use env_var field in schema, written to .env by wizard - README.md in the plugin directory * docs: add memory providers user guide + developer guide New pages: - user-guide/features/memory-providers.md — comprehensive guide covering all 7 shipped providers (Honcho, OpenViking, Mem0, Hindsight, Holographic, RetainDB, ByteRover). Each with setup, config, tools, cost, and unique features. Includes comparison table and profile isolation notes. - developer-guide/memory-provider-plugin.md — how to build a new memory provider plugin. Covers ABC, required methods, config schema, save_config, threading contract, profile isolation, testing. 
Updated pages: - user-guide/features/memory.md — replaced Honcho section with link to new Memory Providers page - user-guide/features/honcho.md — replaced with migration redirect to the new Memory Providers page - sidebars.ts — added both new pages to navigation * fix(memory): auto-migrate Honcho users to memory provider plugin When honcho.json or ~/.honcho/config.json exists but memory.provider is not set, automatically set memory.provider: honcho in config.yaml and activate the plugin. The plugin reads the same config files, so all data and credentials are preserved. Zero user action needed. Persists the migration to config.yaml so it only fires once. Prints a one-line confirmation in non-quiet mode. * fix(memory): only auto-migrate Honcho when enabled + credentialed Check HonchoClientConfig.enabled AND (api_key OR base_url) before auto-migrating — not just file existence. Prevents false activation for users who disabled Honcho, stopped using it (config lingers), or have ~/.honcho/ from a different tool. * feat(memory): auto-install pip dependencies during hermes memory setup Reads pip_dependencies from plugin.yaml, checks which are missing, installs them via pip before config walkthrough. Also shows install guidance for external_dependencies (e.g. brv CLI for ByteRover). Updated all 7 plugin.yaml files with pip_dependencies: - honcho: honcho-ai - mem0: mem0ai - openviking: httpx - hindsight: hindsight-client - holographic: (none) - retaindb: requests - byterover: (external_dependencies for brv CLI) * fix: remove remaining Honcho crash risks from cli.py and gateway cli.py: removed Honcho session re-mapping block (would crash importing deleted tools/honcho_tools.py), Honcho flush on compress, Honcho session display on startup, Honcho shutdown on exit, honcho_session_key AIAgent param. gateway/run.py: removed honcho_session_key params from helper methods, sync_honcho param, _honcho.shutdown() block. 
tests: fixed test_cron_session_with_honcho_key_skipped (was passing removed honcho_key param to _flush_memories_for_session). * fix: include plugins/ in pyproject.toml package list Without this, plugins/memory/ wouldn't be included in non-editable installs. Hermes always runs from the repo checkout so this is belt-and-suspenders, but prevents breakage if the install method changes. * fix(memory): correct pip-to-import name mapping for dep checks The heuristic dep.replace('-', '_') fails for packages where the pip name differs from the import name: honcho-ai→honcho, mem0ai→mem0, hindsight-client→hindsight_client. Added explicit mapping table so hermes memory setup doesn't try to reinstall already-installed packages. * chore: remove dead code from old plugin memory registration path - hermes_cli/plugins.py: removed register_memory_provider(), _memory_providers list, get_plugin_memory_providers() — memory providers now use plugins/memory/ discovery, not the general plugin system - hermes_cli/main.py: stripped 74 lines of dead honcho argparse subparsers (setup, status, sessions, map, peer, mode, tokens, identity, migrate) — kept only the migration redirect - agent/memory_provider.py: updated docstring to reflect new registration path - tests: replaced TestPluginMemoryProviderRegistration with TestPluginMemoryDiscovery that tests the actual plugins/memory/ discovery system. Added 3 new tests (discover, load, nonexistent). * chore: delete dead honcho_integration/cli.py and its tests cli.py (794 lines) was the old 'hermes honcho' command handler — nobody calls it since cmd_honcho was replaced with a migration redirect. 
Deleted tests that imported from removed code: - tests/honcho_integration/test_cli.py (tested _resolve_api_key) - tests/honcho_integration/test_config_isolation.py (tested CLI config paths) - tests/tools/test_honcho_tools.py (tested the deleted tools/honcho_tools.py) Remaining honcho_integration/ files (actively used by the plugin): - client.py (445 lines) — config loading, SDK client creation - session.py (991 lines) — session management, queries, flush * refactor: move honcho_integration/ into the honcho plugin Moves client.py (445 lines) and session.py (991 lines) from the top-level honcho_integration/ package into plugins/memory/honcho/. No Honcho code remains in the main codebase. - plugins/memory/honcho/client.py — config loading, SDK client creation - plugins/memory/honcho/session.py — session management, queries, flush - Updated all imports: run_agent.py (auto-migration), hermes_cli/doctor.py, plugin __init__.py, session.py cross-import, all tests - Removed honcho_integration/ package and pyproject.toml entry - Renamed tests/honcho_integration/ → tests/honcho_plugin/ * docs: update architecture + gateway-internals for memory provider system - architecture.md: replaced honcho_integration/ with plugins/memory/ - gateway-internals.md: replaced Honcho-specific session routing and flush lifecycle docs with generic memory provider interface docs * fix: update stale mock path for resolve_active_host after honcho plugin migration * fix(memory): address review feedback — P0 lifecycle, ABC contract, honcho CLI restore Review feedback from Honcho devs (erosika): P0 — Provider lifecycle: - Remove on_session_end() + shutdown_all() from run_conversation() tail (was killing providers after every turn in multi-turn sessions) - Add shutdown_memory_provider() method on AIAgent for callers - Wire shutdown into CLI atexit, reset_conversation, gateway stop/expiry Bug fixes: - Remove sync_honcho=False kwarg from /btw callsites (TypeError crash) - Fix doctor.py references to dead 
'hermes honcho setup' command - Cache prefetch_all() before tool loop (was re-calling every iteration) ABC contract hardening (all backwards-compatible): - Add session_id kwarg to prefetch/sync_turn/queue_prefetch - Make on_pre_compress() return str (provider insights in compression) - Add **kwargs to on_turn_start() for runtime context - Add on_delegation() hook for parent-side subagent observation - Document agent_context/agent_identity/agent_workspace kwargs on initialize() (prevents cron corruption, enables profile scoping) - Fix docstring: single external provider, not multiple Honcho CLI restoration: - Add plugins/memory/honcho/cli.py (from main's honcho_integration/cli.py with imports adapted to plugin path) - Restore full hermes honcho command with all subcommands (status, peer, mode, tokens, identity, enable/disable, sync, peers, --target-profile) - Restore auto-clone on profile creation + sync on hermes update - hermes honcho setup now redirects to hermes memory setup * fix(memory): wire on_delegation, skip_memory for cron/flush, fix ByteRover return type - Wire on_delegation() in delegate_tool.py — parent's memory provider is notified with task+result after each subagent completes - Add skip_memory=True to cron scheduler (prevents cron system prompts from corrupting user representations — closes #4052) - Add skip_memory=True to gateway flush agent (throwaway agent shouldn't activate memory provider) - Fix ByteRover on_pre_compress() return type: None -> str * fix(honcho): port profile isolation fixes from PR #4632 Ports 5 bug fixes found during profile testing (erosika's PR #4632): 1. 3-tier config resolution — resolve_config_path() now checks $HERMES_HOME/honcho.json → ~/.hermes/honcho.json → ~/.honcho/config.json (non-default profiles couldn't find shared host blocks) 2. Thread host=_host_key() through from_global_config() in cmd_setup, cmd_status, cmd_identity (--target-profile was being ignored) 3. 
Use bare profile name as aiPeer (not host key with dots) — Honcho's peer ID pattern is ^[a-zA-Z0-9_-]+$, dots are invalid 4. Wrap add_peers() in try/except — was fatal on new AI peers, killed all message uploads for the session 5. Gate Honcho clone behind --clone/--clone-all on profile create (bare create should be blank-slate) Also: sanitize assistant_peer_id via _sanitize_id() * fix(tests): add module cleanup fixture to test_cli_provider_resolution test_cli_provider_resolution._import_cli() wipes tools.*, cli, and run_agent from sys.modules to force fresh imports, but had no cleanup. This poisoned all subsequent tests on the same xdist worker — mocks targeting tools.file_tools, tools.send_message_tool, etc. patched the NEW module object while already-imported functions still referenced the OLD one. Caused ~25 cascade failures: send_message KeyError, process_registry FileNotFoundError, file_read_guards timeouts, read_loop_detection file-not-found, mcp_oauth None port, and provider_parity/codex_execution stale tool lists. Fix: autouse fixture saves all affected modules before each test and restores them after, matching the pattern in test_managed_browserbase_and_modal.py.
2026-04-02 15:33:51 -07:00
try:
from tools.terminal_tool import cleanup_all_environments
cleanup_all_environments()
feat(memory): pluggable memory provider interface with profile isolation, review fixes, and honcho CLI restoration (#4623) * feat(memory): add pluggable memory provider interface with profile isolation Introduces a pluggable MemoryProvider ABC so external memory backends can integrate with Hermes without modifying core files. Each backend becomes a plugin implementing a standard interface, orchestrated by MemoryManager. Key architecture: - agent/memory_provider.py — ABC with core + optional lifecycle hooks - agent/memory_manager.py — single integration point in the agent loop - agent/builtin_memory_provider.py — wraps existing MEMORY.md/USER.md Profile isolation fixes applied to all 6 shipped plugins: - Cognitive Memory: use get_hermes_home() instead of raw env var - Hindsight Memory: check $HERMES_HOME/hindsight/config.json first, fall back to legacy ~/.hindsight/ for backward compat - Hermes Memory Store: replace hardcoded ~/.hermes paths with get_hermes_home() for config loading and DB path defaults - Mem0 Memory: use get_hermes_home() instead of raw env var - RetainDB Memory: auto-derive profile-scoped project name from hermes_home path (hermes-<profile>), explicit env var overrides - OpenViking Memory: read-only, no local state, isolation via .env MemoryManager.initialize_all() now injects hermes_home into kwargs so every provider can resolve profile-scoped storage without importing get_hermes_home() themselves. Plugin system: adds register_memory_provider() to PluginContext and get_plugin_memory_providers() accessor. Based on PR #3825. 46 tests (37 unit + 5 E2E + 4 plugin registration). * refactor(memory): drop cognitive plugin, rewrite OpenViking as full provider Remove cognitive-memory plugin (#727) — core mechanics are broken: decay runs 24x too fast (hourly not daily), prefetch uses row ID as timestamp, search limited by importance not similarity. 
Rewrite openviking-memory plugin from a read-only search wrapper into a full bidirectional memory provider using the complete OpenViking session lifecycle API: - sync_turn: records user/assistant messages to OpenViking session (threaded, non-blocking) - on_session_end: commits session to trigger automatic memory extraction into 6 categories (profile, preferences, entities, events, cases, patterns) - prefetch: background semantic search via find() endpoint - on_memory_write: mirrors built-in memory writes to the session - is_available: checks env var only, no network calls (ABC compliance) Tools expanded from 3 to 5: - viking_search: semantic search with mode/scope/limit - viking_read: tiered content (abstract ~100tok / overview ~2k / full) - viking_browse: filesystem-style navigation (list/tree/stat) - viking_remember: explicit memory storage via session - viking_add_resource: ingest URLs/docs into knowledge base Uses direct HTTP via httpx (no openviking SDK dependency needed). Response truncation on viking_read to prevent context flooding. * fix(memory): harden Mem0 plugin — thread safety, non-blocking sync, circuit breaker - Remove redundant mem0_context tool (identical to mem0_search with rerank=true, top_k=5 — wastes a tool slot and confuses the model) - Thread sync_turn so it's non-blocking — Mem0's server-side LLM extraction can take 5-10s, was stalling the agent after every turn - Add threading.Lock around _get_client() for thread-safe lazy init (prefetch and sync threads could race on first client creation) - Add circuit breaker: after 5 consecutive API failures, pause calls for 120s instead of hammering a down server every turn. Auto-resets after cooldown. Logs a warning when tripped. 
- Track success/failure in prefetch, sync_turn, and all tool calls - Wait for previous sync to finish before starting a new one (prevents unbounded thread accumulation on rapid turns) - Clean up shutdown to join both prefetch and sync threads * fix(memory): enforce single external memory provider limit MemoryManager now rejects a second non-builtin provider with a warning. Built-in memory (MEMORY.md/USER.md) is always accepted. Only ONE external plugin provider is allowed at a time. This prevents tool schema bloat (some providers add 3-5 tools each) and conflicting memory backends. The warning message directs users to configure memory.provider in config.yaml to select which provider to activate. Updated all 47 tests to use builtin + one external pattern instead of multiple externals. Added test_second_external_rejected to verify the enforcement. * feat(memory): add ByteRover memory provider plugin Implements the ByteRover integration (from PR #3499 by hieuntg81) as a MemoryProvider plugin instead of direct run_agent.py modifications. ByteRover provides persistent memory via the brv CLI — a hierarchical knowledge tree with tiered retrieval (fuzzy text then LLM-driven search). Local-first with optional cloud sync. Plugin capabilities: - prefetch: background brv query for relevant context - sync_turn: curate conversation turns (threaded, non-blocking) - on_memory_write: mirror built-in memory writes to brv - on_pre_compress: extract insights before context compression Tools (3): - brv_query: search the knowledge tree - brv_curate: store facts/decisions/patterns - brv_status: check CLI version and context tree state Profile isolation: working directory at $HERMES_HOME/byterover/ (scoped per profile). Binary resolution cached with thread-safe double-checked locking. All write operations threaded to avoid blocking the agent (curate can take 120s with LLM processing). 
* fix(memory): thread remaining sync_turns, fix holographic, add config key Plugin fixes: - Hindsight: thread sync_turn (was blocking up to 30s via _run_in_thread) - RetainDB: thread sync_turn (was blocking on HTTP POST) - Both: shutdown now joins sync threads alongside prefetch threads Holographic retrieval fixes: - reason(): removed dead intersection_key computation (bundled but never used in scoring). Now reuses pre-computed entity_residuals directly, moved role_content encoding outside the inner loop. - contradict(): added _MAX_CONTRADICT_FACTS=500 scaling guard. Above 500 facts, only checks the most recently updated ones to avoid O(n^2) explosion (~125K comparisons at 500 is acceptable). Config: - Added memory.provider key to DEFAULT_CONFIG ("" = builtin only). No version bump needed (deep_merge handles new keys automatically). * feat(memory): extract Honcho as a MemoryProvider plugin Creates plugins/honcho-memory/ as a thin adapter over the existing honcho_integration/ package. All 4 Honcho tools (profile, search, context, conclude) move from the normal tool registry to the MemoryProvider interface. The plugin delegates all work to HonchoSessionManager — no Honcho logic is reimplemented. It uses the existing config chain: $HERMES_HOME/honcho.json -> ~/.honcho/config.json -> env vars. Lifecycle hooks: - initialize: creates HonchoSessionManager via existing client factory - prefetch: background dialectic query - sync_turn: records messages + flushes to API (threaded) - on_memory_write: mirrors user profile writes as conclusions - on_session_end: flushes all pending messages This is a prerequisite for the MemoryManager wiring in run_agent.py. Once wired, Honcho goes through the same provider interface as all other memory plugins, and the scattered Honcho code in run_agent.py can be consolidated into the single MemoryManager integration point. 
* feat(memory): wire MemoryManager into run_agent.py Adds 8 integration points for the external memory provider plugin, all purely additive (zero existing code modified): 1. Init (~L1130): Create MemoryManager, find matching plugin provider from memory.provider config, initialize with session context 2. Tool injection (~L1160): Append provider tool schemas to self.tools and self.valid_tool_names after memory_manager init 3. System prompt (~L2705): Add external provider's system_prompt_block alongside existing MEMORY.md/USER.md blocks 4. Tool routing (~L5362): Route provider tool calls through memory_manager.handle_tool_call() before the catchall handler 5. Memory write bridge (~L5353): Notify external provider via on_memory_write() when the built-in memory tool writes 6. Pre-compress (~L5233): Call on_pre_compress() before context compression discards messages 7. Prefetch (~L6421): Inject provider prefetch results into the current-turn user message (same pattern as Honcho turn context) 8. Turn sync + session end (~L8161, ~L8172): sync_all() after each completed turn, queue_prefetch_all() for next turn, on_session_end() + shutdown_all() at conversation end All hooks are wrapped in try/except — a failing provider never breaks the agent. The existing memory system, Honcho integration, and all other code paths are completely untouched. Full suite: 7222 passed, 4 pre-existing failures. * refactor(memory): remove legacy Honcho integration from core Extracts all Honcho-specific code from run_agent.py, model_tools.py, toolsets.py, and gateway/run.py. Honcho is now exclusively available as a memory provider plugin (plugins/honcho-memory/). 
Removed from run_agent.py (-457 lines): - Honcho init block (session manager creation, activation, config) - 8 Honcho methods: _honcho_should_activate, _strip_honcho_tools, _activate_honcho, _register_honcho_exit_hook, _queue_honcho_prefetch, _honcho_prefetch, _honcho_save_user_observation, _honcho_sync - _inject_honcho_turn_context module-level function - Honcho system prompt block (tool descriptions, CLI commands) - Honcho context injection in api_messages building - Honcho params from __init__ (honcho_session_key, honcho_manager, honcho_config) - HONCHO_TOOL_NAMES constant - All honcho-specific tool dispatch forwarding Removed from other files: - model_tools.py: honcho_tools import, honcho params from handle_function_call - toolsets.py: honcho toolset definition, honcho tools from core tools list - gateway/run.py: honcho params from AIAgent constructor calls Removed tests (-339 lines): - 9 Honcho-specific test methods from test_run_agent.py - TestHonchoAtexitFlush class from test_exit_cleanup_interrupt.py Restored two regex constants (_SURROGATE_RE, _BUDGET_WARNING_RE) that were accidentally removed during the honcho function extraction. The honcho_integration/ package is kept intact — the plugin delegates to it. tools/honcho_tools.py registry entries are now dead code (import commented out in model_tools.py) but the file is preserved for reference. Full suite: 7207 passed, 4 pre-existing failures. Zero regressions. 
* refactor(memory): restructure plugins, add CLI, clean gateway, migration notice Plugin restructure: - Move all memory plugins from plugins/<name>-memory/ to plugins/memory/<name>/ (byterover, hindsight, holographic, honcho, mem0, openviking, retaindb) - New plugins/memory/__init__.py discovery module that scans the directory directly, loading providers by name without the general plugin system - run_agent.py uses load_memory_provider() instead of get_plugin_memory_providers() CLI wiring: - hermes memory setup — interactive curses picker + config wizard - hermes memory status — show active provider, config, availability - hermes memory off — disable external provider (built-in only) - hermes honcho — now shows migration notice pointing to hermes memory setup Gateway cleanup: - Remove _get_or_create_gateway_honcho (already removed in prev commit) - Remove _shutdown_gateway_honcho and _shutdown_all_gateway_honcho methods - Remove all calls to shutdown methods (4 call sites) - Remove _honcho_managers/_honcho_configs dict references Dead code removal: - Delete tools/honcho_tools.py (279 lines, import was already commented out) - Delete tests/gateway/test_honcho_lifecycle.py (131 lines, tested removed methods) - Remove if False placeholder from run_agent.py Migration: - Honcho migration notice on startup: detects existing honcho.json or ~/.honcho/config.json, prints guidance to run hermes memory setup. Only fires when memory.provider is not set and not in quiet mode. Full suite: 7203 passed, 4 pre-existing failures. Zero regressions. 
* feat(memory): standardize plugin config + add per-plugin documentation Config architecture: - Add save_config(values, hermes_home) to MemoryProvider ABC - Honcho: writes to $HERMES_HOME/honcho.json (SDK native) - Mem0: writes to $HERMES_HOME/mem0.json - Hindsight: writes to $HERMES_HOME/hindsight/config.json - Holographic: writes to config.yaml under plugins.hermes-memory-store - OpenViking/RetainDB/ByteRover: env-var only (default no-op) Setup wizard (hermes memory setup): - Now calls provider.save_config() for non-secret config - Secrets still go to .env via env vars - Only memory.provider activation key goes to config.yaml Documentation: - README.md for each of the 7 providers in plugins/memory/<name>/ - Requirements, setup (wizard + manual), config reference, tools table - Consistent format across all providers The contract for new memory plugins: - get_config_schema() declares all fields (REQUIRED) - save_config() writes native config (REQUIRED if not env-var-only) - Secrets use env_var field in schema, written to .env by wizard - README.md in the plugin directory * docs: add memory providers user guide + developer guide New pages: - user-guide/features/memory-providers.md — comprehensive guide covering all 7 shipped providers (Honcho, OpenViking, Mem0, Hindsight, Holographic, RetainDB, ByteRover). Each with setup, config, tools, cost, and unique features. Includes comparison table and profile isolation notes. - developer-guide/memory-provider-plugin.md — how to build a new memory provider plugin. Covers ABC, required methods, config schema, save_config, threading contract, profile isolation, testing. 
Updated pages: - user-guide/features/memory.md — replaced Honcho section with link to new Memory Providers page - user-guide/features/honcho.md — replaced with migration redirect to the new Memory Providers page - sidebars.ts — added both new pages to navigation * fix(memory): auto-migrate Honcho users to memory provider plugin When honcho.json or ~/.honcho/config.json exists but memory.provider is not set, automatically set memory.provider: honcho in config.yaml and activate the plugin. The plugin reads the same config files, so all data and credentials are preserved. Zero user action needed. Persists the migration to config.yaml so it only fires once. Prints a one-line confirmation in non-quiet mode. * fix(memory): only auto-migrate Honcho when enabled + credentialed Check HonchoClientConfig.enabled AND (api_key OR base_url) before auto-migrating — not just file existence. Prevents false activation for users who disabled Honcho, stopped using it (config lingers), or have ~/.honcho/ from a different tool. * feat(memory): auto-install pip dependencies during hermes memory setup Reads pip_dependencies from plugin.yaml, checks which are missing, installs them via pip before config walkthrough. Also shows install guidance for external_dependencies (e.g. brv CLI for ByteRover). Updated all 7 plugin.yaml files with pip_dependencies: - honcho: honcho-ai - mem0: mem0ai - openviking: httpx - hindsight: hindsight-client - holographic: (none) - retaindb: requests - byterover: (external_dependencies for brv CLI) * fix: remove remaining Honcho crash risks from cli.py and gateway cli.py: removed Honcho session re-mapping block (would crash importing deleted tools/honcho_tools.py), Honcho flush on compress, Honcho session display on startup, Honcho shutdown on exit, honcho_session_key AIAgent param. gateway/run.py: removed honcho_session_key params from helper methods, sync_honcho param, _honcho.shutdown() block. 
tests: fixed test_cron_session_with_honcho_key_skipped (was passing removed honcho_key param to _flush_memories_for_session). * fix: include plugins/ in pyproject.toml package list Without this, plugins/memory/ wouldn't be included in non-editable installs. Hermes always runs from the repo checkout so this is belt- and-suspenders, but prevents breakage if the install method changes. * fix(memory): correct pip-to-import name mapping for dep checks The heuristic dep.replace('-', '_') fails for packages where the pip name differs from the import name: honcho-ai→honcho, mem0ai→mem0, hindsight-client→hindsight_client. Added explicit mapping table so hermes memory setup doesn't try to reinstall already-installed packages. * chore: remove dead code from old plugin memory registration path - hermes_cli/plugins.py: removed register_memory_provider(), _memory_providers list, get_plugin_memory_providers() — memory providers now use plugins/memory/ discovery, not the general plugin system - hermes_cli/main.py: stripped 74 lines of dead honcho argparse subparsers (setup, status, sessions, map, peer, mode, tokens, identity, migrate) — kept only the migration redirect - agent/memory_provider.py: updated docstring to reflect new registration path - tests: replaced TestPluginMemoryProviderRegistration with TestPluginMemoryDiscovery that tests the actual plugins/memory/ discovery system. Added 3 new tests (discover, load, nonexistent). * chore: delete dead honcho_integration/cli.py and its tests cli.py (794 lines) was the old 'hermes honcho' command handler — nobody calls it since cmd_honcho was replaced with a migration redirect. 
Deleted tests that imported from removed code: - tests/honcho_integration/test_cli.py (tested _resolve_api_key) - tests/honcho_integration/test_config_isolation.py (tested CLI config paths) - tests/tools/test_honcho_tools.py (tested the deleted tools/honcho_tools.py) Remaining honcho_integration/ files (actively used by the plugin): - client.py (445 lines) — config loading, SDK client creation - session.py (991 lines) — session management, queries, flush * refactor: move honcho_integration/ into the honcho plugin Moves client.py (445 lines) and session.py (991 lines) from the top-level honcho_integration/ package into plugins/memory/honcho/. No Honcho code remains in the main codebase. - plugins/memory/honcho/client.py — config loading, SDK client creation - plugins/memory/honcho/session.py — session management, queries, flush - Updated all imports: run_agent.py (auto-migration), hermes_cli/doctor.py, plugin __init__.py, session.py cross-import, all tests - Removed honcho_integration/ package and pyproject.toml entry - Renamed tests/honcho_integration/ → tests/honcho_plugin/ * docs: update architecture + gateway-internals for memory provider system - architecture.md: replaced honcho_integration/ with plugins/memory/ - gateway-internals.md: replaced Honcho-specific session routing and flush lifecycle docs with generic memory provider interface docs * fix: update stale mock path for resolve_active_host after honcho plugin migration * fix(memory): address review feedback — P0 lifecycle, ABC contract, honcho CLI restore Review feedback from Honcho devs (erosika): P0 — Provider lifecycle: - Remove on_session_end() + shutdown_all() from run_conversation() tail (was killing providers after every turn in multi-turn sessions) - Add shutdown_memory_provider() method on AIAgent for callers - Wire shutdown into CLI atexit, reset_conversation, gateway stop/expiry Bug fixes: - Remove sync_honcho=False kwarg from /btw callsites (TypeError crash) - Fix doctor.py references to dead 
'hermes honcho setup' command - Cache prefetch_all() before tool loop (was re-calling every iteration) ABC contract hardening (all backwards-compatible): - Add session_id kwarg to prefetch/sync_turn/queue_prefetch - Make on_pre_compress() return str (provider insights in compression) - Add **kwargs to on_turn_start() for runtime context - Add on_delegation() hook for parent-side subagent observation - Document agent_context/agent_identity/agent_workspace kwargs on initialize() (prevents cron corruption, enables profile scoping) - Fix docstring: single external provider, not multiple Honcho CLI restoration: - Add plugins/memory/honcho/cli.py (from main's honcho_integration/cli.py with imports adapted to plugin path) - Restore full hermes honcho command with all subcommands (status, peer, mode, tokens, identity, enable/disable, sync, peers, --target-profile) - Restore auto-clone on profile creation + sync on hermes update - hermes honcho setup now redirects to hermes memory setup * fix(memory): wire on_delegation, skip_memory for cron/flush, fix ByteRover return type - Wire on_delegation() in delegate_tool.py — parent's memory provider is notified with task+result after each subagent completes - Add skip_memory=True to cron scheduler (prevents cron system prompts from corrupting user representations — closes #4052) - Add skip_memory=True to gateway flush agent (throwaway agent shouldn't activate memory provider) - Fix ByteRover on_pre_compress() return type: None -> str * fix(honcho): port profile isolation fixes from PR #4632 Ports 5 bug fixes found during profile testing (erosika's PR #4632): 1. 3-tier config resolution — resolve_config_path() now checks $HERMES_HOME/honcho.json → ~/.hermes/honcho.json → ~/.honcho/config.json (non-default profiles couldn't find shared host blocks) 2. Thread host=_host_key() through from_global_config() in cmd_setup, cmd_status, cmd_identity (--target-profile was being ignored) 3. 
Use bare profile name as aiPeer (not host key with dots) — Honcho's peer ID pattern is ^[a-zA-Z0-9_-]+$, dots are invalid 4. Wrap add_peers() in try/except — was fatal on new AI peers, killed all message uploads for the session 5. Gate Honcho clone behind --clone/--clone-all on profile create (bare create should be blank-slate) Also: sanitize assistant_peer_id via _sanitize_id() * fix(tests): add module cleanup fixture to test_cli_provider_resolution test_cli_provider_resolution._import_cli() wipes tools.*, cli, and run_agent from sys.modules to force fresh imports, but had no cleanup. This poisoned all subsequent tests on the same xdist worker — mocks targeting tools.file_tools, tools.send_message_tool, etc. patched the NEW module object while already-imported functions still referenced the OLD one. Caused ~25 cascade failures: send_message KeyError, process_registry FileNotFoundError, file_read_guards timeouts, read_loop_detection file-not-found, mcp_oauth None port, and provider_parity/codex_execution stale tool lists. Fix: autouse fixture saves all affected modules before each test and restores them after, matching the pattern in test_managed_browserbase_and_modal.py.
2026-04-02 15:33:51 -07:00
except Exception:
pass
try:
from tools.browser_tool import cleanup_all_browsers
cleanup_all_browsers()
except Exception:
pass
from gateway.status import remove_pid_file
remove_pid_file()
# Write a clean-shutdown marker so the next startup knows this
# wasn't a crash. suspend_recently_active() only needs to run
fix: notify active sessions on gateway shutdown + update health check Three fixes for gateway lifecycle stability: 1. Notify active sessions before shutdown (#new) When the gateway receives SIGTERM or /restart, it now sends a notification to every chat with an active agent BEFORE starting the drain. Users see: - Shutdown: 'Gateway shutting down — your task will be interrupted.' - Restart: 'Gateway restarting — use /retry after restart to continue.' Deduplicates per-chat so group sessions with multiple users get one notification. Best-effort: send failures are logged and swallowed. 2. Skip .clean_shutdown marker when drain timed out Previously, a graceful SIGTERM always wrote .clean_shutdown, even if agents were force-interrupted when the drain timed out. This meant the next startup skipped session suspension, leaving interrupted sessions in a broken state (trailing tool response, no final message). Now the marker is only written if the drain completed without timeout, so interrupted sessions get properly suspended on next startup. 3. Post-restart health check for hermes update (#6631) cmd_update() now verifies the gateway actually survived after systemctl restart (sleep 3s + is-active check). If the service crashed immediately, it retries once. If still dead, prints actionable diagnostics (journalctl command, manual restart hint). Also closes #8104 — already fixed on main (the /restart handler correctly detects systemd via INVOCATION_ID and uses via_service=True). Test plan: - 6 new tests for shutdown notifications (dedup, restart vs shutdown messaging, sentinel filtering, send failure resilience) - Existing restart drain + update tests pass (47 total)
2026-04-14 12:44:46 -07:00
# after unexpected exits. However, if the drain timed out and
# agents were force-interrupted, their sessions may be in an
# incomplete state (trailing tool response, no final assistant
# message). Skip the marker in that case so the next startup
# suspends those sessions — giving users a clean slate instead
# of resuming a half-finished tool loop.
if not timed_out:
try:
(_hermes_home / ".clean_shutdown").touch()
except Exception:
pass
else:
logger.info(
"Skipping .clean_shutdown marker — drain timed out with "
"interrupted agents; next startup will suspend recently "
"active sessions."
)
fix: break stuck session resume loops after repeated restarts (#7536) When a session gets stuck (hung terminal, runaway tool loop) and the user restarts the gateway, the same session history loads and puts the agent right back in the stuck state. The user is trapped in a loop: restart → stuck → restart → stuck. Fix: track restart-failure counts per session using a simple JSON file (.restart_failure_counts). On each shutdown with active agents, the counter increments for those sessions. On startup, if any session has been active across 3+ consecutive restarts, it's auto-suspended — giving the user a clean slate on their next message. The counter resets to 0 when a session completes a turn successfully (response delivered), so normal sessions that happen to be active during planned restarts (/restart, hermes update) won't accumulate false counts. Implementation: - _increment_restart_failure_counts(): called during stop() when agents are active. Writes {session_key: count} to JSON file. Sessions NOT active are dropped (loop broken). - _suspend_stuck_loop_sessions(): called on startup. Reads the file, suspends sessions at threshold (3), clears the file. - _clear_restart_failure_count(): called after successful response delivery. Removes the session from the counter file. No SessionEntry schema changes. No database migration. Pure file-based tracking that naturally cleans up. Test plan: - 9 new stuck-loop tests (increment, accumulate, threshold, clear, suspend, file cleanup, edge cases) - All 28 gateway lifecycle tests pass (restart drain + auto-continue + stuck loop)
2026-04-14 17:03:47 -07:00
# Track sessions that were active at shutdown for stuck-loop
# detection (#7536). On each restart, the counter increments
# for sessions that were running. If a session hits the
# threshold (3 consecutive restarts while active), the next
# startup auto-suspends it — breaking the loop.
if active_agents:
self._increment_restart_failure_counts(set(active_agents.keys()))
if self._restart_requested and self._restart_via_service:
self._exit_code = GATEWAY_SERVICE_RESTART_EXIT_CODE
self._exit_reason = self._exit_reason or "Gateway restart requested"
self._draining = False
self._update_runtime_status("stopped", self._exit_reason)
logger.info("Gateway stopped")
self._stop_task = asyncio.create_task(_stop_impl())
await self._stop_task
async def wait_for_shutdown(self) -> None:
    """Block until a shutdown has been signalled for this runner.

    Simply awaits the internal shutdown event; returns once it is set
    (e.g. by a signal handler or an explicit stop request).
    """
    shutdown_event = self._shutdown_event
    await shutdown_event.wait()
def _create_adapter(
self,
platform: Platform,
config: Any
) -> Optional[BasePlatformAdapter]:
"""Create the appropriate adapter for a platform."""
if hasattr(config, "extra") and isinstance(config.extra, dict):
config.extra.setdefault(
"group_sessions_per_user",
self.config.group_sessions_per_user,
)
config.extra.setdefault(
"thread_sessions_per_user",
getattr(self.config, "thread_sessions_per_user", False),
)
if platform == Platform.TELEGRAM:
from gateway.platforms.telegram import TelegramAdapter, check_telegram_requirements
if not check_telegram_requirements():
logger.warning("Telegram: python-telegram-bot not installed")
return None
return TelegramAdapter(config)
elif platform == Platform.DISCORD:
from gateway.platforms.discord import DiscordAdapter, check_discord_requirements
if not check_discord_requirements():
logger.warning("Discord: discord.py not installed")
return None
return DiscordAdapter(config)
elif platform == Platform.WHATSAPP:
from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
if not check_whatsapp_requirements():
logger.warning("WhatsApp: Node.js not installed or bridge not configured")
return None
return WhatsAppAdapter(config)
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image 
vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
elif platform == Platform.SLACK:
from gateway.platforms.slack import SlackAdapter, check_slack_requirements
if not check_slack_requirements():
logger.warning("Slack: slack-bolt not installed. Run: pip install 'hermes-agent[slack]'")
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image 
vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
return None
return SlackAdapter(config)
feat: add Signal messenger gateway platform (#405) Complete Signal adapter using signal-cli daemon HTTP API. Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes. Architecture: - SSE streaming for inbound messages with exponential backoff (2s→60s) - JSON-RPC 2.0 for outbound (send, typing, attachments, contacts) - Health monitor detects stale SSE connections (120s threshold) - Phone number redaction in all logs and global redact.py Features: - DM and group message support with separate access policies - DM policies: pairing (default), allowlist, open - Group policies: disabled (default), allowlist, open - Attachment download with magic-byte type detection - Typing indicators (8s refresh interval) - 100MB attachment size limit, 8000 char message limit - E.164 phone + UUID allowlist support Integration: - Platform.SIGNAL enum in gateway/config.py - Signal in _is_user_authorized() allowlist maps (gateway/run.py) - Adapter factory in _create_adapter() (gateway/run.py) - user_id_alt/chat_id_alt fields in SessionSource for UUIDs - send_message tool support via httpx JSON-RPC (not aiohttp) - Interactive setup wizard in 'hermes gateway setup' - Connectivity testing during setup (pings /api/v1/check) - signal-cli detection and install guidance Bug fixes from PR #268: - Timestamp reads from envelope_data (not outer wrapper) - Uses httpx consistently (not aiohttp in send_message tool) - SIGNAL_DEBUG scoped to signal logger (not root) - extract_images regex NOT modified (preserves group numbering) - pairing.py NOT modified (no cross-platform side effects) - No dual authorization (adapter defers to run.py for user auth) - Wildcard uses set membership ('*' in set, not list equality) - .zip default for PK magic bytes (not .docx) No new Python dependencies — uses httpx (already core). External requirement: signal-cli daemon (user-installed). 
Tests: 30 new tests covering config, init, helpers, session source, phone redaction, authorization, and send_message integration. Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>
2026-03-08 20:20:35 -07:00
elif platform == Platform.SIGNAL:
from gateway.platforms.signal import SignalAdapter, check_signal_requirements
if not check_signal_requirements():
logger.warning("Signal: SIGNAL_HTTP_URL or SIGNAL_ACCOUNT not configured")
return None
return SignalAdapter(config)
elif platform == Platform.HOMEASSISTANT:
from gateway.platforms.homeassistant import HomeAssistantAdapter, check_ha_requirements
if not check_ha_requirements():
logger.warning("HomeAssistant: aiohttp not installed or HASS_TOKEN not set")
return None
return HomeAssistantAdapter(config)
elif platform == Platform.EMAIL:
from gateway.platforms.email import EmailAdapter, check_email_requirements
if not check_email_requirements():
logger.warning("Email: EMAIL_ADDRESS, EMAIL_PASSWORD, EMAIL_IMAP_HOST, or EMAIL_SMTP_HOST not set")
return None
return EmailAdapter(config)
elif platform == Platform.SMS:
from gateway.platforms.sms import SmsAdapter, check_sms_requirements
if not check_sms_requirements():
logger.warning("SMS: aiohttp not installed or TWILIO_ACCOUNT_SID/TWILIO_AUTH_TOKEN not set")
return None
return SmsAdapter(config)
elif platform == Platform.DINGTALK:
from gateway.platforms.dingtalk import DingTalkAdapter, check_dingtalk_requirements
if not check_dingtalk_requirements():
logger.warning("DingTalk: dingtalk-stream not installed or DINGTALK_CLIENT_ID/SECRET not set")
return None
return DingTalkAdapter(config)
elif platform == Platform.FEISHU:
from gateway.platforms.feishu import FeishuAdapter, check_feishu_requirements
if not check_feishu_requirements():
logger.warning("Feishu: lark-oapi not installed or FEISHU_APP_ID/SECRET not set")
return None
return FeishuAdapter(config)
elif platform == Platform.WECOM_CALLBACK:
from gateway.platforms.wecom_callback import (
WecomCallbackAdapter,
check_wecom_callback_requirements,
)
if not check_wecom_callback_requirements():
logger.warning("WeComCallback: aiohttp/httpx not installed")
return None
return WecomCallbackAdapter(config)
elif platform == Platform.WECOM:
from gateway.platforms.wecom import WeComAdapter, check_wecom_requirements
if not check_wecom_requirements():
logger.warning("WeCom: aiohttp not installed or WECOM_BOT_ID/SECRET not set")
return None
return WeComAdapter(config)
elif platform == Platform.WEIXIN:
from gateway.platforms.weixin import WeixinAdapter, check_weixin_requirements
if not check_weixin_requirements():
logger.warning("Weixin: aiohttp/cryptography not installed")
return None
return WeixinAdapter(config)
elif platform == Platform.MATTERMOST:
from gateway.platforms.mattermost import MattermostAdapter, check_mattermost_requirements
if not check_mattermost_requirements():
logger.warning("Mattermost: MATTERMOST_TOKEN or MATTERMOST_URL not set, or aiohttp missing")
return None
return MattermostAdapter(config)
elif platform == Platform.MATRIX:
from gateway.platforms.matrix import MatrixAdapter, check_matrix_requirements
if not check_matrix_requirements():
logger.warning("Matrix: mautrix not installed or credentials not set. Run: pip install 'mautrix[encryption]'")
return None
return MatrixAdapter(config)
feat: OpenAI-compatible API server + WhatsApp configurable reply prefix (#1756) * feat: OpenAI-compatible API server platform adapter Salvaged from PR #956, updated for current main. Adds an HTTP API server as a gateway platform adapter that exposes hermes-agent via the OpenAI Chat Completions and Responses APIs. Any OpenAI-compatible frontend (Open WebUI, LobeChat, LibreChat, AnythingLLM, NextChat, ChatBox, etc.) can connect by pointing at http://localhost:8642/v1. Endpoints: - POST /v1/chat/completions — stateless Chat Completions API - POST /v1/responses — stateful Responses API with chaining - GET /v1/responses/{id} — retrieve stored response - DELETE /v1/responses/{id} — delete stored response - GET /v1/models — list hermes-agent as available model - GET /health — health check Features: - Real SSE streaming via stream_delta_callback (uses main's streaming) - In-memory LRU response store for Responses API conversation chaining - Named conversations via 'conversation' parameter - Bearer token auth (optional, via API_SERVER_KEY) - CORS support for browser-based frontends - System prompt layering (frontend system messages on top of core) - Real token usage tracking in responses Integration points: - Platform.API_SERVER in gateway/config.py - _create_adapter() branch in gateway/run.py - API_SERVER_* env vars in hermes_cli/config.py - Env var overrides in gateway/config.py _apply_env_overrides() Changes vs original PR #956: - Removed streaming infrastructure (already on main via stream_consumer.py) - Removed Telegram reply_to_mode (separate feature, not included) - Updated _resolve_model() -> _resolve_gateway_model() - Updated stream_callback -> stream_delta_callback - Updated connect()/disconnect() to use _mark_connected()/_mark_disconnected() - Adapted to current Platform enum (includes MATTERMOST, MATRIX, DINGTALK) Tests: 72 new tests, all passing Docs: API server guide, Open WebUI integration guide, env var reference * feat(whatsapp): make reply prefix 
configurable via config.yaml Reworked from PR #1764 (ifrederico) to use config.yaml instead of .env. The WhatsApp bridge prepends a header to every outgoing message. This was hardcoded to '⚕ *Hermes Agent*'. Users can now customize or disable it via config.yaml: whatsapp: reply_prefix: '' # disable header reply_prefix: '🤖 *My Bot*\n───\n' # custom prefix How it works: - load_gateway_config() reads whatsapp.reply_prefix from config.yaml and stores it in PlatformConfig.extra['reply_prefix'] - WhatsAppAdapter reads it from config.extra at init - When spawning bridge.js, the adapter passes it as WHATSAPP_REPLY_PREFIX in the subprocess environment - bridge.js handles undefined (default), empty (no header), or custom values with \\n escape support - Self-chat echo suppression uses the configured prefix Also fixes _config_version: was 9 but ENV_VARS_BY_VERSION had a key 10 (TAVILY_API_KEY), so existing users at v9 would never be prompted for Tavily. Bumped to 10 to close the gap. Added a regression test to prevent this from happening again. Credit: ifrederico (PR #1764) for the bridge.js implementation and the config version gap discovery. --------- Co-authored-by: Test <test@test.com>
2026-03-17 10:44:37 -07:00
elif platform == Platform.API_SERVER:
from gateway.platforms.api_server import APIServerAdapter, check_api_server_requirements
if not check_api_server_requirements():
logger.warning("API Server: aiohttp not installed")
return None
return APIServerAdapter(config)
feat(gateway): add webhook platform adapter for external event triggers Add a generic webhook platform adapter that receives HTTP POSTs from external services (GitHub, GitLab, JIRA, Stripe, etc.), validates HMAC signatures, transforms payloads into agent prompts, and routes responses back to the source or to another platform. Features: - Configurable routes with per-route HMAC secrets, event filters, prompt templates with dot-notation payload access, skill loading, and pluggable delivery (github_comment, telegram, discord, log) - HMAC signature validation (GitHub SHA-256, GitLab token, generic) - Rate limiting (30 req/min per route, configurable) - Idempotency cache (1hr TTL, prevents duplicate runs on retries) - Body size limits (1MB default, checked before reading payload) - Setup wizard integration with security warnings and docs links - 33 tests (29 unit + 4 integration), all passing Security: - HMAC secret required per route (startup validation) - Setup wizard warns about internet exposure for webhook/SMS platforms - Sandboxing (Docker/VM) recommended in docs for public-facing deployments Files changed: - gateway/config.py — Platform.WEBHOOK enum + env var overrides - gateway/platforms/webhook.py — WebhookAdapter (~420 lines) - gateway/run.py — factory wiring + auth bypass for webhook events - hermes_cli/config.py — WEBHOOK_* env var definitions - hermes_cli/setup.py — webhook section in setup_gateway() - tests/gateway/test_webhook_adapter.py — 29 unit tests - tests/gateway/test_webhook_integration.py — 4 integration tests - website/docs/user-guide/messaging/webhooks.md — full user docs - website/docs/reference/environment-variables.md — WEBHOOK_* vars - website/sidebars.ts — nav entry
2026-03-20 06:33:36 -07:00
elif platform == Platform.WEBHOOK:
from gateway.platforms.webhook import WebhookAdapter, check_webhook_requirements
if not check_webhook_requirements():
logger.warning("Webhook: aiohttp not installed")
return None
adapter = WebhookAdapter(config)
adapter.gateway_runner = self # For cross-platform delivery
return adapter
elif platform == Platform.BLUEBUBBLES:
from gateway.platforms.bluebubbles import BlueBubblesAdapter, check_bluebubbles_requirements
if not check_bluebubbles_requirements():
logger.warning("BlueBubbles: aiohttp/httpx missing or BLUEBUBBLES_SERVER_URL/BLUEBUBBLES_PASSWORD not configured")
return None
return BlueBubblesAdapter(config)
elif platform == Platform.QQBOT:
from gateway.platforms.qqbot import QQAdapter, check_qq_requirements
if not check_qq_requirements():
logger.warning("QQBot: aiohttp/httpx missing or QQ_APP_ID/QQ_CLIENT_SECRET not configured")
return None
return QQAdapter(config)
return None
def _is_user_authorized(self, source: SessionSource) -> bool:
    """Decide whether the sender of *source* may talk to the bot.

    Checks, in the order the code applies them:
      1. Platform exemptions: Home Assistant and webhook events are
         system-authenticated (HASS_TOKEN / HMAC) and always allowed.
      2. Per-platform allow-all flag (e.g. DISCORD_ALLOW_ALL_USERS=true).
      3. DM pairing approved list (consulted regardless of allowlists).
      4. Platform-specific and global env allowlists, including the
         "*" wildcard.
      5. Global allow-all flag (GATEWAY_ALLOW_ALL_USERS=true) when no
         allowlist is configured at all.
      6. Otherwise: deny.
    """
    # Home Assistant events are system-generated state changes, not user
    # messages — the HASS_TOKEN already authenticates the connection.
    # Webhook events are validated via HMAC signatures inside the
    # adapter, so no per-user allowlist applies to either platform.
    if source.platform in (Platform.HOMEASSISTANT, Platform.WEBHOOK):
        return True

    user_id = source.user_id
    if not user_id:
        return False

    allowlist_env = {
        Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
        Platform.DISCORD: "DISCORD_ALLOWED_USERS",
        Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
        Platform.SLACK: "SLACK_ALLOWED_USERS",
        Platform.SIGNAL: "SIGNAL_ALLOWED_USERS",
        Platform.EMAIL: "EMAIL_ALLOWED_USERS",
        Platform.SMS: "SMS_ALLOWED_USERS",
        Platform.MATTERMOST: "MATTERMOST_ALLOWED_USERS",
        Platform.MATRIX: "MATRIX_ALLOWED_USERS",
        Platform.DINGTALK: "DINGTALK_ALLOWED_USERS",
        Platform.FEISHU: "FEISHU_ALLOWED_USERS",
        Platform.WECOM: "WECOM_ALLOWED_USERS",
        Platform.WECOM_CALLBACK: "WECOM_CALLBACK_ALLOWED_USERS",
        Platform.WEIXIN: "WEIXIN_ALLOWED_USERS",
        Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOWED_USERS",
        Platform.QQBOT: "QQ_ALLOWED_USERS",
    }
    allow_all_env = {
        Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
        Platform.DISCORD: "DISCORD_ALLOW_ALL_USERS",
        Platform.WHATSAPP: "WHATSAPP_ALLOW_ALL_USERS",
        Platform.SLACK: "SLACK_ALLOW_ALL_USERS",
        Platform.SIGNAL: "SIGNAL_ALLOW_ALL_USERS",
        Platform.EMAIL: "EMAIL_ALLOW_ALL_USERS",
        Platform.SMS: "SMS_ALLOW_ALL_USERS",
        Platform.MATTERMOST: "MATTERMOST_ALLOW_ALL_USERS",
        Platform.MATRIX: "MATRIX_ALLOW_ALL_USERS",
        Platform.DINGTALK: "DINGTALK_ALLOW_ALL_USERS",
        Platform.FEISHU: "FEISHU_ALLOW_ALL_USERS",
        Platform.WECOM: "WECOM_ALLOW_ALL_USERS",
        Platform.WECOM_CALLBACK: "WECOM_CALLBACK_ALLOW_ALL_USERS",
        Platform.WEIXIN: "WEIXIN_ALLOW_ALL_USERS",
        Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOW_ALL_USERS",
        Platform.QQBOT: "QQ_ALLOW_ALL_USERS",
    }

    truthy = ("true", "1", "yes")

    # Per-platform allow-all flag (e.g. DISCORD_ALLOW_ALL_USERS=true).
    flag_name = allow_all_env.get(source.platform, "")
    if flag_name and os.getenv(flag_name, "").lower() in truthy:
        return True

    # Pairing store is always consulted, independent of allowlists.
    platform_name = source.platform.value if source.platform else ""
    if self.pairing_store.is_approved(platform_name, user_id):
        return True

    # Platform-specific and global allowlists (comma-separated IDs).
    per_platform = os.getenv(allowlist_env.get(source.platform, ""), "").strip()
    global_list = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()
    if not per_platform and not global_list:
        # Nothing configured — fall back to the global allow-all flag.
        return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in truthy

    allowed_ids = set()
    for raw in (per_platform, global_list):
        if raw:
            allowed_ids.update(part.strip() for part in raw.split(",") if part.strip())

    # "*" in any allowlist means allow everyone (consistent with the
    # SIGNAL_GROUP_ALLOWED_USERS precedent).
    if "*" in allowed_ids:
        return True

    candidates = {user_id}
    if "@" in user_id:
        candidates.add(user_id.split("@")[0])

    # WhatsApp: phone numbers and LIDs alias each other via the bridge
    # session mapping files, so expand both sides before comparing.
    if source.platform == Platform.WHATSAPP:
        expanded_allowed = set()
        for entry in allowed_ids:
            expanded_allowed.update(_expand_whatsapp_auth_aliases(entry))
        if expanded_allowed:
            allowed_ids = expanded_allowed
        candidates.update(_expand_whatsapp_auth_aliases(user_id))
        normalized = _normalize_whatsapp_identifier(user_id)
        if normalized:
            candidates.add(normalized)

    return bool(candidates & allowed_ids)
def _get_unauthorized_dm_behavior(self, platform: Optional[Platform]) -> str:
    """Resolve how unauthorized DMs are handled for *platform*.

    Delegates to the gateway config's ``get_unauthorized_dm_behavior``
    hook when one is available; otherwise defaults to "pair" (offer the
    pairing-code flow).
    """
    cfg = getattr(self, "config", None)
    if not cfg or not hasattr(cfg, "get_unauthorized_dm_behavior"):
        return "pair"
    return cfg.get_unauthorized_dm_behavior(platform)
async def _handle_message(self, event: MessageEvent) -> Optional[str]:
"""
Handle an incoming message from any platform.
This is the core message processing pipeline:
1. Check user authorization
2. Check for commands (/new, /reset, etc.)
3. Check for running agent and interrupt if needed
4. Get or create session
5. Build context for agent
6. Run agent conversation
7. Return response
"""
source = event.source
# Internal events (e.g. background-process completion notifications)
# are system-generated and must skip user authorization.
if getattr(event, "internal", False):
pass
elif source.user_id is None:
# Messages with no user identity (Telegram service messages,
# channel forwards, anonymous admin actions) cannot be
# authorized — drop silently instead of triggering the pairing
# flow with a None user_id.
logger.debug("Ignoring message with no user_id from %s", source.platform.value)
return None
elif not self._is_user_authorized(source):
logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value)
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image 
vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# In DMs: offer pairing code. In groups: silently ignore.
if source.chat_type == "dm" and self._get_unauthorized_dm_behavior(source.platform) == "pair":
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image 
vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
platform_name = source.platform.value if source.platform else "unknown"
# Rate-limit ALL pairing responses (code or rejection) to
# prevent spamming the user with repeated messages when
# multiple DMs arrive in quick succession.
if self.pairing_store._is_rate_limited(platform_name, source.user_id):
return None
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image 
vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
code = self.pairing_store.generate_code(
platform_name, source.user_id, source.user_name or ""
)
if code:
adapter = self.adapters.get(source.platform)
if adapter:
await adapter.send(
source.chat_id,
f"Hi~ I don't recognize you yet!\n\n"
f"Here's your pairing code: `{code}`\n\n"
f"Ask the bot owner to run:\n"
f"`hermes pairing approve {platform_name} {code}`"
)
else:
adapter = self.adapters.get(source.platform)
if adapter:
await adapter.send(
source.chat_id,
"Too many pairing requests right now~ "
"Please try again later!"
)
# Record rate limit so subsequent messages are silently ignored
self.pairing_store._record_rate_limit(platform_name, source.user_id)
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image 
vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
return None
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
# Intercept messages that are responses to a pending /update prompt.
# The update process (detached) wrote .update_prompt.json; the watcher
# forwarded it to the user; now the user's reply goes back via
# .update_response so the update process can continue.
_quick_key = self._session_key_for_source(source)
_update_prompts = getattr(self, "_update_prompt_pending", {})
if _update_prompts.get(_quick_key):
raw = (event.text or "").strip()
# Accept /approve and /deny as shorthand for yes/no
cmd = event.get_command()
if cmd in ("approve", "yes"):
response_text = "y"
elif cmd in ("deny", "no"):
response_text = "n"
else:
response_text = raw
if response_text:
response_path = _hermes_home / ".update_response"
try:
tmp = response_path.with_suffix(".tmp")
tmp.write_text(response_text)
tmp.replace(response_path)
except OSError as e:
logger.warning("Failed to write update response: %s", e)
return f"✗ Failed to send response to update process: {e}"
_update_prompts.pop(_quick_key, None)
label = response_text if len(response_text) <= 20 else response_text[:20] + ""
return f"✓ Sent `{label}` to the update process."
# PRIORITY handling when an agent is already running for this session.
# Default behavior is to interrupt immediately so user text/stop messages
# are handled with minimal latency.
#
# Special case: Telegram/photo bursts often arrive as multiple near-
# simultaneous updates. Do NOT interrupt for photo-only follow-ups here;
# let the adapter-level batching/queueing logic absorb them.
# Staleness eviction: detect leaked locks from hung/crashed handlers.
# With inactivity-based timeout, active tasks can run for hours, so
# wall-clock age alone isn't sufficient. Evict only when the agent
# has been *idle* beyond the inactivity threshold (or when the agent
# object has no activity tracker and wall-clock age is extreme).
_raw_stale_timeout = float(os.getenv("HERMES_AGENT_TIMEOUT", 1800))
_stale_ts = self._running_agents_ts.get(_quick_key, 0)
if _quick_key in self._running_agents and _stale_ts:
fix: improve timeout debug logging and user-facing diagnostics (#5370) Agent activity tracking: - Add _last_activity_ts, _last_activity_desc, _current_tool to AIAgent - Touch activity on: API call start/complete, tool start/complete, first stream chunk, streaming request start - Public get_activity_summary() method for external consumers Gateway timeout diagnostics: - Timeout message now includes what the agent was doing when killed: actively working vs stuck on a tool vs waiting on API response - Includes iteration count, last activity description, seconds since last activity — users can distinguish legitimate long tasks from genuine hangs - 'Still working' notifications now show iteration count and current tool instead of just elapsed time - Stale lock eviction logs include agent activity state for debugging Stream stale timeout: - _emit_status when stale stream is detected (was log-only) — gateway users now see 'No response from provider for Ns' with model and context size - Improved logger.warning with model name and estimated context size Error path notifications (gateway-visible via _emit_status): - Context compression attempts now use _emit_status (was _vprint only) - Non-retryable client errors emit summary before aborting - Max retry exhaustion emits error summary (was _vprint only) - Rate limit exhaustion emits specific rate-limit message These were all CLI-visible but silent to gateway users, which is why people on Telegram/Discord saw generic 'request failed' messages without explanation.
2026-04-05 18:33:33 -07:00
_stale_age = time.time() - _stale_ts
_stale_agent = self._running_agents.get(_quick_key)
fix: repair 57 failing CI tests across 14 files (#5823) * fix: repair 57 failing CI tests across 14 files Categories of fixes: **Test isolation under xdist (-n auto):** - test_hermes_logging: Strip ALL RotatingFileHandlers before each test to prevent handlers leaked from other xdist workers from polluting counts - test_code_execution: Force TERMINAL_ENV=local in setUp — prevents Modal AuthError when another test leaks TERMINAL_ENV=modal - test_timezone: Same TERMINAL_ENV fix for execute_code timezone tests - test_codex_execution_paths: Mock _resolve_turn_agent_config to ensure model resolution works regardless of xdist worker state **Matrix adapter tests (nio not installed in CI):** - Add _make_fake_nio() helper with real response classes for isinstance() checks in production code - Replace MagicMock(spec=nio.XxxResponse) with fake_nio instances - Wrap production method calls with patch.dict('sys.modules', {'nio': ...}) so import nio succeeds in method bodies - Use try/except instead of pytest.importorskip for nio.crypto imports (importorskip can be fooled by MagicMock in sys.modules) - test_matrix_voice: Skip entire file if nio is a mock, not just missing **Stale test expectations:** - test_cli_provider_resolution: _prompt_provider_choice now takes **kwargs (default param added); mock getpass.getpass alongside input - test_anthropic_oauth_flow: Mock getpass.getpass (code switched from input) - test_gemini_provider: Mock models.dev + OpenRouter API lookups to test hardcoded defaults without external API variance - test_code_execution: Add notify_on_complete to blocked terminal params - test_setup_openclaw_migration: Mock prompt_choice to select 'Full setup' (new quick-setup path leads to _require_tty → sys.exit in CI) - test_skill_manager_tool: Patch get_all_skills_dirs alongside SKILLS_DIR so _find_skill searches tmp_path, not real ~/.hermes/skills/ **Missing attributes in object.__new__ test runners:** - test_platform_reconnect: Add session_store to 
_make_runner() - test_session_race_guard: Add hooks, _running_agents_ts, session_store, delivery_router to _make_runner() **Production bug fix (gateway/run.py):** - Fix sentinel eviction race: _AGENT_PENDING_SENTINEL was immediately evicted by the stale-detection logic because sentinels have no get_activity_summary() method, causing _stale_idle=inf >= timeout. Guard _should_evict with 'is not _AGENT_PENDING_SENTINEL'. * fix: address remaining CI failures - test_setup_openclaw_migration: Also mock _offer_launch_chat (called at end of both quick and full setup paths) - test_code_execution: Move TERMINAL_ENV=local to module level to protect ALL test classes (TestEnvVarFiltering, TestExecuteCodeEdgeCases, TestInterruptHandling, TestHeadTailTruncation) from xdist env leaks - test_matrix: Use try/except for nio.crypto imports (importorskip can be fooled by MagicMock in sys.modules under xdist)
2026-04-07 09:58:45 -07:00
# Never evict the pending sentinel — it was just placed moments
# ago during the async setup phase before the real agent is
# created. Sentinels have no get_activity_summary(), so the
# idle check below would always evaluate to inf >= timeout and
# immediately evict them, racing with the setup path.
_stale_idle = float("inf") # assume idle if we can't check
fix: improve timeout debug logging and user-facing diagnostics (#5370) Agent activity tracking: - Add _last_activity_ts, _last_activity_desc, _current_tool to AIAgent - Touch activity on: API call start/complete, tool start/complete, first stream chunk, streaming request start - Public get_activity_summary() method for external consumers Gateway timeout diagnostics: - Timeout message now includes what the agent was doing when killed: actively working vs stuck on a tool vs waiting on API response - Includes iteration count, last activity description, seconds since last activity — users can distinguish legitimate long tasks from genuine hangs - 'Still working' notifications now show iteration count and current tool instead of just elapsed time - Stale lock eviction logs include agent activity state for debugging Stream stale timeout: - _emit_status when stale stream is detected (was log-only) — gateway users now see 'No response from provider for Ns' with model and context size - Improved logger.warning with model name and estimated context size Error path notifications (gateway-visible via _emit_status): - Context compression attempts now use _emit_status (was _vprint only) - Non-retryable client errors emit summary before aborting - Max retry exhaustion emits error summary (was _vprint only) - Rate limit exhaustion emits specific rate-limit message These were all CLI-visible but silent to gateway users, which is why people on Telegram/Discord saw generic 'request failed' messages without explanation.
2026-04-05 18:33:33 -07:00
_stale_detail = ""
if _stale_agent and hasattr(_stale_agent, "get_activity_summary"):
try:
_sa = _stale_agent.get_activity_summary()
_stale_idle = _sa.get("seconds_since_activity", float("inf"))
fix: improve timeout debug logging and user-facing diagnostics (#5370) Agent activity tracking: - Add _last_activity_ts, _last_activity_desc, _current_tool to AIAgent - Touch activity on: API call start/complete, tool start/complete, first stream chunk, streaming request start - Public get_activity_summary() method for external consumers Gateway timeout diagnostics: - Timeout message now includes what the agent was doing when killed: actively working vs stuck on a tool vs waiting on API response - Includes iteration count, last activity description, seconds since last activity — users can distinguish legitimate long tasks from genuine hangs - 'Still working' notifications now show iteration count and current tool instead of just elapsed time - Stale lock eviction logs include agent activity state for debugging Stream stale timeout: - _emit_status when stale stream is detected (was log-only) — gateway users now see 'No response from provider for Ns' with model and context size - Improved logger.warning with model name and estimated context size Error path notifications (gateway-visible via _emit_status): - Context compression attempts now use _emit_status (was _vprint only) - Non-retryable client errors emit summary before aborting - Max retry exhaustion emits error summary (was _vprint only) - Rate limit exhaustion emits specific rate-limit message These were all CLI-visible but silent to gateway users, which is why people on Telegram/Discord saw generic 'request failed' messages without explanation.
2026-04-05 18:33:33 -07:00
_stale_detail = (
f" | last_activity={_sa.get('last_activity_desc', 'unknown')} "
f"({_stale_idle:.0f}s ago) "
fix: improve timeout debug logging and user-facing diagnostics (#5370) Agent activity tracking: - Add _last_activity_ts, _last_activity_desc, _current_tool to AIAgent - Touch activity on: API call start/complete, tool start/complete, first stream chunk, streaming request start - Public get_activity_summary() method for external consumers Gateway timeout diagnostics: - Timeout message now includes what the agent was doing when killed: actively working vs stuck on a tool vs waiting on API response - Includes iteration count, last activity description, seconds since last activity — users can distinguish legitimate long tasks from genuine hangs - 'Still working' notifications now show iteration count and current tool instead of just elapsed time - Stale lock eviction logs include agent activity state for debugging Stream stale timeout: - _emit_status when stale stream is detected (was log-only) — gateway users now see 'No response from provider for Ns' with model and context size - Improved logger.warning with model name and estimated context size Error path notifications (gateway-visible via _emit_status): - Context compression attempts now use _emit_status (was _vprint only) - Non-retryable client errors emit summary before aborting - Max retry exhaustion emits error summary (was _vprint only) - Rate limit exhaustion emits specific rate-limit message These were all CLI-visible but silent to gateway users, which is why people on Telegram/Discord saw generic 'request failed' messages without explanation.
2026-04-05 18:33:33 -07:00
f"| iteration={_sa.get('api_call_count', 0)}/{_sa.get('max_iterations', 0)}"
)
except Exception:
pass
# Evict if: agent is idle beyond timeout, OR wall-clock age is
# extreme (10x timeout or 2h, whichever is larger — catches
# cases where the agent object was garbage-collected).
_wall_ttl = max(_raw_stale_timeout * 10, 7200) if _raw_stale_timeout > 0 else float("inf")
_should_evict = (
fix: repair 57 failing CI tests across 14 files (#5823) * fix: repair 57 failing CI tests across 14 files Categories of fixes: **Test isolation under xdist (-n auto):** - test_hermes_logging: Strip ALL RotatingFileHandlers before each test to prevent handlers leaked from other xdist workers from polluting counts - test_code_execution: Force TERMINAL_ENV=local in setUp — prevents Modal AuthError when another test leaks TERMINAL_ENV=modal - test_timezone: Same TERMINAL_ENV fix for execute_code timezone tests - test_codex_execution_paths: Mock _resolve_turn_agent_config to ensure model resolution works regardless of xdist worker state **Matrix adapter tests (nio not installed in CI):** - Add _make_fake_nio() helper with real response classes for isinstance() checks in production code - Replace MagicMock(spec=nio.XxxResponse) with fake_nio instances - Wrap production method calls with patch.dict('sys.modules', {'nio': ...}) so import nio succeeds in method bodies - Use try/except instead of pytest.importorskip for nio.crypto imports (importorskip can be fooled by MagicMock in sys.modules) - test_matrix_voice: Skip entire file if nio is a mock, not just missing **Stale test expectations:** - test_cli_provider_resolution: _prompt_provider_choice now takes **kwargs (default param added); mock getpass.getpass alongside input - test_anthropic_oauth_flow: Mock getpass.getpass (code switched from input) - test_gemini_provider: Mock models.dev + OpenRouter API lookups to test hardcoded defaults without external API variance - test_code_execution: Add notify_on_complete to blocked terminal params - test_setup_openclaw_migration: Mock prompt_choice to select 'Full setup' (new quick-setup path leads to _require_tty → sys.exit in CI) - test_skill_manager_tool: Patch get_all_skills_dirs alongside SKILLS_DIR so _find_skill searches tmp_path, not real ~/.hermes/skills/ **Missing attributes in object.__new__ test runners:** - test_platform_reconnect: Add session_store to 
_make_runner() - test_session_race_guard: Add hooks, _running_agents_ts, session_store, delivery_router to _make_runner() **Production bug fix (gateway/run.py):** - Fix sentinel eviction race: _AGENT_PENDING_SENTINEL was immediately evicted by the stale-detection logic because sentinels have no get_activity_summary() method, causing _stale_idle=inf >= timeout. Guard _should_evict with 'is not _AGENT_PENDING_SENTINEL'. * fix: address remaining CI failures - test_setup_openclaw_migration: Also mock _offer_launch_chat (called at end of both quick and full setup paths) - test_code_execution: Move TERMINAL_ENV=local to module level to protect ALL test classes (TestEnvVarFiltering, TestExecuteCodeEdgeCases, TestInterruptHandling, TestHeadTailTruncation) from xdist env leaks - test_matrix: Use try/except for nio.crypto imports (importorskip can be fooled by MagicMock in sys.modules under xdist)
2026-04-07 09:58:45 -07:00
_stale_agent is not _AGENT_PENDING_SENTINEL
and (
(_raw_stale_timeout > 0 and _stale_idle >= _raw_stale_timeout)
or _stale_age > _wall_ttl
)
)
if _should_evict:
logger.warning(
"Evicting stale _running_agents entry for %s "
"(age: %.0fs, idle: %.0fs, timeout: %.0fs)%s",
_quick_key[:30], _stale_age, _stale_idle,
_raw_stale_timeout, _stale_detail,
)
del self._running_agents[_quick_key]
self._running_agents_ts.pop(_quick_key, None)
self._busy_ack_ts.pop(_quick_key, None)
        # --- Busy-session fast path: an agent already holds this session key,
        # so selected commands are intercepted here before normal dispatch.
        # NOTE(review): the enclosing method is outside this view; indentation
        # is reconstructed.
        if _quick_key in self._running_agents:
            # /status is read-only and safe to answer while the agent runs.
            if event.get_command() == "status":
                return await self._handle_status_command(event)
            # Resolve the command once for all early-intercept checks below.
            from hermes_cli.commands import resolve_command as _resolve_cmd_inner
            _evt_cmd = event.get_command()
            _cmd_def_inner = _resolve_cmd_inner(_evt_cmd) if _evt_cmd else None
            if _cmd_def_inner and _cmd_def_inner.name == "restart":
                return await self._handle_restart_command(event)
            # /stop must hard-kill the session when an agent is running.
            # A soft interrupt (agent.interrupt()) doesn't help when the agent
            # is truly hung — the executor thread is blocked and never checks
            # _interrupt_requested. Force-clean _running_agents so the session
            # is unlocked and subsequent messages are processed normally.
            if _cmd_def_inner and _cmd_def_inner.name == "stop":
                running_agent = self._running_agents.get(_quick_key)
                # The pending sentinel is not a real agent — nothing to interrupt.
                if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
                    running_agent.interrupt("Stop requested")
                # Force-clean: remove the session lock regardless of agent state
                adapter = self.adapters.get(source.platform)
                if adapter and hasattr(adapter, 'get_pending_message'):
                    adapter.get_pending_message(_quick_key)  # consume and discard
                self._pending_messages.pop(_quick_key, None)
                if _quick_key in self._running_agents:
                    del self._running_agents[_quick_key]
                logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
                return "⚡ Stopped. You can continue this session."
            # /reset and /new must bypass the running-agent guard so they
            # actually dispatch as commands instead of being queued as user
            # text (which would be fed back to the agent with the same
            # broken history — #2170). Interrupt the agent first, then
            # clear the adapter's pending queue so the stale "/reset" text
            # doesn't get re-processed as a user message after the
            # interrupt completes.
            if _cmd_def_inner and _cmd_def_inner.name == "new":
                running_agent = self._running_agents.get(_quick_key)
                # Only interrupt a real agent, never the startup sentinel.
                if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
                    running_agent.interrupt("Session reset requested")
                # Clear any pending messages so the old text doesn't replay
                adapter = self.adapters.get(source.platform)
                if adapter and hasattr(adapter, 'get_pending_message'):
                    adapter.get_pending_message(_quick_key)  # consume and discard
                self._pending_messages.pop(_quick_key, None)
                # Clean up the running agent entry so the reset handler
                # doesn't think an agent is still active.
                if _quick_key in self._running_agents:
                    del self._running_agents[_quick_key]
                return await self._handle_reset_command(event)
            # /queue <prompt> — queue without interrupting
            # NOTE(review): unlike the other intercepts, this checks the raw
            # command against the literal pair ("queue", "q") instead of going
            # through _cmd_def_inner — confirm the registry defines no other
            # aliases for /queue.
            if event.get_command() in ("queue", "q"):
                queued_text = event.get_command_args().strip()
                if not queued_text:
                    return "Usage: /queue <prompt>"
                adapter = self.adapters.get(source.platform)
                if adapter:
                    # Wrap the queued prompt in a fresh TEXT event so it is
                    # consumed as the next turn's input for this session.
                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
                    queued_event = _ME(
                        text=queued_text,
                        message_type=_MT.TEXT,
                        source=event.source,
                        message_id=event.message_id,
                        channel_prompt=event.channel_prompt,
                    )
                    adapter._pending_messages[_quick_key] = queued_event
                # NOTE(review): "Queued" is returned even when no adapter
                # matched the platform — confirm that's intended.
                return "Queued for the next turn."
            # /model must not be used while the agent is running.
            if _cmd_def_inner and _cmd_def_inner.name == "model":
                return "Agent is running — wait or /stop first, then switch models."
            # /approve and /deny must bypass the running-agent interrupt path.
            # The agent thread is blocked on a threading.Event inside
            # tools/approval.py — sending an interrupt won't unblock it.
            # Route directly to the approval handler so the event is signalled.
            if _cmd_def_inner and _cmd_def_inner.name in ("approve", "deny"):
                if _cmd_def_inner.name == "approve":
                    return await self._handle_approve_command(event)
                return await self._handle_deny_command(event)
            # /background must bypass the running-agent guard — it starts a
            # parallel task and must never interrupt the active conversation.
            if _cmd_def_inner and _cmd_def_inner.name == "background":
                return await self._handle_background_command(event)
            # Photo follow-ups never interrupt: merge them into the adapter's
            # pending queue so the running turn can pick them up afterwards.
            if event.message_type == MessageType.PHOTO:
                logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
                adapter = self.adapters.get(source.platform)
                if adapter:
                    merge_pending_message_event(adapter._pending_messages, _quick_key, event)
                return None
            # Telegram grace window: a text message arriving within a few
            # seconds (env-tunable, default 3.0s) of the run starting is
            # treated as a follow-up and merged into the pending queue rather
            # than interrupting the agent. Set the env var to 0 to disable.
            _telegram_followup_grace = float(
                os.getenv("HERMES_TELEGRAM_FOLLOWUP_GRACE_SECONDS", "3.0")
            )
            _started_at = self._running_agents_ts.get(_quick_key, 0)
            if (
                source.platform == Platform.TELEGRAM
                and event.message_type == MessageType.TEXT
                and _telegram_followup_grace > 0
                and _started_at
                and (time.time() - _started_at) <= _telegram_followup_grace
            ):
                logger.debug(
                    "Telegram follow-up arrived %.2fs after run start for %s — queueing without interrupt",
                    time.time() - _started_at,
                    _quick_key[:20],
                )
                adapter = self.adapters.get(source.platform)
                if adapter:
                    merge_pending_message_event(
                        adapter._pending_messages,
                        _quick_key,
                        event,
                        merge_text=True,
                    )
                return None
            running_agent = self._running_agents.get(_quick_key)
            if running_agent is _AGENT_PENDING_SENTINEL:
                # Agent is being set up but not ready yet.
                if event.get_command() == "stop":
                    # Force-clean the sentinel so the session is unlocked.
                    if _quick_key in self._running_agents:
                        del self._running_agents[_quick_key]
                    logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20])
                    return "⚡ Force-stopped. The agent was still starting — session unlocked."
                # Queue the message so it will be picked up after the
                # agent starts.
                adapter = self.adapters.get(source.platform)
                if adapter:
                    merge_pending_message_event(
                        adapter._pending_messages,
                        _quick_key,
                        event,
                        merge_text=True,
                    )
                return None
            # While draining (shutdown/restart in progress) no new interrupt
            # is delivered; optionally queue the event for after the restart.
            if self._draining:
                if self._queue_during_drain_enabled():
                    self._queue_or_replace_pending_event(_quick_key, event)
                # NOTE(review): _queue_during_drain_enabled() is evaluated
                # again inside the ternary — assumed cheap and stable within
                # a single call.
                return (
                    f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back."
                    if self._queue_during_drain_enabled()
                    else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
                )
            # Default busy-path behavior: interrupt the running agent with the
            # new text and accumulate it in _pending_messages for the next turn.
            logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
            running_agent.interrupt(event.text)
            if _quick_key in self._pending_messages:
                self._pending_messages[_quick_key] += "\n" + event.text
            else:
                self._pending_messages[_quick_key] = event.text
            return None
        # Check for commands
        command = event.get_command()
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
        # Emit command:* hook for any recognized slash command.
        # GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY
        # in hermes_cli/commands.py — no hardcoded set to maintain here.
        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS, resolve_command as _resolve_cmd
        if command and command in GATEWAY_KNOWN_COMMANDS:
            # Hook payload: platform name (empty when unset), sender, the raw
            # command, and its stripped argument string.
            await self.hooks.emit(f"command:{command}", {
                "platform": source.platform.value if source.platform else "",
                "user_id": source.user_id,
                "command": command,
                "args": event.get_command_args().strip(),
            })
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
        # Resolve aliases to canonical name so dispatch only checks canonicals.
        # Unregistered commands fall through with canonical == command.
        _cmd_def = _resolve_cmd(command) if command else None
        canonical = _cmd_def.name if _cmd_def else command
        # /new (and its aliases) resets the session.
        if canonical == "new":
            return await self._handle_reset_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
        # /help — summary of available commands.
        if canonical == "help":
            return await self._handle_help_command(event)
feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap (#3934) * feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap Map active skills to Telegram's slash command menu so users can discover and invoke skills directly. Three changes: 1. Telegram menu now includes active skill commands alongside built-in commands, capped at 100 entries (Telegram Bot API limit). Overflow commands remain callable but hidden from the picker. Logged at startup when cap is hit. 2. New /commands [page] gateway command for paginated browsing of all commands + skills. /help now shows first 10 skill commands and points to /commands for the full list. 3. When a user types a slash command that matches a disabled or uninstalled skill, they get actionable guidance: - Disabled: 'Enable it with: hermes skills config' - Optional (not installed): 'Install with: hermes skills install official/<path>' Built on ideas from PR #3921 by @kshitijk4poor. * chore: move 21 niche skills to optional-skills Move specialized/niche skills from built-in (skills/) to optional (optional-skills/) to reduce the default skill count. Users can install them with: hermes skills install official/<category>/<name> Moved skills (21): - mlops: accelerate, chroma, faiss, flash-attention, hermes-atropos-environments, huggingface-tokenizers, instructor, lambda-labs, llava, nemo-curator, pinecone, pytorch-lightning, qdrant, saelens, simpo, slime, tensorrt-llm, torchtitan - research: domain-intel, duckduckgo-search - devops: inference-sh cli Built-in skills: 96 → 75 Optional skills: 22 → 43 * fix: only include repo built-in skills in Telegram menu, not user-installed User-installed skills (from hub or manually added) stay accessible via /skills and by typing the command directly, but don't get registered in the Telegram slash command picker. Only skills whose SKILL.md is under the repo's skills/ directory are included in the menu. 
This keeps the Telegram menu focused on the curated built-in set while user-installed skills remain discoverable through /skills and /commands.
2026-03-30 10:57:30 -07:00
        # /commands — paginated listing of commands and skills.
        if canonical == "commands":
            return await self._handle_commands_command(event)
        # /profile — per-user profile management.
        if canonical == "profile":
            return await self._handle_profile_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
        # /status and /restart for the idle (no running agent) path; the busy
        # path intercepts these earlier.
        if canonical == "status":
            return await self._handle_status_command(event)
        if canonical == "restart":
            return await self._handle_restart_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
        # /stop with no agent running — delegated to the stop handler.
        if canonical == "stop":
            return await self._handle_stop_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
        # /reasoning and /fast — model-behavior toggles.
        if canonical == "reasoning":
            return await self._handle_reasoning_command(event)
        if canonical == "fast":
            return await self._handle_fast_command(event)
feat: config-gated /verbose command for messaging gateway (#3262) * feat: config-gated /verbose command for messaging gateway Add gateway_config_gate field to CommandDef, allowing cli_only commands to be conditionally available in the gateway based on a config value. - CommandDef gains gateway_config_gate: str | None — a config dotpath that, when truthy, overrides cli_only for gateway surfaces - /verbose uses gateway_config_gate='display.tool_progress_command' - Default is off (cli_only behavior preserved) - When enabled, /verbose cycles tool_progress mode (off/new/all/verbose) in the gateway, saving to config.yaml — same cycle as the CLI - Gateway helpers (help, telegram menus, slack mapping) dynamically check config to include/exclude config-gated commands - GATEWAY_KNOWN_COMMANDS always includes config-gated commands so the gateway recognizes them and can respond appropriately - Handles YAML 1.1 bool coercion (bare 'off' parses as False) - 8 new tests for the config gate mechanism + gateway handler * docs: document gateway_config_gate and /verbose messaging support - AGENTS.md: add gateway_config_gate to CommandDef fields - slash-commands.md: note /verbose can be enabled for messaging, update Notes - configuration.md: add tool_progress_command to display section + usage note - cli.md: cross-link to config docs for messaging enablement - messaging/index.md: show tool_progress_command in config snippet - plugins.md: add gateway_config_gate to register_command parameter table
2026-03-26 14:41:04 -07:00
        # /verbose (config-gated for the gateway), /yolo, and /model.
        if canonical == "verbose":
            return await self._handle_verbose_command(event)
        if canonical == "yolo":
            return await self._handle_yolo_command(event)
        if canonical == "model":
            return await self._handle_model_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
        # /provider — switch or inspect the LLM provider.
        if canonical == "provider":
            return await self._handle_provider_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
        # /personality — persona selection.
        if canonical == "personality":
            return await self._handle_personality_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
        # /plan is implemented as a bundled skill: the event text is rewritten
        # into a skill-invocation message, then falls through to normal agent
        # handling with a runtime note telling the agent where to save the plan.
        if canonical == "plan":
            # NOTE(review): the matching except clause for this try is outside
            # this view.
            try:
                from agent.skill_commands import build_plan_path, build_skill_invocation_message
                user_instruction = event.get_command_args().strip()
                # Deterministic relative path for the plan file, derived from
                # the instruction.
                plan_path = build_plan_path(user_instruction)
                event.text = build_skill_invocation_message(
                    "/plan",
                    user_instruction,
                    task_id=_quick_key,
                    runtime_note=(
                        "Save the markdown plan with write_file to this exact relative path "
                        f"inside the active workspace/backend cwd: {plan_path}"
                    ),
                )
                # Empty invocation text means the bundled skill failed to load.
                if not event.text:
                    return "Failed to load the bundled /plan skill."
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
canonical = None
except Exception as e:
logger.exception("Failed to prepare /plan command")
return f"Failed to enter plan mode: {e}"
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
if canonical == "retry":
return await self._handle_retry_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
if canonical == "undo":
return await self._handle_undo_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
if canonical == "sethome":
return await self._handle_set_home_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
if canonical == "compress":
return await self._handle_compress_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
if canonical == "usage":
return await self._handle_usage_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
if canonical == "insights":
return await self._handle_insights_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
if canonical == "reload-mcp":
return await self._handle_reload_mcp_command(event)
if canonical == "approve":
return await self._handle_approve_command(event)
if canonical == "deny":
return await self._handle_deny_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
if canonical == "update":
return await self._handle_update_command(event)
if canonical == "debug":
return await self._handle_debug_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
if canonical == "title":
return await self._handle_title_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
if canonical == "resume":
return await self._handle_resume_command(event)
if canonical == "branch":
return await self._handle_branch_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
if canonical == "rollback":
return await self._handle_rollback_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
if canonical == "background":
return await self._handle_background_command(event)
fix: /reasoning command — add gateway support, fix display, persist settings (#1031) * fix: /reasoning command output ordering, display, and inline think extraction Three issues with the /reasoning command: 1. Output interleaving: The command echo used print() while feedback used _cprint(), causing them to render out-of-order under prompt_toolkit's patch_stdout. Changed echo to use _cprint() so all output renders through the same path in correct order. 2. Reasoning display not working: /reasoning show toggled a flag but reasoning never appeared for models that embed thinking in inline <think> blocks rather than structured API fields. Added fallback extraction in _build_assistant_message to capture <think> block content as reasoning when no structured reasoning fields (reasoning, reasoning_content, reasoning_details) are present. This feeds into both the reasoning callback (during tool loops) and the post-response reasoning box display. 3. Feedback clarity: Added checkmarks to confirm actions, persisted show/hide to config (was session-only before), and aligned the status display for readability. Tests: 7 new tests for inline think block extraction (41 total). * feat: add /reasoning command to gateway (Telegram/Discord/etc) The /reasoning command only existed in the CLI — messaging platforms had no way to view or change reasoning settings. This adds: 1. /reasoning command handler in the gateway: - No args: shows current effort level and display state - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh) - /reasoning show|hide: toggles reasoning display in responses - All changes saved to config.yaml immediately 2. Reasoning display in gateway responses: - When show_reasoning is enabled, prepends a 'Reasoning' block with the model's last_reasoning content before the response - Collapses long reasoning (>15 lines) to keep messages readable - Uses last_reasoning from run_conversation result dict 3. 
Plumbing: - Added _show_reasoning attribute loaded from config at startup - Propagated last_reasoning through _run_agent return dict - Added /reasoning to help text and known_commands set - Uses getattr for _show_reasoning to handle test stubs
2026-03-12 05:38:19 -07:00
if canonical == "btw":
return await self._handle_btw_command(event)
refactor: centralize slash command registry (#1603) * refactor: centralize slash command registry Replace 7+ scattered command definition sites with a single CommandDef registry in hermes_cli/commands.py. All downstream consumers now derive from this registry: - CLI process_command() resolves aliases via resolve_command() - Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset - Gateway help text generated by gateway_help_lines() - Telegram BotCommands generated by telegram_bot_commands() - Slack subcommand map generated by slack_subcommand_map() Adding a command or alias is now a one-line change to COMMAND_REGISTRY instead of touching 6+ files. Bugfixes included: - Telegram now registers /rollback, /background (were missing) - Slack now has /voice, /update, /reload-mcp (were missing) - Gateway duplicate 'reasoning' dispatch (dead code) removed - Gateway help text can no longer drift from CLI help Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are rebuilt from the registry, so existing imports work unchanged. * docs: update developer docs for centralized command registry Update AGENTS.md with full 'Slash Command Registry' and 'Adding a Slash Command' sections covering CommandDef fields, registry helpers, and the one-line alias workflow. Also update: - CONTRIBUTING.md: commands.py description - website/docs/reference/slash-commands.md: reference central registry - docs/plans/centralize-command-registry.md: mark COMPLETED - plans/checkpoint-rollback.md: reference new pattern - hermes-agent-dev skill: architecture table * chore: remove stale plan docs
2026-03-16 23:21:03 -07:00
# /voice is the last registry-dispatched command before the drain guard.
# NOTE(review): leading indentation in this span was stripped by the
# blame/export view; logical nesting follows the original source.
if canonical == "voice":
return await self._handle_voice_command(event)
# Once the gateway is draining (shutting down / restarting), refuse new work.
if self._draining:
return f"⏳ Gateway is {self._status_action_gerund()} and is not accepting new work right now."
# User-defined quick commands (bypass agent loop, no LLM call)
if command:
# self.config may be a plain dict or a config object; support both shapes.
if isinstance(self.config, dict):
quick_commands = self.config.get("quick_commands", {}) or {}
else:
quick_commands = getattr(self.config, "quick_commands", {}) or {}
# Defensive: a malformed config value degrades to "no quick commands".
if not isinstance(quick_commands, dict):
quick_commands = {}
if command in quick_commands:
qcmd = quick_commands[command]
if qcmd.get("type") == "exec":
exec_cmd = qcmd.get("command", "")
if exec_cmd:
try:
# Shell execution is intentional here: the command string
# comes from the operator's own config file, not from
# untrusted message input.
proc = await asyncio.create_subprocess_shell(
exec_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
# Hard 30-second cap so a hung command can't stall the gateway.
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
# Prefer stdout; fall back to stderr when stdout is empty.
output = (stdout or stderr).decode().strip()
return output if output else "Command returned no output."
except asyncio.TimeoutError:
return "Quick command timed out (30s)."
except Exception as e:
return f"Quick command error: {e}"
else:
return f"Quick command '/{command}' has no command defined."
fix: thread safety for concurrent subagent delegation (#1672) * fix: thread safety for concurrent subagent delegation Four thread-safety fixes that prevent crashes and data races when running multiple subagents concurrently via delegate_task: 1. Remove redirect_stdout/stderr from delegate_tool — mutating global sys.stdout races with the spinner thread when multiple children start concurrently, causing segfaults. Children already run with quiet_mode=True so the redirect was redundant. 2. Split _run_single_child into _build_child_agent (main thread) + _run_single_child (worker thread). AIAgent construction creates httpx/SSL clients which are not thread-safe to initialize concurrently. 3. Add threading.Lock to SessionDB — subagents share the parent's SessionDB and call create_session/append_message from worker threads with no synchronization. 4. Add _active_children_lock to AIAgent — interrupt() iterates _active_children while worker threads append/remove children. 5. Add _client_cache_lock to auxiliary_client — multiple subagent threads may resolve clients concurrently via call_llm(). Based on PR #1471 by peteromallet. * feat: Honcho base_url override via config.yaml + quick command alias type Two features salvaged from PR #1576: 1. Honcho base_url override: allows pointing Hermes at a remote self-hosted Honcho deployment via config.yaml: honcho: base_url: "http://192.168.x.x:8000" When set, this overrides the Honcho SDK's environment mapping (production/local), enabling LAN/VPN Honcho deployments without requiring the server to live on localhost. Uses config.yaml instead of env var (HONCHO_URL) per project convention. 2. Quick command alias type: adds a new 'alias' quick command type that rewrites to another slash command before normal dispatch: quick_commands: sc: type: alias target: /context Supports both CLI and gateway. Arguments are forwarded to the target command. Based on PR #1576 by redhelix. 
--------- Co-authored-by: peteromallet <peteromallet@users.noreply.github.com> Co-authored-by: redhelix <redhelix@users.noreply.github.com>
2026-03-17 02:53:33 -07:00
# "alias" quick commands rewrite the event to another slash command and
# then fall through to the normal dispatch path with arguments preserved.
# NOTE(review): leading indentation in this span was stripped by the
# blame/export view; logical nesting follows the original source.
elif qcmd.get("type") == "alias":
target = qcmd.get("target", "").strip()
if target:
# Accept targets written with or without the leading slash.
target = target if target.startswith("/") else f"/{target}"
target_command = target.lstrip("/")
# Forward whatever the user typed after the alias to the target command.
user_args = event.get_command_args().strip()
event.text = f"{target} {user_args}".strip()
command = target_command
# Fall through to normal command dispatch below
else:
return f"Quick command '/{command}' has no target defined."
else:
fix: thread safety for concurrent subagent delegation (#1672) * fix: thread safety for concurrent subagent delegation Four thread-safety fixes that prevent crashes and data races when running multiple subagents concurrently via delegate_task: 1. Remove redirect_stdout/stderr from delegate_tool — mutating global sys.stdout races with the spinner thread when multiple children start concurrently, causing segfaults. Children already run with quiet_mode=True so the redirect was redundant. 2. Split _run_single_child into _build_child_agent (main thread) + _run_single_child (worker thread). AIAgent construction creates httpx/SSL clients which are not thread-safe to initialize concurrently. 3. Add threading.Lock to SessionDB — subagents share the parent's SessionDB and call create_session/append_message from worker threads with no synchronization. 4. Add _active_children_lock to AIAgent — interrupt() iterates _active_children while worker threads append/remove children. 5. Add _client_cache_lock to auxiliary_client — multiple subagent threads may resolve clients concurrently via call_llm(). Based on PR #1471 by peteromallet. * feat: Honcho base_url override via config.yaml + quick command alias type Two features salvaged from PR #1576: 1. Honcho base_url override: allows pointing Hermes at a remote self-hosted Honcho deployment via config.yaml: honcho: base_url: "http://192.168.x.x:8000" When set, this overrides the Honcho SDK's environment mapping (production/local), enabling LAN/VPN Honcho deployments without requiring the server to live on localhost. Uses config.yaml instead of env var (HONCHO_URL) per project convention. 2. Quick command alias type: adds a new 'alias' quick command type that rewrites to another slash command before normal dispatch: quick_commands: sc: type: alias target: /context Supports both CLI and gateway. Arguments are forwarded to the target command. Based on PR #1576 by redhelix. 
--------- Co-authored-by: peteromallet <peteromallet@users.noreply.github.com> Co-authored-by: redhelix <redhelix@users.noreply.github.com>
2026-03-17 02:53:33 -07:00
return f"Quick command '/{command}' has unsupported type (supported: 'exec', 'alias')."
# Plugin-registered slash commands
# NOTE(review): leading indentation in this span was stripped by the
# blame/export view; logical nesting follows the original source.
if command:
try:
from hermes_cli.plugins import get_plugin_command_handler
# Normalize underscores to hyphens so Telegram's underscored
# autocomplete form matches plugin commands registered with
# hyphens. See hermes_cli/commands.py:_build_telegram_menu.
plugin_handler = get_plugin_command_handler(command.replace("_", "-"))
if plugin_handler:
user_args = event.get_command_args().strip()
import asyncio as _aio
result = plugin_handler(user_args)
# Plugin handlers may be sync or async; await only coroutines.
if _aio.iscoroutine(result):
result = await result
# Falsy results mean "handled, nothing to send back".
return str(result) if result else None
except Exception as e:
# Plugin failures must never break built-in command handling.
logger.debug("Plugin command dispatch failed (non-fatal): %s", e)
# Skill slash commands: /skill-name loads the skill and sends to agent.
# resolve_skill_command_key() handles the Telegram underscore/hyphen
# round-trip so /claude_code from Telegram autocomplete still resolves
# to the claude-code skill.
if command:
# NOTE(review): the matching `except` for this `try` lies beyond this span.
try:
from agent.skill_commands import (
get_skill_commands,
build_skill_invocation_message,
resolve_skill_command_key,
)
skill_cmds = get_skill_commands()
cmd_key = resolve_skill_command_key(command)
if cmd_key is not None:
# Check per-platform disabled status before executing.
# get_skill_commands() only applies the *global* disabled
# list at scan time; per-platform overrides need checking
# here because the cache is process-global across platforms.
_skill_name = skill_cmds[cmd_key].get("name", "")
_plat = source.platform.value if source.platform else None
if _plat and _skill_name:
from agent.skill_utils import get_disabled_skill_names as _get_plat_disabled
if _skill_name in _get_plat_disabled(platform=_plat):
# Actionable guidance rather than a silent no-op.
return (
f"The **{_skill_name}** skill is disabled for {_plat}.\n"
f"Enable it with: `hermes skills config`"
)
user_instruction = event.get_command_args().strip()
# Rewrite the event so the agent loop receives the skill invocation.
msg = build_skill_invocation_message(
cmd_key, user_instruction, task_id=_quick_key
)
if msg:
event.text = msg
# Fall through to normal message processing with skill content
feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap (#3934) * feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap Map active skills to Telegram's slash command menu so users can discover and invoke skills directly. Three changes: 1. Telegram menu now includes active skill commands alongside built-in commands, capped at 100 entries (Telegram Bot API limit). Overflow commands remain callable but hidden from the picker. Logged at startup when cap is hit. 2. New /commands [page] gateway command for paginated browsing of all commands + skills. /help now shows first 10 skill commands and points to /commands for the full list. 3. When a user types a slash command that matches a disabled or uninstalled skill, they get actionable guidance: - Disabled: 'Enable it with: hermes skills config' - Optional (not installed): 'Install with: hermes skills install official/<path>' Built on ideas from PR #3921 by @kshitijk4poor. * chore: move 21 niche skills to optional-skills Move specialized/niche skills from built-in (skills/) to optional (optional-skills/) to reduce the default skill count. Users can install them with: hermes skills install official/<category>/<name> Moved skills (21): - mlops: accelerate, chroma, faiss, flash-attention, hermes-atropos-environments, huggingface-tokenizers, instructor, lambda-labs, llava, nemo-curator, pinecone, pytorch-lightning, qdrant, saelens, simpo, slime, tensorrt-llm, torchtitan - research: domain-intel, duckduckgo-search - devops: inference-sh cli Built-in skills: 96 → 75 Optional skills: 22 → 43 * fix: only include repo built-in skills in Telegram menu, not user-installed User-installed skills (from hub or manually added) stay accessible via /skills and by typing the command directly, but don't get registered in the Telegram slash command picker. Only skills whose SKILL.md is under the repo's skills/ directory are included in the menu. 
This keeps the Telegram menu focused on the curated built-in set while user-installed skills remain discoverable through /skills and /commands.
2026-03-30 10:57:30 -07:00
else:
# Not an active skill — check if it's a known-but-disabled or
# uninstalled skill and give actionable guidance.
_unavail_msg = _check_unavailable_skill(command)
if _unavail_msg:
return _unavail_msg
# Genuinely unrecognized /command: not a built-in, not a
# plugin, not a skill, not a known-inactive skill. Warn
# the user instead of silently forwarding it to the LLM
# as free text (which leads to silent-failure behavior
# like the model inventing a delegate_task call).
# Normalize to hyphenated form before checking known
# built-ins (command may be an alias target set by the
# quick-command block above, so _cmd_def can be stale).
if command.replace("_", "-") not in GATEWAY_KNOWN_COMMANDS:
logger.warning(
"Unrecognized slash command /%s from %s"
"replying with unknown-command notice",
command,
source.platform.value if source.platform else "?",
)
return (
f"Unknown command `/{command}`. "
f"Type /commands to see what's available, "
f"or resend without the leading slash to send "
f"as a regular message."
)
except Exception as e:
logger.debug("Skill command check failed (non-fatal): %s", e)
# Pending exec approvals are handled by /approve and /deny commands above.
# No bare text matching — "yes" in normal conversation must not trigger
# execution of a dangerous command.
# ── Claim this session before any await ───────────────────────
# Between here and _run_agent registering the real AIAgent, there
# are numerous await points (hooks, vision enrichment, STT,
# session hygiene compression). Without this sentinel a second
# message arriving during any of those yields would pass the
# "already running" guard and spin up a duplicate agent for the
# same session — corrupting the transcript.
self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL
self._running_agents_ts[_quick_key] = time.time()
try:
return await self._handle_message_with_agent(event, source, _quick_key)
finally:
# If _run_agent replaced the sentinel with a real agent and
# then cleaned it up, this is a no-op. If we exited early
# (exception, command fallthrough, etc.) the sentinel must
# not linger or the session would be permanently locked out.
if self._running_agents.get(_quick_key) is _AGENT_PENDING_SENTINEL:
del self._running_agents[_quick_key]
self._running_agents_ts.pop(_quick_key, None)
async def _prepare_inbound_message_text(
    self,
    *,
    event: MessageEvent,
    source: SessionSource,
    history: List[Dict[str, Any]],
) -> Optional[str]:
    """Prepare inbound event text for the agent.

    Keep the normal inbound path and the queued follow-up path on the same
    preprocessing pipeline so sender attribution, image enrichment, STT,
    document notes, reply context, and @ references all behave the same.

    Args:
        event: Inbound platform message (text, media attachments, reply
            metadata).
        source: Session routing info (platform, chat/thread ids, user).
        history: Prior transcript messages; used only to decide whether
            quoted reply text must be re-injected.

    Returns:
        The enriched message text, or ``None`` when an @-context reference
        was blocked (the refusal has already been sent to the user, so the
        caller should drop the message entirely).
    """
    history = history or []
    message_text = event.text or ""
    # In shared (non-DM) threads that are NOT split into per-user sessions,
    # several people write into one transcript — prefix the sender's name
    # so the agent can attribute each message.
    _is_shared_thread = (
        source.chat_type != "dm"
        and source.thread_id
        and not getattr(self.config, "thread_sessions_per_user", False)
    )
    if _is_shared_thread and source.user_name:
        message_text = f"[{source.user_name}] {message_text}"
    if event.media_urls:
        # Partition attachments: images go to the vision enricher, audio to
        # speech-to-text. Everything else falls through to the document
        # handling below.
        image_paths = []
        audio_paths = []
        for i, path in enumerate(event.media_urls):
            # media_types may be shorter than media_urls; treat missing
            # entries as unknown ("") and fall back to the message type.
            mtype = event.media_types[i] if i < len(event.media_types) else ""
            if mtype.startswith("image/") or event.message_type == MessageType.PHOTO:
                image_paths.append(path)
            if mtype.startswith("audio/") or event.message_type in (MessageType.VOICE, MessageType.AUDIO):
                audio_paths.append(path)
        if image_paths:
            message_text = await self._enrich_message_with_vision(
                message_text,
                image_paths,
            )
        if audio_paths:
            message_text = await self._enrich_message_with_transcription(
                message_text,
                audio_paths,
            )
            # Marker strings that the transcription enricher embeds when no
            # STT backend is available — their presence means the voice
            # message could not be transcribed. NOTE(review): marker list
            # assumed to match _enrich_message_with_transcription's failure
            # text; confirm against that helper.
            _stt_fail_markers = (
                "No STT provider",
                "STT is disabled",
                "can't listen",
                "VOICE_TOOLS_OPENAI_KEY",
            )
            if any(marker in message_text for marker in _stt_fail_markers):
                # Tell the user how to enable voice support. Delivery is
                # best-effort: failures here must not break the message.
                _stt_adapter = self.adapters.get(source.platform)
                _stt_meta = {"thread_id": source.thread_id} if source.thread_id else None
                if _stt_adapter:
                    try:
                        _stt_msg = (
                            "🎤 I received your voice message but can't transcribe it — "
                            "no speech-to-text provider is configured.\n\n"
                            "To enable voice: install faster-whisper "
                            "(`pip install faster-whisper` in the Hermes venv) "
                            "and set `stt.enabled: true` in config.yaml, "
                            "then /restart the gateway."
                        )
                        if self._has_setup_skill():
                            _stt_msg += "\n\nFor full setup instructions, type: `/skill hermes-agent-setup`"
                        await _stt_adapter.send(
                            source.chat_id,
                            _stt_msg,
                            metadata=_stt_meta,
                        )
                    except Exception:
                        pass
    # Documents: prepend a bracketed context note naming the file and its
    # saved path so the agent knows what was attached.
    if event.media_urls and event.message_type == MessageType.DOCUMENT:
        import mimetypes as _mimetypes
        # Extensions treated as plain text when the platform reports no
        # MIME type (or the generic octet-stream one).
        _TEXT_EXTENSIONS = {".txt", ".md", ".csv", ".log", ".json", ".xml", ".yaml", ".yml", ".toml", ".ini", ".cfg"}
        for i, path in enumerate(event.media_urls):
            mtype = event.media_types[i] if i < len(event.media_types) else ""
            if mtype in ("", "application/octet-stream"):
                # Fall back to extension-based, then filename-based guessing.
                import os as _os2
                _ext = _os2.path.splitext(path)[1].lower()
                if _ext in _TEXT_EXTENSIONS:
                    mtype = "text/plain"
                else:
                    guessed, _ = _mimetypes.guess_type(path)
                    if guessed:
                        mtype = guessed
            # Only document-like types get a context note; anything else
            # (e.g. video) is skipped here.
            if not mtype.startswith(("application/", "text/")):
                continue
            import os as _os
            import re as _re
            basename = _os.path.basename(path)
            # Recover the original filename from the third underscore-
            # separated segment when present. NOTE(review): assumes saved
            # files are named "<seg>_<seg>_<original-name>" — confirm
            # against the media download code.
            parts = basename.split("_", 2)
            display_name = parts[2] if len(parts) >= 3 else basename
            # Sanitize the display name so it cannot break the bracketed
            # note's formatting.
            display_name = _re.sub(r'[^\w.\- ]', '_', display_name)
            if mtype.startswith("text/"):
                context_note = (
                    f"[The user sent a text document: '{display_name}'. "
                    f"Its content has been included below. "
                    f"The file is also saved at: {path}]"
                )
            else:
                context_note = (
                    f"[The user sent a document: '{display_name}'. "
                    f"The file is saved at: {path}. "
                    f"Ask the user what they'd like you to do with it.]"
                )
            message_text = f"{context_note}\n\n{message_text}"
    # Reply context: if the user replied to a message whose text is not in
    # the current transcript (cron output, an old session), quote it inline
    # so the agent knows what is being referenced.
    if getattr(event, "reply_to_text", None) and event.reply_to_message_id:
        reply_snippet = event.reply_to_text[:500]
        # Match on the first 200 chars only — transcript content may itself
        # have been truncated or reformatted.
        found_in_history = any(
            reply_snippet[:200] in (msg.get("content") or "")
            for msg in history
            if msg.get("role") in ("assistant", "user", "tool")
        )
        if not found_in_history:
            message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'
    # @ context references: expand @-references inline, bounded by the
    # model's context length. A blocked expansion sends the refusal to the
    # user and drops the message (returns None). Expansion failures are
    # best-effort — the raw message still goes through.
    if "@" in message_text:
        try:
            from agent.context_references import preprocess_context_references_async
            from agent.model_metadata import get_model_context_length
            _msg_cwd = os.environ.get("TERMINAL_CWD", os.path.expanduser("~"))
            _msg_ctx_len = get_model_context_length(
                self._model,
                base_url=self._base_url or "",
            )
            _ctx_result = await preprocess_context_references_async(
                message_text,
                cwd=_msg_cwd,
                context_length=_msg_ctx_len,
                allowed_root=_msg_cwd,
            )
            if _ctx_result.blocked:
                _adapter = self.adapters.get(source.platform)
                if _adapter:
                    await _adapter.send(
                        source.chat_id,
                        "\n".join(_ctx_result.warnings) or "Context injection refused.",
                    )
                return None
            if _ctx_result.expanded:
                message_text = _ctx_result.message
        except Exception as exc:
            logger.debug("@ context reference expansion failed: %s", exc)
    return message_text
async def _handle_message_with_agent(self, event, source, _quick_key: str):
"""Inner handler that runs under the _running_agents sentinel guard."""
_msg_start_time = time.time()
_platform_name = source.platform.value if hasattr(source.platform, "value") else str(source.platform)
_msg_preview = (event.text or "")[:80].replace("\n", " ")
logger.info(
"inbound message: platform=%s user=%s chat=%s msg=%r",
_platform_name, source.user_name or source.user_id or "unknown",
source.chat_id or "unknown", _msg_preview,
)
# Get or create session
session_entry = self.session_store.get_or_create_session(source)
session_key = session_entry.session_key
# Emit session:start for new or auto-reset sessions
_is_new_session = (
session_entry.created_at == session_entry.updated_at
or getattr(session_entry, "was_auto_reset", False)
)
if _is_new_session:
await self.hooks.emit("session:start", {
"platform": source.platform.value if source.platform else "",
"user_id": source.user_id,
"session_id": session_entry.session_id,
"session_key": session_key,
})
# Build session context
context = build_session_context(source, self.config, session_entry)
# Set session context variables for tools (task-local, concurrency-safe)
_session_env_tokens = self._set_session_env(context)
# Read privacy.redact_pii from config (re-read per message)
_redact_pii = False
try:
import yaml as _pii_yaml
with open(_config_path, encoding="utf-8") as _pf:
_pcfg = _pii_yaml.safe_load(_pf) or {}
_redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False))
except Exception:
pass
# Build the context prompt to inject
context_prompt = build_session_context_prompt(context, redact_pii=_redact_pii)
2026-02-22 02:16:11 -08:00
# If the previous session expired and was auto-reset, prepend a notice
# so the agent knows this is a fresh conversation (not an intentional /reset).
if getattr(session_entry, 'was_auto_reset', False):
reset_reason = getattr(session_entry, 'auto_reset_reason', None) or 'idle'
if reset_reason == "suspended":
context_note = "[System note: The user's previous session was stopped and suspended. This is a fresh conversation with no prior context.]"
elif reset_reason == "daily":
context_note = "[System note: The user's session was automatically reset by the daily schedule. This is a fresh conversation with no prior context.]"
else:
context_note = "[System note: The user's previous session expired due to inactivity. This is a fresh conversation with no prior context.]"
context_prompt = context_note + "\n\n" + context_prompt
# Send a user-facing notification explaining the reset, unless:
# - notifications are disabled in config
# - the platform is excluded (e.g. api_server, webhook)
# - the expired session had no activity (nothing was cleared)
try:
policy = self.session_store.config.get_reset_policy(
platform=source.platform,
session_type=getattr(source, 'chat_type', 'dm'),
)
platform_name = source.platform.value if source.platform else ""
had_activity = getattr(session_entry, 'reset_had_activity', False)
# Suspended sessions always notify (they were explicitly stopped
# or crashed mid-operation) — skip the policy check.
should_notify = reset_reason == "suspended" or (
policy.notify
and had_activity
and platform_name not in policy.notify_exclude_platforms
)
if should_notify:
adapter = self.adapters.get(source.platform)
if adapter:
if reset_reason == "suspended":
reason_text = "previous session was stopped or interrupted"
elif reset_reason == "daily":
reason_text = f"daily schedule at {policy.at_hour}:00"
else:
hours = policy.idle_minutes // 60
mins = policy.idle_minutes % 60
duration = f"{hours}h" if not mins else f"{hours}h {mins}m" if hours else f"{mins}m"
reason_text = f"inactive for {duration}"
notice = (
f"◐ Session automatically reset ({reason_text}). "
f"Conversation history cleared.\n"
f"Use /resume to browse and restore a previous session.\n"
f"Adjust reset timing in config.yaml under session_reset."
)
try:
session_info = self._format_session_info()
if session_info:
notice = f"{notice}\n\n{session_info}"
except Exception:
pass
await adapter.send(
source.chat_id, notice,
metadata=getattr(event, 'metadata', None),
)
except Exception as e:
logger.debug("Auto-reset notification failed (non-fatal): %s", e)
2026-02-22 02:16:11 -08:00
session_entry.was_auto_reset = False
session_entry.auto_reset_reason = None
# Auto-load skill(s) for topic/channel bindings (Telegram DM Topics,
# Discord channel_skill_bindings). Supports a single name or ordered list.
# Only inject on NEW sessions — ongoing conversations already have the
# skill content in their conversation history from the first message.
_auto = getattr(event, "auto_skill", None)
if _is_new_session and _auto:
_skill_names = [_auto] if isinstance(_auto, str) else list(_auto)
try:
from agent.skill_commands import _load_skill_payload, _build_skill_message
_combined_parts: list[str] = []
_loaded_names: list[str] = []
for _sname in _skill_names:
_loaded = _load_skill_payload(_sname, task_id=_quick_key)
if _loaded:
_loaded_skill, _skill_dir, _display_name = _loaded
_note = (
f'[SYSTEM: The "{_display_name}" skill is auto-loaded. '
f"Follow its instructions for this session.]"
)
_part = _build_skill_message(_loaded_skill, _skill_dir, _note)
if _part:
_combined_parts.append(_part)
_loaded_names.append(_sname)
else:
logger.warning("[Gateway] Auto-skill '%s' not found", _sname)
if _combined_parts:
# Append the user's original text after all skill payloads
_combined_parts.append(event.text)
event.text = "\n\n".join(_combined_parts)
logger.info(
"[Gateway] Auto-loaded skill(s) %s for session %s",
_loaded_names, session_key,
)
except Exception as e:
logger.warning("[Gateway] Failed to auto-load skill(s) %s: %s", _skill_names, e)
# Load conversation history from transcript
history = self.session_store.load_transcript(session_entry.session_id)
# -----------------------------------------------------------------
# Session hygiene: auto-compress pathologically large transcripts
#
# Long-lived gateway sessions can accumulate enough history that
# every new message rehydrates an oversized transcript, causing
# repeated truncation/context failures. Detect this early and
# compress proactively — before the agent even starts. (#628)
#
# Token source priority:
# 1. Actual API-reported prompt_tokens from the last turn
# (stored in session_entry.last_prompt_tokens)
# 2. Rough char-based estimate (str(msg)//4). Overestimates
# by 30-50% on code/JSON-heavy sessions, but that just
# means hygiene fires a bit early — safe and harmless.
# -----------------------------------------------------------------
if history and len(history) >= 4:
from agent.model_metadata import (
estimate_messages_tokens_rough,
get_model_context_length,
)
# Read model + compression config from config.yaml.
# NOTE: hygiene threshold is intentionally HIGHER than the agent's
# own compressor (0.85 vs 0.50). Hygiene is a safety net for
# sessions that grew too large between turns — it fires pre-agent
# to prevent API failures. The agent's own compressor handles
# normal context management during its tool loop with accurate
# real token counts. Having hygiene at 0.50 caused premature
# compression on every turn in long gateway sessions.
_hyg_model = "anthropic/claude-sonnet-4.6"
_hyg_threshold_pct = 0.85
_hyg_compression_enabled = True
_hyg_config_context_length = None
_hyg_provider = None
_hyg_base_url = None
_hyg_api_key = None
_hyg_data = {}
try:
_hyg_cfg_path = _hermes_home / "config.yaml"
if _hyg_cfg_path.exists():
import yaml as _hyg_yaml
with open(_hyg_cfg_path, encoding="utf-8") as _hyg_f:
_hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}
# Resolve model name (same logic as run_sync)
_model_cfg = _hyg_data.get("model", {})
if isinstance(_model_cfg, str):
_hyg_model = _model_cfg
elif isinstance(_model_cfg, dict):
_hyg_model = _model_cfg.get("default") or _model_cfg.get("model") or _hyg_model
# Read explicit context_length override from model config
# (same as run_agent.py lines 995-1005)
_raw_ctx = _model_cfg.get("context_length")
if _raw_ctx is not None:
try:
_hyg_config_context_length = int(_raw_ctx)
except (TypeError, ValueError):
pass
# Read provider for accurate context detection
_hyg_provider = _model_cfg.get("provider") or None
_hyg_base_url = _model_cfg.get("base_url") or None
# Read compression settings — only use enabled flag.
# The threshold is intentionally separate from the agent's
# compression.threshold (hygiene runs higher).
_comp_cfg = _hyg_data.get("compression", {})
if isinstance(_comp_cfg, dict):
_hyg_compression_enabled = str(
_comp_cfg.get("enabled", True)
).lower() in ("true", "1", "yes")
try:
_hyg_model, _hyg_runtime = self._resolve_session_agent_runtime(
source=source,
session_key=session_key,
user_config=_hyg_data if isinstance(_hyg_data, dict) else None,
)
_hyg_provider = _hyg_runtime.get("provider") or _hyg_provider
_hyg_base_url = _hyg_runtime.get("base_url") or _hyg_base_url
_hyg_api_key = _hyg_runtime.get("api_key") or _hyg_api_key
except Exception:
pass
# Check custom_providers per-model context_length
# (same fallback as run_agent.py lines 1171-1189).
# Must run after runtime resolution so _hyg_base_url is set.
if _hyg_config_context_length is None and _hyg_base_url:
try:
try:
from hermes_cli.config import get_compatible_custom_providers as _gw_gcp
_hyg_custom_providers = _gw_gcp(_hyg_data)
except Exception:
_hyg_custom_providers = _hyg_data.get("custom_providers")
if not isinstance(_hyg_custom_providers, list):
_hyg_custom_providers = []
for _cp in _hyg_custom_providers:
if not isinstance(_cp, dict):
continue
_cp_url = (_cp.get("base_url") or "").rstrip("/")
if _cp_url and _cp_url == _hyg_base_url.rstrip("/"):
_cp_models = _cp.get("models", {})
if isinstance(_cp_models, dict):
_cp_model_cfg = _cp_models.get(_hyg_model, {})
if isinstance(_cp_model_cfg, dict):
_cp_ctx = _cp_model_cfg.get("context_length")
if _cp_ctx is not None:
_hyg_config_context_length = int(_cp_ctx)
break
except (TypeError, ValueError):
pass
except Exception:
pass
if _hyg_compression_enabled:
_hyg_context_length = get_model_context_length(
_hyg_model,
base_url=_hyg_base_url or "",
api_key=_hyg_api_key or "",
config_context_length=_hyg_config_context_length,
provider=_hyg_provider or "",
)
_compress_token_threshold = int(
_hyg_context_length * _hyg_threshold_pct
)
_warn_token_threshold = int(_hyg_context_length * 0.95)
_msg_count = len(history)
# Prefer actual API-reported tokens from the last turn
# (stored in session entry) over the rough char-based estimate.
_stored_tokens = session_entry.last_prompt_tokens
if _stored_tokens > 0:
_approx_tokens = _stored_tokens
_token_source = "actual"
else:
_approx_tokens = estimate_messages_tokens_rough(history)
_token_source = "estimated"
# Note: rough estimates overestimate by 30-50% for code/JSON-heavy
# sessions, but that just means hygiene fires a bit early — which
# is safe and harmless. The 85% threshold already provides ample
# headroom (agent's own compressor runs at 50%). A previous 1.4x
# multiplier tried to compensate by inflating the threshold, but
# 85% * 1.4 = 119% of context — which exceeds the model's limit
# and prevented hygiene from ever firing for ~200K models (GLM-5).
# Hard safety valve: force compression if message count is
# extreme, regardless of token estimates. This breaks the
# death spiral where API disconnects prevent token data
# collection, which prevents compression, which causes more
# disconnects. 400 messages is well above normal sessions
# but catches runaway growth before it becomes unrecoverable.
# (#2153)
_HARD_MSG_LIMIT = 400
_needs_compress = (
_approx_tokens >= _compress_token_threshold
or _msg_count >= _HARD_MSG_LIMIT
)
if _needs_compress:
logger.info(
"Session hygiene: %s messages, ~%s tokens (%s) — auto-compressing "
"(threshold: %s%% of %s = %s tokens)",
_msg_count, f"{_approx_tokens:,}", _token_source,
int(_hyg_threshold_pct * 100),
f"{_hyg_context_length:,}",
f"{_compress_token_threshold:,}",
)
_hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
try:
from run_agent import AIAgent
_hyg_model, _hyg_runtime = self._resolve_session_agent_runtime(
source=source,
session_key=session_key,
user_config=_hyg_data if isinstance(_hyg_data, dict) else None,
)
if _hyg_runtime.get("api_key"):
_hyg_msgs = [
{"role": m.get("role"), "content": m.get("content")}
for m in history
if m.get("role") in ("user", "assistant")
and m.get("content")
]
if len(_hyg_msgs) >= 4:
_hyg_agent = AIAgent(
**_hyg_runtime,
model=_hyg_model,
max_iterations=4,
quiet_mode=True,
feat(honcho): context injection overhaul, 5-tool surface, cost safety, session isolation (#10619) Salvaged from PR #9884 by erosika. Cherry-picked plugin changes onto current main with minimal core modifications. Plugin changes (plugins/memory/honcho/): - New honcho_reasoning tool (5th tool, splits LLM calls from honcho_context) - Two-layer context injection: base context (summary + representation + card) on contextCadence, dialectic supplement on dialecticCadence - Multi-pass dialectic depth (1-3 passes) with early bail-out on strong signal - Cold/warm prompt selection based on session state - dialecticCadence defaults to 3 (was 1) — ~66% fewer Honcho LLM calls - Session summary injection for conversational continuity - Bidirectional peer targeting on all 5 tools - Correctness fixes: peer param fallback, None guard on set_peer_card, schema validation, signal_sufficient anchored regex, mid->medium level fix Core changes (~20 lines across 3 files): - agent/memory_manager.py: Enhanced sanitize_context() to strip full <memory-context> blocks and system notes (prevents leak from saveMessages) - run_agent.py: gateway_session_key param for stable per-chat Honcho sessions, on_turn_start() call before prefetch_all() for cadence tracking, sanitize_context() on user messages to strip leaked memory blocks - gateway/run.py: skip_memory=True on 2 temp agents (prevents orphan sessions), gateway_session_key threading to main agent Tests: 509 passed (3 skipped — honcho SDK not installed locally) Docs: Updated honcho.md, memory-providers.md, tools-reference.md, SKILL.md Co-authored-by: erosika <erosika@users.noreply.github.com>
2026-04-15 19:12:19 -07:00
skip_memory=True,
enabled_toolsets=["memory"],
session_id=session_entry.session_id,
)
try:
_hyg_agent._print_fn = lambda *a, **kw: None
loop = asyncio.get_running_loop()
_compressed, _ = await loop.run_in_executor(
None,
lambda: _hyg_agent._compress_context(
_hyg_msgs, "",
approx_tokens=_approx_tokens,
),
)
# _compress_context ends the old session and creates
# a new session_id. Write compressed messages into
# the NEW session so the old transcript stays intact
# and searchable via session_search.
_hyg_new_sid = _hyg_agent.session_id
if _hyg_new_sid != session_entry.session_id:
session_entry.session_id = _hyg_new_sid
self.session_store._save()
self.session_store.rewrite_transcript(
session_entry.session_id, _compressed
)
# Reset stored token count — transcript was rewritten
session_entry.last_prompt_tokens = 0
history = _compressed
_new_count = len(_compressed)
_new_tokens = estimate_messages_tokens_rough(
_compressed
)
logger.info(
"Session hygiene: compressed %s%s msgs, "
"~%s → ~%s tokens",
_msg_count, _new_count,
f"{_approx_tokens:,}", f"{_new_tokens:,}",
)
if _new_tokens >= _warn_token_threshold:
logger.warning(
"Session hygiene: still ~%s tokens after "
"compression",
f"{_new_tokens:,}",
)
finally:
self._cleanup_agent_resources(_hyg_agent)
except Exception as e:
logger.warning(
"Session hygiene auto-compress failed: %s", e
)
# First-message onboarding -- only on the very first interaction ever
if not history and not self.session_store.has_any_sessions():
2026-02-22 02:16:11 -08:00
context_prompt += (
"\n\n[System note: This is the user's very first message ever. "
2026-02-22 02:16:11 -08:00
"Briefly introduce yourself and mention that /help shows available commands. "
"Keep the introduction concise -- one or two sentences max.]"
)
# One-time prompt if no home channel is set for this platform
# Skip for webhooks - they deliver directly to configured targets (github_comment, etc.)
if not history and source.platform and source.platform != Platform.LOCAL and source.platform != Platform.WEBHOOK:
platform_name = source.platform.value
env_key = f"{platform_name.upper()}_HOME_CHANNEL"
if not os.getenv(env_key):
adapter = self.adapters.get(source.platform)
if adapter:
await adapter.send(
source.chat_id,
f"📬 No home channel is set for {platform_name.title()}. "
f"A home channel is where Hermes delivers cron job results "
f"and cross-platform messages.\n\n"
f"Type /sethome to make this chat your home channel, "
f"or ignore to skip."
)
# -----------------------------------------------------------------
# Voice channel awareness — inject current voice channel state
# into context so the agent knows who is in the channel and who
# is speaking, without needing a separate tool call.
# -----------------------------------------------------------------
if source.platform == Platform.DISCORD:
adapter = self.adapters.get(Platform.DISCORD)
guild_id = self._get_guild_id(event)
if guild_id and adapter and hasattr(adapter, "get_voice_channel_context"):
vc_context = adapter.get_voice_channel_context(guild_id)
if vc_context:
context_prompt += f"\n\n{vc_context}"
# -----------------------------------------------------------------
# Auto-analyze images sent by the user
#
# If the user attached image(s), we run the vision tool eagerly so
# the conversation model always receives a text description. The
# local file path is also included so the model can re-examine the
# image later with a more targeted question via vision_analyze.
#
# We filter to image paths only (by media_type) so that non-image
# attachments (documents, audio, etc.) are not sent to the vision
# tool even when they appear in the same message.
# -----------------------------------------------------------------
message_text = await self._prepare_inbound_message_text(
event=event,
source=source,
history=history,
)
if message_text is None:
return
feat(gateway): inject reply-to message context for out-of-session replies (#1594) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552) Long messages sent via send_message tool or cron delivery silently failed when exceeding platform limits. Gateway adapters handle this via truncate_message(), but the standalone senders in send_message_tool bypassed that entirely. - Apply truncate_message() chunking in _send_to_platform() before dispatching to individual platform senders - Remove naive message[i:i+2000] character split in _send_discord() in favor of centralized smart splitting - Attach media files to last chunk only for Telegram - Add regression tests for chunking and media placement Cherry-picked from PR #1557 by llbn. * fix(approval): show full command in dangerous command approval (#1553) Previously the command was truncated to 80 chars in CLI (with a [v]iew full option), 500 chars in Discord embeds, and missing entirely in Telegram/Slack approval messages. Now the full command is always displayed everywhere: - CLI: removed 80-char truncation and [v]iew full menu option - Gateway (TG/Slack): approval_required message includes full command in a code block - Discord: embed shows full command up to 4096-char limit - Windows: skip SIGALRM-based test timeout (Unix-only) - Updated tests: replaced view-flow tests with direct approval tests Cherry-picked from PR #1566 by crazywriter1. * fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624) The interrupt polling loop in chat() waited on the queue without invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy buffer only flushed on input events, causing the CLI to appear frozen during tool execution until the user typed a key. Fix: call _invalidate() on each queue timeout (every ~100ms, throttled to 150ms) to force the renderer to flush buffered agent output. 
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580) When --migrate-secrets is not passed (the default), API keys like OPENROUTER_API_KEY are silently skipped with no warning. Users don't realize their keys weren't migrated until the agent fails to connect. Add a post-migration warning with actionable instructions: either re-run with --migrate-secrets or add the key manually via hermes config set. Cherry-picked from PR #1593 by ygd58. * fix(security): block sandbox backend creds from subprocess env (#1264) Add Modal and Daytona sandbox credentials to the subprocess env blocklist so they're not leaked to agent terminal sessions via printenv/env. Cherry-picked from PR #1571 by ygd58. * fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816) When a user sends multiple messages while the agent keeps failing, _run_agent() calls itself recursively with no depth limit. This can exhaust stack/memory if the agent is in a failure loop. Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is logged and the current result is returned instead of recursing deeper. The log handler duplication bug described in #816 was already fixed separately (AIAgent.__init__ deduplicates handlers). * fix(gateway): /model shows active fallback model instead of config default (#1615) When the agent falls back to a different model (e.g. due to rate limiting), /model still showed the config default. Now tracks the effective model/provider after each agent run and displays it. Cleared when the primary model succeeds again or the user explicitly switches via /model. Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for test compatibility. * feat(gateway): inject reply-to message context for out-of-session replies (#1594) When a user replies to a Telegram message, check if the quoted text exists in the current session transcript. 
If missing (from cron jobs, background tasks, or old sessions), prepend [Replying to: "..."] to the message so the agent has context about what's being referenced. - Add reply_to_text field to MessageEvent (base.py) - Populate from Telegram's reply_to_message (text or caption) - Inject context in _handle_message when not found in history Based on PR #1596 by anpicasso (cherry-picked reply-to feature only, excluded unrelated /server command and background delegation changes). --------- Co-authored-by: buray <ygd58@users.noreply.github.com> Co-authored-by: lbn <llbn@users.noreply.github.com> Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com> Co-authored-by: Max K <MaxKerkula@users.noreply.github.com> Co-authored-by: Angello Picasso <angello.picasso@devsu.com>
2026-03-17 02:31:27 -07:00
# Guard the whole agent-turn pipeline below; the matching
# except/finally handlers live further down in this method
# (outside this view).
try:
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image 
vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# Emit agent:start hook
# hook_ctx doubles as the base payload for the agent:end hook later;
# the inbound message is truncated to 500 chars for hook consumers.
hook_ctx = {
    "platform": source.platform.value if source.platform else "",
    "user_id": source.user_id,
    "session_id": session_entry.session_id,
    "message": message_text[:500],
}
await self.hooks.emit("agent:start", hook_ctx)
# Run the agent
agent_result = await self._run_agent(
    message=message_text,
    context_prompt=context_prompt,
    history=history,
    source=source,
    session_id=session_entry.session_id,
    session_key=session_key,
    event_message_id=event.message_id,
    channel_prompt=event.channel_prompt,
)
# Stop persistent typing indicator now that the agent is done.
# Best-effort: not every adapter implements stop_typing, and a
# failure here must never mask the agent's result.
try:
    _typing_adapter = self.adapters.get(source.platform)
    if _typing_adapter and hasattr(_typing_adapter, "stop_typing"):
        await _typing_adapter.stop_typing(source.chat_id)
except Exception:
    pass
response = agent_result.get("final_response") or ""
# Convert the agent's internal "(empty)" sentinel into a
# user-friendly message. "(empty)" means the model failed to
# produce visible content after exhausting all retries (nudge,
# prefill, empty-retry, fallback). Sending the raw sentinel
# looks like a bug; a short explanation is more helpful.
if response == "(empty)":
    response = (
        "⚠️ The model returned no response after processing tool "
        "results. This can happen with some models — try again or "
        "rephrase your question."
    )
agent_messages = agent_result.get("messages", [])
# Per-turn metrics for the structured log line below.
# _msg_start_time is captured earlier in this method (outside this view).
_response_time = time.time() - _msg_start_time
_api_calls = agent_result.get("api_calls", 0)
_resp_len = len(response)
logger.info(
    "response ready: platform=%s chat=%s time=%.1fs api_calls=%d response=%d chars",
    _platform_name, source.chat_id or "unknown",
    _response_time, _api_calls, _resp_len,
)
fix: /reasoning command — add gateway support, fix display, persist settings (#1031) * fix: /reasoning command output ordering, display, and inline think extraction Three issues with the /reasoning command: 1. Output interleaving: The command echo used print() while feedback used _cprint(), causing them to render out-of-order under prompt_toolkit's patch_stdout. Changed echo to use _cprint() so all output renders through the same path in correct order. 2. Reasoning display not working: /reasoning show toggled a flag but reasoning never appeared for models that embed thinking in inline <think> blocks rather than structured API fields. Added fallback extraction in _build_assistant_message to capture <think> block content as reasoning when no structured reasoning fields (reasoning, reasoning_content, reasoning_details) are present. This feeds into both the reasoning callback (during tool loops) and the post-response reasoning box display. 3. Feedback clarity: Added checkmarks to confirm actions, persisted show/hide to config (was session-only before), and aligned the status display for readability. Tests: 7 new tests for inline think block extraction (41 total). * feat: add /reasoning command to gateway (Telegram/Discord/etc) The /reasoning command only existed in the CLI — messaging platforms had no way to view or change reasoning settings. This adds: 1. /reasoning command handler in the gateway: - No args: shows current effort level and display state - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh) - /reasoning show|hide: toggles reasoning display in responses - All changes saved to config.yaml immediately 2. Reasoning display in gateway responses: - When show_reasoning is enabled, prepends a 'Reasoning' block with the model's last_reasoning content before the response - Collapses long reasoning (>15 lines) to keep messages readable - Uses last_reasoning from run_conversation result dict 3. 
Plumbing: - Added _show_reasoning attribute loaded from config at startup - Propagated last_reasoning through _run_agent return dict - Added /reasoning to help text and known_commands set - Uses getattr for _show_reasoning to handle test stubs
2026-03-12 05:38:19 -07:00
fix: break stuck session resume loops after repeated restarts (#7536) When a session gets stuck (hung terminal, runaway tool loop) and the user restarts the gateway, the same session history loads and puts the agent right back in the stuck state. The user is trapped in a loop: restart → stuck → restart → stuck. Fix: track restart-failure counts per session using a simple JSON file (.restart_failure_counts). On each shutdown with active agents, the counter increments for those sessions. On startup, if any session has been active across 3+ consecutive restarts, it's auto-suspended — giving the user a clean slate on their next message. The counter resets to 0 when a session completes a turn successfully (response delivered), so normal sessions that happen to be active during planned restarts (/restart, hermes update) won't accumulate false counts. Implementation: - _increment_restart_failure_counts(): called during stop() when agents are active. Writes {session_key: count} to JSON file. Sessions NOT active are dropped (loop broken). - _suspend_stuck_loop_sessions(): called on startup. Reads the file, suspends sessions at threshold (3), clears the file. - _clear_restart_failure_count(): called after successful response delivery. Removes the session from the counter file. No SessionEntry schema changes. No database migration. Pure file-based tracking that naturally cleans up. Test plan: - 9 new stuck-loop tests (increment, accumulate, threshold, clear, suspend, file cleanup, edge cases) - All 28 gateway lifecycle tests pass (restart drain + auto-continue + stuck loop)
2026-04-14 17:03:47 -07:00
# Successful turn — clear any stuck-loop counter for this session.
# This ensures the counter only accumulates across CONSECUTIVE
# restarts where the session was active (never completed).
if session_key:
    self._clear_restart_failure_count(session_key)
# Surface error details when the agent failed silently (final_response=None)
if not response and agent_result.get("failed"):
    # "unknown error" fallback keeps the user message meaningful even
    # when the agent reported failure without an error payload.
    error_detail = agent_result.get("error", "unknown error")
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
--------- Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
# Lower-cased error text for case-insensitive phrase matching.
error_str = str(error_detail).lower()
# Detect context-overflow failures and give specific guidance.
# Generic 400 "Error" from Anthropic with large sessions is the
# most common cause of this (#1630).
_is_ctx_fail = any(p in error_str for p in (
    "context", "token", "too large", "too long",
    "exceed", "payload",
)) or (
    # Heuristic fallback: a bare 400 with a long session history is
    # most likely an overflow even without an explicit phrase match.
    "400" in error_str
    and len(history) > 50
)
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
--------- Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
if _is_ctx_fail:
    # Retrying an overflow would fail identically, so steer the user
    # toward the commands that actually shrink the session.
    response = (
        "⚠️ Session too large for the model's context window.\n"
        "Use /compact to compress the conversation, or "
        "/reset to start fresh."
    )
else:
    # Generic failure: show a truncated error and the standard
    # recovery options.
    response = (
        f"The request failed: {str(error_detail)[:300]}\n"
        "Try again or use /reset to start a fresh session."
    )
# If the agent's session_id changed during compression, update
# session_entry so transcript writes below go to the right session.
if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
    session_entry.session_id = agent_result["session_id"]
feat: per-platform display verbosity configuration (#8006) Add display.platforms section to config.yaml for per-platform overrides of display settings (tool_progress, show_reasoning, streaming, tool_preview_length). Each platform gets sensible built-in defaults based on capability tier: - High (telegram, discord): tool_progress=all, streaming follows global - Medium (slack, mattermost, matrix, feishu): tool_progress=new - Low (signal, whatsapp, bluebubbles, wecom, etc.): tool_progress=off, streaming=false - Minimal (email, sms, webhook, homeassistant): tool_progress=off, streaming=false Example config: display: platforms: telegram: tool_progress: all show_reasoning: true slack: tool_progress: off Resolution order: platform override > global setting > built-in platform default. Changes: - New gateway/display_config.py: resolver module with tier-based platform defaults - gateway/run.py: tool_progress, tool_preview_length, streaming, show_reasoning all resolve per-platform via the new resolver - /verbose command: now cycles tool_progress per-platform (saves to display.platforms.<platform>.tool_progress instead of global) - /reasoning show|hide: now saves show_reasoning per-platform - Config version 15 -> 16: migrates tool_progress_overrides into display.platforms - Backward compat: legacy tool_progress_overrides still read as fallback - 27 new tests for resolver, normalization, migration, backward compat - Updated verbose command tests for per-platform behavior Addresses community request for per-channel verbosity control (Guillaume Meyer, Nathan Danielsen) — high verbosity on backchannel Telegram, low on customer-facing Slack, none on email.
2026-04-11 17:20:34 -07:00
# Prepend reasoning/thinking if display is enabled (per-platform)
# Resolution order: per-platform override > global config > the
# instance-level default loaded at startup.
try:
    from gateway.display_config import resolve_display_setting as _rds
    _show_reasoning_effective = _rds(
        _load_gateway_config(),
        _platform_config_key(source.platform),
        "show_reasoning",
        getattr(self, "_show_reasoning", False),
    )
except Exception:
    # Resolver unavailable (e.g. lightweight test stubs) — fall back
    # to the instance flag; getattr guards stubs without the attr.
    _show_reasoning_effective = getattr(self, "_show_reasoning", False)
if _show_reasoning_effective and response:
fix: /reasoning command — add gateway support, fix display, persist settings (#1031) * fix: /reasoning command output ordering, display, and inline think extraction Three issues with the /reasoning command: 1. Output interleaving: The command echo used print() while feedback used _cprint(), causing them to render out-of-order under prompt_toolkit's patch_stdout. Changed echo to use _cprint() so all output renders through the same path in correct order. 2. Reasoning display not working: /reasoning show toggled a flag but reasoning never appeared for models that embed thinking in inline <think> blocks rather than structured API fields. Added fallback extraction in _build_assistant_message to capture <think> block content as reasoning when no structured reasoning fields (reasoning, reasoning_content, reasoning_details) are present. This feeds into both the reasoning callback (during tool loops) and the post-response reasoning box display. 3. Feedback clarity: Added checkmarks to confirm actions, persisted show/hide to config (was session-only before), and aligned the status display for readability. Tests: 7 new tests for inline think block extraction (41 total). * feat: add /reasoning command to gateway (Telegram/Discord/etc) The /reasoning command only existed in the CLI — messaging platforms had no way to view or change reasoning settings. This adds: 1. /reasoning command handler in the gateway: - No args: shows current effort level and display state - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh) - /reasoning show|hide: toggles reasoning display in responses - All changes saved to config.yaml immediately 2. Reasoning display in gateway responses: - When show_reasoning is enabled, prepends a 'Reasoning' block with the model's last_reasoning content before the response - Collapses long reasoning (>15 lines) to keep messages readable - Uses last_reasoning from run_conversation result dict 3. 
Plumbing: - Added _show_reasoning attribute loaded from config at startup - Propagated last_reasoning through _run_agent return dict - Added /reasoning to help text and known_commands set - Uses getattr for _show_reasoning to handle test stubs
2026-03-12 05:38:19 -07:00
# Model reasoning captured by the agent run, if any.
last_reasoning = agent_result.get("last_reasoning")
if last_reasoning:
    # Collapse long reasoning to keep messages readable
    lines = last_reasoning.strip().splitlines()
    if len(lines) > 15:
        display_reasoning = "\n".join(lines[:15])
        display_reasoning += f"\n_... ({len(lines) - 15} more lines)_"
    else:
        display_reasoning = last_reasoning.strip()
    # Prepend as a fenced block so platforms render it distinctly
    # from the actual answer.
    response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}"
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image 
vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
# Emit agent:end hook
# Reuses the agent:start context dict and adds the response,
# truncated to 500 chars like the inbound message was.
await self.hooks.emit("agent:end", {
    **hook_ctx,
    "response": (response or "")[:500],
})
Add background process management with process tool, wait, PTY, and stdin support New process registry and tool for managing long-running background processes across all terminal backends (local, Docker, Singularity, Modal, SSH). Process Registry (tools/process_registry.py): - ProcessSession tracking with rolling 200KB output buffer - spawn_local() with optional PTY via ptyprocess for interactive CLIs - spawn_via_env() for non-local backends (runs inside sandbox, never on host) - Background reader threads per process (Popen stdout or PTY) - wait() with timeout clamping, interrupt support, and transparent limit reporting - JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery - Module-level singleton shared across agent loop, gateway, and RL Process Tool (model_tools.py): - 7 actions: list, poll, log, wait, kill, write, submit - Paired with terminal in all toolsets (CLI, messaging, RL) - Timeout clamping with transparent notes in response Terminal Tool Updates (tools/terminal_tool.py): - Replaced nohup background mode with registry spawn (returns session_id) - Added workdir parameter for per-command working directory - Added check_interval parameter for gateway auto-check watchers - Added pty parameter for interactive CLI tools (Codex, Claude Code) - Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs - Cleanup thread now respects active background processes (won't reap sandbox) Gateway Integration (gateway/run.py, session.py, config.py): - Session reset protection: sessions with active processes exempt from reset - Default idle timeout increased from 2 hours to 24 hours - from_dict fallback aligned to match (was 120, now 1440) - session_key env var propagated to process registry for session mapping - Crash recovery on gateway startup via checkpoint probe - check_interval watcher: asyncio task polls process, delivers updates to platform RL Safety (environments/): - tool_context.py cleanup() kills background processes on episode end 
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools) - Process tool safe in RL via wait() blocking the agent loop Also: - Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all]) - Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP - Updated AGENTS.md with process management docs and project structure - Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
# Check for pending process watchers (check_interval on background processes)
# Each pending watcher is consumed exactly once (pop(0)) and launched
# as its own fire-and-forget asyncio task.
# NOTE(review): the create_task result is not retained — asyncio only
# weakly references running tasks, so confirm watchers cannot be
# garbage-collected mid-run.
try:
    from tools.process_registry import process_registry
    while process_registry.pending_watchers:
        watcher = process_registry.pending_watchers.pop(0)
        asyncio.create_task(self._run_process_watcher(watcher))
except Exception as e:
    logger.error("Process watcher setup error: %s", e)
Add background process management with process tool, wait, PTY, and stdin support New process registry and tool for managing long-running background processes across all terminal backends (local, Docker, Singularity, Modal, SSH). Process Registry (tools/process_registry.py): - ProcessSession tracking with rolling 200KB output buffer - spawn_local() with optional PTY via ptyprocess for interactive CLIs - spawn_via_env() for non-local backends (runs inside sandbox, never on host) - Background reader threads per process (Popen stdout or PTY) - wait() with timeout clamping, interrupt support, and transparent limit reporting - JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery - Module-level singleton shared across agent loop, gateway, and RL Process Tool (model_tools.py): - 7 actions: list, poll, log, wait, kill, write, submit - Paired with terminal in all toolsets (CLI, messaging, RL) - Timeout clamping with transparent notes in response Terminal Tool Updates (tools/terminal_tool.py): - Replaced nohup background mode with registry spawn (returns session_id) - Added workdir parameter for per-command working directory - Added check_interval parameter for gateway auto-check watchers - Added pty parameter for interactive CLI tools (Codex, Claude Code) - Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs - Cleanup thread now respects active background processes (won't reap sandbox) Gateway Integration (gateway/run.py, session.py, config.py): - Session reset protection: sessions with active processes exempt from reset - Default idle timeout increased from 2 hours to 24 hours - from_dict fallback aligned to match (was 120, now 1440) - session_key env var propagated to process registry for session mapping - Crash recovery on gateway startup via checkpoint probe - check_interval watcher: asyncio task polls process, delivers updates to platform RL Safety (environments/): - tool_context.py cleanup() kills background processes on episode end 
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools) - Process tool safe in RL via wait() blocking the agent loop Also: - Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all]) - Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP - Updated AGENTS.md with process management docs and project structure - Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
feat: background process monitoring — watch_patterns for real-time output alerts * feat: add watch_patterns to background processes for output monitoring Adds a new 'watch_patterns' parameter to terminal(background=true) that lets the agent specify strings to watch for in process output. When a matching line appears, a notification is queued and injected as a synthetic message — triggering a new agent turn, similar to notify_on_complete but mid-process. Implementation: - ProcessSession gets watch_patterns field + rate-limit state - _check_watch_patterns() in ProcessRegistry scans new output chunks from all three reader threads (local, PTY, env-poller) - Rate limited: max 8 notifications per 10s window - Sustained overload (45s) permanently disables watching for that process - watch_queue alongside completion_queue, same consumption pattern - CLI drains watch_queue in both idle loop and post-turn drain - Gateway drains after agent runs via _inject_watch_notification() - Checkpoint persistence + crash recovery includes watch_patterns - Blocked in execute_code sandbox (like other bg params) - 20 new tests covering matching, rate limiting, overload kill, checkpoint persistence, schema, and handler passthrough Usage: terminal( command='npm run dev', background=true, watch_patterns=['ERROR', 'WARN', 'listening on port'] ) * refactor: merge watch_queue into completion_queue Unified queue with 'type' field distinguishing 'completion', 'watch_match', and 'watch_disabled' events. Extracted _format_process_notification() in CLI and gateway to handle all event types in a single drain loop. Removes duplication across both CLI drain sites and the gateway.
2026-04-11 03:13:23 -07:00
# Drain watch pattern notifications that arrived during the agent run.
# Watch events and completions share the same queue; completions are
# already handled by the per-process watcher task above, so we only
# inject watch-type events here.
try:
    from tools.process_registry import process_registry as _pr
    _watch_events = []
    # First pass: pull everything currently queued, keeping only
    # watch-type events. Missing "type" defaults to "completion".
    while not _pr.completion_queue.empty():
        evt = _pr.completion_queue.get_nowait()
        evt_type = evt.get("type", "completion")
        if evt_type in ("watch_match", "watch_disabled"):
            _watch_events.append(evt)
        # else: completion events are handled by the watcher task
    # Second pass: render each watch event and inject it as a
    # synthetic message; an empty/None rendering is skipped.
    for evt in _watch_events:
        synth_text = _format_gateway_process_notification(evt)
        if synth_text:
            try:
                await self._inject_watch_notification(synth_text, evt)
feat: background process monitoring — watch_patterns for real-time output alerts * feat: add watch_patterns to background processes for output monitoring Adds a new 'watch_patterns' parameter to terminal(background=true) that lets the agent specify strings to watch for in process output. When a matching line appears, a notification is queued and injected as a synthetic message — triggering a new agent turn, similar to notify_on_complete but mid-process. Implementation: - ProcessSession gets watch_patterns field + rate-limit state - _check_watch_patterns() in ProcessRegistry scans new output chunks from all three reader threads (local, PTY, env-poller) - Rate limited: max 8 notifications per 10s window - Sustained overload (45s) permanently disables watching for that process - watch_queue alongside completion_queue, same consumption pattern - CLI drains watch_queue in both idle loop and post-turn drain - Gateway drains after agent runs via _inject_watch_notification() - Checkpoint persistence + crash recovery includes watch_patterns - Blocked in execute_code sandbox (like other bg params) - 20 new tests covering matching, rate limiting, overload kill, checkpoint persistence, schema, and handler passthrough Usage: terminal( command='npm run dev', background=true, watch_patterns=['ERROR', 'WARN', 'listening on port'] ) * refactor: merge watch_queue into completion_queue Unified queue with 'type' field distinguishing 'completion', 'watch_match', and 'watch_disabled' events. Extracted _format_process_notification() in CLI and gateway to handle all event types in a single drain loop. Removes duplication across both CLI drain sites and the gateway.
2026-04-11 03:13:23 -07:00
except Exception as e2:
logger.error("Watch notification injection error: %s", e2)
except Exception as e:
logger.debug("Watch queue drain error: %s", e)
# NOTE: Dangerous command approvals are now handled inline by the
# blocking gateway approval mechanism in tools/approval.py. The agent
# thread blocks until the user responds with /approve or /deny, so by
# the time we reach here the approval has already been resolved. The
# old post-loop pop_pending + approval_hint code was removed in favour
# of the blocking approach that mirrors CLI's synchronous input().
# Save the full conversation to the transcript, including tool calls.
# This preserves the complete agent loop (tool_calls, tool results,
# intermediate reasoning) so sessions can be resumed with full context
# and transcripts are useful for debugging and training data.
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
--------- Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
#
# IMPORTANT: When the agent failed (e.g. context-overflow 400,
# compression exhausted), do NOT persist the user's message.
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
--------- Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
# Persisting it would make the session even larger, causing the
# same failure on the next attempt — an infinite loop. (#1630, #9893)
agent_failed_early = bool(agent_result.get("failed"))
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
--------- Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
if agent_failed_early:
logger.info(
"Skipping transcript persistence for failed request in "
"session %s to prevent session growth loop.",
session_entry.session_id,
)
# When compression is exhausted, the session is permanently too
# large to process. Auto-reset it so the next message starts
# fresh instead of replaying the same oversized context in an
# infinite fail loop. (#9893)
if agent_result.get("compression_exhausted") and session_entry and session_key:
logger.info(
"Auto-resetting session %s after compression exhaustion.",
session_entry.session_id,
)
self.session_store.reset_session(session_key)
self._evict_cached_agent(session_key)
self._session_model_overrides.pop(session_key, None)
response = (response or "") + (
"\n\n🔄 Session auto-reset — the conversation exceeded the "
"maximum context size and could not be compressed further. "
"Your next message will start a fresh session."
)
ts = datetime.now().isoformat()
# If this is a fresh session (no history), write the full tool
# definitions as the first entry so the transcript is self-describing
# -- the same list of dicts sent as tools=[...] in the API request.
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
--------- Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
if agent_failed_early:
pass # Skip all transcript writes — don't grow a broken session
elif not history:
tool_defs = agent_result.get("tools", [])
self.session_store.append_to_transcript(
session_entry.session_id,
{
"role": "session_meta",
"tools": tool_defs or [],
"model": _resolve_gateway_model(),
"platform": source.platform.value if source.platform else "",
"timestamp": ts,
}
)
# Find only the NEW messages from this turn (skip history we loaded).
# Use the filtered history length (history_offset) that was actually
# passed to the agent, not len(history) which includes session_meta
# entries that were stripped before the agent saw them.
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
--------- Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
if not agent_failed_early:
history_len = agent_result.get("history_offset", len(history))
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
# If no new messages found (edge case), fall back to simple user/assistant
if not new_messages:
self.session_store.append_to_transcript(
session_entry.session_id,
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
--------- Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
{"role": "user", "content": message_text, "timestamp": ts}
)
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
--------- Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
if response:
self.session_store.append_to_transcript(
session_entry.session_id,
{"role": "assistant", "content": response, "timestamp": ts}
)
else:
# The agent already persisted these messages to SQLite via
# _flush_messages_to_session_db(), so skip the DB write here
# to prevent the duplicate-write bug (#860). We still write
# to JSONL for backward compatibility and as a backup.
agent_persisted = self._session_db is not None
for msg in new_messages:
# Skip system messages (they're rebuilt each run)
if msg.get("role") == "system":
continue
# Add timestamp to each message for debugging
entry = {**msg, "timestamp": ts}
self.session_store.append_to_transcript(
session_entry.session_id, entry,
skip_db=agent_persisted,
)
# Token counts and model are now persisted by the agent directly.
# Keep only last_prompt_tokens here for context-window tracking and
# compression decisions.
self.session_store.update_session(
session_entry.session_key,
last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
)
# Auto voice reply: send TTS audio before the text response
_already_sent = bool(agent_result.get("already_sent"))
if self._should_send_voice_reply(event, response, agent_messages, already_sent=_already_sent):
await self._send_voice_reply(event, response)
# If streaming already delivered the response, extract and
# deliver any MEDIA: files before returning None. Streaming
# sends raw text chunks that include MEDIA: tags — the normal
# post-processing in _process_message_background is skipped
# when already_sent is True, so media files would never be
# delivered without this.
#
# Never skip when the agent failed — the error message is new
# content the user hasn't seen (streaming only sent earlier
# partial output before the failure). Without this guard,
# users see the agent "stop responding without explanation."
if agent_result.get("already_sent") and not agent_result.get("failed"):
if response:
_media_adapter = self.adapters.get(source.platform)
if _media_adapter:
await self._deliver_media_from_response(
response, event, _media_adapter,
)
return None
return response
except Exception as e:
# Stop typing indicator on error too
try:
_err_adapter = self.adapters.get(source.platform)
if _err_adapter and hasattr(_err_adapter, "stop_typing"):
await _err_adapter.stop_typing(source.chat_id)
except Exception:
pass
logger.exception("Agent error in session %s", session_key)
error_type = type(e).__name__
error_detail = str(e)[:300] if str(e) else "no details available"
status_hint = ""
status_code = getattr(e, "status_code", None)
_hist_len = len(history) if 'history' in locals() else 0
if status_code == 401:
status_hint = " Check your API key or run `claude /login` to refresh OAuth credentials."
fix: add 402 billing error hint to gateway error handler (#5220) (#10057) * fix: hermes gateway restart waits for service to come back up (#8260) Previously, systemd_restart() sent SIGUSR1 to the gateway, printed 'restart requested', and returned immediately. The gateway still needed to drain active agents, exit with code 75, wait for systemd's RestartSec=30, and start the new process. The user saw 'success' but the gateway was actually down for 30-60 seconds. Now the SIGUSR1 path blocks with progress feedback: Phase 1 — wait for old process to die: ⏳ User service draining active work... Polls os.kill(pid, 0) until ProcessLookupError (up to 90s) Phase 2 — wait for new process to become active: ⏳ Waiting for hermes-gateway to restart... Polls systemctl is-active + verifies new PID (up to 60s) Success: ✓ User service restarted (PID 12345) Timeout: ⚠ User service did not become active within 60s. Check status: hermes gateway status Check logs: journalctl --user -u hermes-gateway --since '2 min ago' The reload-or-restart fallback path (line 1189) already blocks because systemctl reload-or-restart is synchronous. Test plan: - Updated test to verify wait-for-restart behavior - All 118 gateway CLI tests pass * fix: add 402 billing error hint to gateway error handler (#5220) The gateway's exception handler for agent errors had specific hints for HTTP 401, 429, 529, 400, 500 — but not 402 (Payment Required / quota exhausted). Users hitting billing limits from custom proxy providers got a generic error with no guidance. Added: 'Your API balance or quota is exhausted. Check your provider dashboard.' The underlying billing classification (error_classifier.py) already correctly handles 402 as FailoverReason.billing with credential rotation and fallback. The original issue (#5220) where 402 killed the entire gateway was from an older version — on current main, 402 is excluded from the is_client_error abort path (line 9460) and goes through the proper retry/fallback/fail flow. 
Combined with PR #9875 (auto-recover from unexpected SIGTERM), even edge cases where the gateway dies are now survivable.
2026-04-14 21:03:05 -07:00
elif status_code == 402:
status_hint = " Your API balance or quota is exhausted. Check your provider dashboard."
elif status_code == 429:
# Check if this is a plan usage limit (resets on a schedule) vs a transient rate limit
_err_body = getattr(e, "response", None)
_err_json = {}
try:
if _err_body is not None:
_err_json = _err_body.json().get("error", {})
except Exception:
pass
if _err_json.get("type") == "usage_limit_reached":
_resets_in = _err_json.get("resets_in_seconds")
if _resets_in and _resets_in > 0:
import math
_hours = math.ceil(_resets_in / 3600)
status_hint = f" Your plan's usage limit has been reached. It resets in ~{_hours}h."
else:
status_hint = " Your plan's usage limit has been reached. Please wait until it resets."
else:
status_hint = " You are being rate-limited. Please wait a moment and try again."
elif status_code == 529:
status_hint = " The API is temporarily overloaded. Please try again shortly."
elif status_code in (400, 500):
# 400 with a large session is context overflow.
# 500 with a large session often means the payload is too large
# for the API to process — treat it the same way.
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
--------- Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
if _hist_len > 50:
return (
"⚠️ Session too large for the model's context window.\n"
"Use /compact to compress the conversation, or "
"/reset to start fresh."
)
elif status_code == 400:
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
--------- Co-authored-by: buray <ygd58@users.noreply.github.com>
2026-03-17 01:50:59 -07:00
status_hint = " The request was rejected by the API."
return (
f"Sorry, I encountered an error ({error_type}).\n"
f"{error_detail}\n"
f"{status_hint}"
"Try again or use /reset to start a fresh session."
)
finally:
# Restore session context variables to their pre-handler state
self._clear_session_env(_session_env_tokens)
def _format_session_info(self) -> str:
    """Resolve current model config and return a formatted info block.

    Surfaces model, provider, context length, and endpoint so gateway
    users can immediately see if context detection went wrong (e.g.
    local models falling to the 128K default).
    """
    from agent.model_metadata import get_model_context_length, DEFAULT_FALLBACK_CONTEXT

    model = _resolve_gateway_model()
    declared_ctx = None
    provider = None
    base_url = None
    api_key = None

    # Best-effort: pull explicit overrides from the user's config.yaml.
    # Any parse/read failure simply leaves the values unset.
    try:
        config_path = _hermes_home / "config.yaml"
        if config_path.exists():
            import yaml as _info_yaml
            with open(config_path, encoding="utf-8") as fh:
                parsed = _info_yaml.safe_load(fh) or {}
            section = parsed.get("model", {})
            if isinstance(section, dict):
                raw_value = section.get("context_length")
                if raw_value is not None:
                    try:
                        declared_ctx = int(raw_value)
                    except (TypeError, ValueError):
                        pass
                provider = section.get("provider") or None
                base_url = section.get("base_url") or None
    except Exception:
        pass

    # Fill remaining gaps from the runtime agent kwargs; this also
    # supplies the API key used when probing for the context window.
    try:
        runtime = _resolve_runtime_agent_kwargs()
        provider = provider or runtime.get("provider")
        base_url = base_url or runtime.get("base_url")
        api_key = runtime.get("api_key")
    except Exception:
        pass

    context_length = get_model_context_length(
        model,
        base_url=base_url or "",
        api_key=api_key or "",
        config_context_length=declared_ctx,
        provider=provider or "",
    )

    # Tell the user where the context figure came from.
    if declared_ctx is not None:
        ctx_source = "config"
    elif context_length == DEFAULT_FALLBACK_CONTEXT:
        ctx_source = "default — set model.context_length in config to override"
    else:
        ctx_source = "detected"

    # Human-readable token count (e.g. 1.0M / 128K / 512).
    if context_length >= 1_000_000:
        ctx_display = f"{context_length / 1_000_000:.1f}M"
    elif context_length >= 1_000:
        ctx_display = f"{context_length // 1_000}K"
    else:
        ctx_display = str(context_length)

    info_lines = [
        f"◆ Model: `{model}`",
        f"◆ Provider: {provider or 'openrouter'}",
        f"◆ Context: {ctx_display} tokens ({ctx_source})",
    ]
    # Local/custom endpoints are worth surfacing explicitly so users can
    # spot a misconfigured base_url at a glance.
    if base_url and any(host in base_url for host in ("localhost", "127.0.0.1", "0.0.0.0")):
        info_lines.append(f"◆ Endpoint: {base_url}")
    return "\n".join(info_lines)
async def _handle_reset_command(self, event: MessageEvent) -> str:
    """Handle /new or /reset command.

    Flushes memories for the old session in the background, releases tool
    resources held by any cached agent, clears session-scoped model
    overrides, fires plugin and gateway hooks around the session boundary,
    then starts a fresh session and returns the confirmation text (with
    session info and a random tip appended when available).
    """
    source = event.source
    # Get existing session key
    session_key = self._session_key_for_source(source)
    # Initialize up-front: the finalize hook below reads old_entry even
    # when the lookup in the try-block raised before assignment.
    old_entry = None
    # Flush memories in the background (fire-and-forget) so the user
    # gets the "Session reset!" response immediately.
    try:
        old_entry = self.session_store._entries.get(session_key)
        if old_entry:
            _flush_task = asyncio.create_task(
                self._async_flush_memories(old_entry.session_id, session_key)
            )
            self._background_tasks.add(_flush_task)
            _flush_task.add_done_callback(self._background_tasks.discard)
    except Exception as e:
        logger.debug("Gateway memory flush on reset failed: %s", e)
    # Close tool resources on the old agent (terminal sandboxes, browser
    # daemons, background processes) before evicting from cache.
    # Guard with getattr because test fixtures may skip __init__.
    _cache_lock = getattr(self, "_agent_cache_lock", None)
    if _cache_lock is not None:
        with _cache_lock:
            _cached = self._agent_cache.get(session_key)
            _old_agent = _cached[0] if isinstance(_cached, tuple) else _cached if _cached else None
            if _old_agent is not None:
                self._cleanup_agent_resources(_old_agent)
        self._evict_cached_agent(session_key)
    try:
        from tools.env_passthrough import clear_env_passthrough
        clear_env_passthrough()
    except Exception:
        pass
    try:
        from tools.credential_files import clear_credential_files
        clear_credential_files()
    except Exception:
        pass
    # Reset the session
    new_entry = self.session_store.reset_session(session_key)
    # Clear any session-scoped model override so the next agent picks up
    # the configured default instead of the previously switched model.
    self._session_model_overrides.pop(session_key, None)
    # Fire plugin on_session_finalize hook (session boundary)
    try:
        from hermes_cli.plugins import invoke_hook as _invoke_hook
        _old_sid = old_entry.session_id if old_entry else None
        _invoke_hook("on_session_finalize", session_id=_old_sid,
                     platform=source.platform.value if source.platform else "")
    except Exception:
        pass
    # Emit session:end hook (session is ending)
    await self.hooks.emit("session:end", {
        "platform": source.platform.value if source.platform else "",
        "user_id": source.user_id,
        "session_key": session_key,
    })
    # Emit session:reset hook
    await self.hooks.emit("session:reset", {
        "platform": source.platform.value if source.platform else "",
        "user_id": source.user_id,
        "session_key": session_key,
    })
    # Resolve session config info to surface to the user
    try:
        session_info = self._format_session_info()
    except Exception:
        session_info = ""
    if new_entry:
        header = "✨ Session reset! Starting fresh."
    else:
        # No existing session, just create one
        new_entry = self.session_store.get_or_create_session(source, force_new=True)
        header = "✨ New session started!"
    # Fire plugin on_session_reset hook (new session guaranteed to exist)
    try:
        from hermes_cli.plugins import invoke_hook as _invoke_hook
        _new_sid = new_entry.session_id if new_entry else None
        _invoke_hook("on_session_reset", session_id=_new_sid,
                     platform=source.platform.value if source.platform else "")
    except Exception:
        pass
    # Append a random tip to the reset message
    try:
        from hermes_cli.tips import get_random_tip
        _tip_line = f"\n✦ Tip: {get_random_tip()}"
    except Exception:
        _tip_line = ""
    if session_info:
        return f"{header}\n\n{session_info}{_tip_line}"
    return f"{header}{_tip_line}"
async def _handle_profile_command(self, event: MessageEvent) -> str:
    """Handle /profile — show active profile name and home directory."""
    from hermes_constants import display_hermes_home
    from hermes_cli.profiles import get_active_profile_name
    home_display = display_hermes_home()
    active_name = get_active_profile_name()
    return "\n".join([
        f"👤 **Profile:** `{active_name}`",
        f"📂 **Home:** `{home_display}`",
    ])
async def _handle_status_command(self, event: MessageEvent) -> str:
    """Handle /status command."""
    entry = self.session_store.get_or_create_session(event.source)
    platform_names = [platform.value for platform in self.adapters.keys()]
    # An agent is considered active when its session key is registered.
    running = entry.session_key in self._running_agents
    # Session title is best-effort: the DB may be absent or the lookup may fail.
    title = None
    if self._session_db:
        try:
            title = self._session_db.get_session_title(entry.session_id)
        except Exception:
            title = None
    report = [
        "📊 **Hermes Gateway Status**",
        "",
        f"**Session ID:** `{entry.session_id}`",
    ]
    if title:
        report.append(f"**Title:** {title}")
    report.append(f"**Created:** {entry.created_at.strftime('%Y-%m-%d %H:%M')}")
    report.append(f"**Last Activity:** {entry.updated_at.strftime('%Y-%m-%d %H:%M')}")
    report.append(f"**Tokens:** {entry.total_tokens:,}")
    report.append(f"**Agent Running:** {'Yes ⚡' if running else 'No'}")
    report.append("")
    report.append(f"**Connected Platforms:** {', '.join(platform_names)}")
    return "\n".join(report)
async def _handle_stop_command(self, event: MessageEvent) -> str:
    """Handle /stop command - interrupt a running agent.

    Truly hung agents (a blocked thread that never checks
    _interrupt_requested) are intercepted earlier in _handle_message(),
    so this handler only runs through normal command dispatch (no
    running agent) or as a fallback. The session lock is force-cleared
    in every branch for safety, and the session itself is preserved so
    the user can continue the conversation.
    """
    entry = self.session_store.get_or_create_session(event.source)
    key = entry.session_key
    active = self._running_agents.get(key)
    if active is _AGENT_PENDING_SENTINEL:
        # Agent never actually started; drop the sentinel so the
        # session is unlocked again.
        self._running_agents.pop(key, None)
        logger.info("STOP (pending) for session %s — sentinel cleared", key[:20])
        return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
    if not active:
        return "No active task to stop."
    active.interrupt("Stop requested")
    # Force-clean the session lock so a truly hung agent doesn't keep
    # it locked forever.
    self._running_agents.pop(key, None)
    return "⚡ Stopped. You can continue this session."
async def _handle_restart_command(self, event: MessageEvent) -> str:
    """Handle /restart command - drain active work, then restart the gateway."""
    if self._restart_requested or self._draining:
        pending = self._running_agent_count()
        if pending:
            return f"⏳ Draining {pending} active agent(s) before restart..."
        return "⏳ Gateway restart already in progress..."
    # Persist the requester's routing info so the restarted gateway
    # process can notify them once it comes back online.
    try:
        import json as _json
        route = {
            "platform": event.source.platform.value if event.source.platform else None,
            "chat_id": event.source.chat_id,
        }
        if event.source.thread_id:
            route["thread_id"] = event.source.thread_id
        (_hermes_home / ".restart_notify.json").write_text(_json.dumps(route))
    except Exception as e:
        logger.debug("Failed to write restart notify file: %s", e)
    busy = self._running_agent_count()
    # Under a service manager (systemd/launchd) we take the service
    # restart path: exit with code 75 so the manager restarts us. The
    # detached subprocess approach (setsid + bash) doesn't survive
    # systemd because KillMode=mixed kills everything in the cgroup,
    # including the detached helper. systemd sets INVOCATION_ID.
    managed = bool(os.environ.get("INVOCATION_ID"))
    self.request_restart(detached=not managed, via_service=managed)
    if busy:
        return f"⏳ Draining {busy} active agent(s) before restart..."
    return "♻ Restarting gateway. If you aren't notified within 60 seconds, restart from the console with `hermes gateway restart`."
async def _handle_help_command(self, event: MessageEvent) -> str:
    """Handle /help command - list available commands.

    Emits the centrally-registered gateway commands, then (best-effort)
    up to 10 active skill commands with a pointer to /commands for the
    full paginated list.
    """
    from hermes_cli.commands import gateway_help_lines
    lines = [
        "📖 **Hermes Commands**\n",
        *gateway_help_lines(),
    ]
    # Skill commands are optional; never let a skills failure break /help.
    try:
        from agent.skill_commands import get_skill_commands
        skill_cmds = get_skill_commands()
        if skill_cmds:
            lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} active):")
            # Show first 10, then point to /commands for the rest
            sorted_cmds = sorted(skill_cmds)
            for cmd in sorted_cmds[:10]:
                lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}")
            if len(sorted_cmds) > 10:
                lines.append(f"\n... and {len(sorted_cmds) - 10} more. Use `/commands` for the full paginated list.")
    except Exception:
        pass
    return "\n".join(lines)
async def _handle_commands_command(self, event: MessageEvent) -> str:
    """Handle /commands [page] - paginated list of all commands and skills.

    Builds a combined list of built-in gateway commands plus active skill
    commands, then pages it (15 entries on Telegram, 20 elsewhere) with
    prev/next navigation hints. Out-of-range page requests are clamped
    and noted in the output.
    """
    from hermes_cli.commands import gateway_help_lines
    raw_args = event.get_command_args().strip()
    if raw_args:
        try:
            requested_page = int(raw_args)
        except ValueError:
            return "Usage: `/commands [page]`"
    else:
        requested_page = 1
    # Build combined entry list: built-in commands + skill commands
    entries = list(gateway_help_lines())
    try:
        from agent.skill_commands import get_skill_commands
        skill_cmds = get_skill_commands()
        if skill_cmds:
            entries.append("")
            entries.append("⚡ **Skill Commands**:")
            for cmd in sorted(skill_cmds):
                desc = skill_cmds[cmd].get("description", "").strip() or "Skill command"
                entries.append(f"`{cmd}` — {desc}")
    except Exception:
        pass
    if not entries:
        return "No commands available."
    from gateway.config import Platform
    # Telegram messages render tighter, so use a smaller page there.
    page_size = 15 if event.source.platform == Platform.TELEGRAM else 20
    total_pages = max(1, (len(entries) + page_size - 1) // page_size)
    page = max(1, min(requested_page, total_pages))
    start = (page - 1) * page_size
    page_entries = entries[start:start + page_size]
    lines = [
        f"📚 **Commands** ({len(entries)} total, page {page}/{total_pages})",
        "",
        *page_entries,
    ]
    if total_pages > 1:
        nav_parts = []
        if page > 1:
            nav_parts.append(f"`/commands {page - 1}` ← prev")
        if page < total_pages:
            nav_parts.append(f"next → `/commands {page + 1}`")
        lines.extend(["", " | ".join(nav_parts)])
    if page != requested_page:
        lines.append(f"_(Requested page {requested_page} was out of range, showing page {page}.)_")
    return "\n".join(lines)
async def _handle_model_command(self, event: MessageEvent) -> Optional[str]:
    """Handle /model command — switch model for this session.

    Supports:
        /model                               interactive picker (Telegram/Discord) or text list
        /model <name>                        switch for this session only
        /model <name> --global               switch and persist to config.yaml
        /model <name> --provider <provider>  switch provider + model
        /model --provider <provider>         switch to provider, auto-detect model

    Returns the reply text, or None when an interactive picker was sent
    and the adapter delivers the response itself.
    """
    import yaml
    from hermes_cli.model_switch import (
        switch_model as _switch_model, parse_model_flags,
        list_authenticated_providers,
    )
    from hermes_cli.providers import get_label
    raw_args = event.get_command_args().strip()
    # Parse --provider and --global flags
    model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
    # Read current model/provider from config (best-effort)
    current_model = ""
    current_provider = "openrouter"
    current_base_url = ""
    current_api_key = ""
    user_provs = None
    custom_provs = None
    config_path = _hermes_home / "config.yaml"
    try:
        if config_path.exists():
            with open(config_path, encoding="utf-8") as f:
                cfg = yaml.safe_load(f) or {}
            model_cfg = cfg.get("model", {})
            if isinstance(model_cfg, dict):
                current_model = model_cfg.get("default", "")
                current_provider = model_cfg.get("provider", current_provider)
                current_base_url = model_cfg.get("base_url", "")
            user_provs = cfg.get("providers")
            try:
                from hermes_cli.config import get_compatible_custom_providers
                custom_provs = get_compatible_custom_providers(cfg)
            except Exception:
                custom_provs = cfg.get("custom_providers")
    except Exception:
        pass
    # Check for session override
    source = event.source
    session_key = self._session_key_for_source(source)
    override = self._session_model_overrides.get(session_key, {})
    if override:
        current_model = override.get("model", current_model)
        current_provider = override.get("provider", current_provider)
        current_base_url = override.get("base_url", current_base_url)
        current_api_key = override.get("api_key", current_api_key)
    # No args: show interactive picker (Telegram/Discord) or text list
    if not model_input and not explicit_provider:
        # Try interactive picker if the platform supports it
        adapter = self.adapters.get(source.platform)
        has_picker = (
            adapter is not None
            and getattr(type(adapter), "send_model_picker", None) is not None
        )
        if has_picker:
            try:
                providers = list_authenticated_providers(
                    current_provider=current_provider,
                    user_providers=user_provs,
                    custom_providers=custom_provs,
                    max_models=50,
                )
            except Exception:
                providers = []
            if providers:
                # Build a callback closure for when the user picks a model.
                # Captures self + locals needed for the switch logic.
                _self = self
                _session_key = session_key
                _cur_model = current_model
                _cur_provider = current_provider
                _cur_base_url = current_base_url
                _cur_api_key = current_api_key
                async def _on_model_selected(
                    _chat_id: str, model_id: str, provider_slug: str
                ) -> str:
                    """Perform the model switch and return confirmation text."""
                    result = _switch_model(
                        raw_input=model_id,
                        current_provider=_cur_provider,
                        current_model=_cur_model,
                        current_base_url=_cur_base_url,
                        current_api_key=_cur_api_key,
                        is_global=False,
                        explicit_provider=provider_slug,
                        user_providers=user_provs,
                        custom_providers=custom_provs,
                    )
                    if not result.success:
                        return f"Error: {result.error_message}"
                    # Update cached agent in-place
                    cached_entry = None
                    _cache_lock = getattr(_self, "_agent_cache_lock", None)
                    _cache = getattr(_self, "_agent_cache", None)
                    if _cache_lock and _cache is not None:
                        with _cache_lock:
                            cached_entry = _cache.get(_session_key)
                    if cached_entry and cached_entry[0] is not None:
                        try:
                            cached_entry[0].switch_model(
                                new_model=result.new_model,
                                new_provider=result.target_provider,
                                api_key=result.api_key,
                                base_url=result.base_url,
                                api_mode=result.api_mode,
                            )
                        except Exception as exc:
                            logger.warning("Picker model switch failed for cached agent: %s", exc)
                    # Store model note + session override
                    if not hasattr(_self, "_pending_model_notes"):
                        _self._pending_model_notes = {}
                    _self._pending_model_notes[_session_key] = (
                        f"[Note: model was just switched from {_cur_model} to {result.new_model} "
                        f"via {result.provider_label or result.target_provider}. "
                        f"Adjust your self-identification accordingly.]"
                    )
                    _self._session_model_overrides[_session_key] = {
                        "model": result.new_model,
                        "provider": result.target_provider,
                        "api_key": result.api_key,
                        "base_url": result.base_url,
                        "api_mode": result.api_mode,
                    }
                    # Evict cached agent so the next turn creates a fresh
                    # agent from the override rather than relying on the
                    # stale cache signature to trigger a rebuild.
                    _self._evict_cached_agent(_session_key)
                    # Build confirmation text
                    plabel = result.provider_label or result.target_provider
                    lines = [f"Model switched to `{result.new_model}`"]
                    lines.append(f"Provider: {plabel}")
                    mi = result.model_info
                    if mi:
                        if mi.context_window:
                            lines.append(f"Context: {mi.context_window:,} tokens")
                        if mi.max_output:
                            lines.append(f"Max output: {mi.max_output:,} tokens")
                        if mi.has_cost_data():
                            lines.append(f"Cost: {mi.format_cost()}")
                        lines.append(f"Capabilities: {mi.format_capabilities()}")
                    lines.append("_(session only — use `/model <name> --global` to persist)_")
                    return "\n".join(lines)
                metadata = {"thread_id": source.thread_id} if source.thread_id else None
                result = await adapter.send_model_picker(
                    chat_id=source.chat_id,
                    providers=providers,
                    current_model=current_model,
                    current_provider=current_provider,
                    session_key=session_key,
                    on_model_selected=_on_model_selected,
                    metadata=metadata,
                )
                if result.success:
                    return None  # Picker sent — adapter handles the response
        # Fallback: text list (for platforms without picker or if picker failed)
        provider_label = get_label(current_provider)
        lines = [f"Current: `{current_model or 'unknown'}` on {provider_label}", ""]
        try:
            providers = list_authenticated_providers(
                current_provider=current_provider,
                user_providers=user_provs,
                custom_providers=custom_provs,
                max_models=5,
            )
            for p in providers:
                tag = " (current)" if p["is_current"] else ""
                lines.append(f"**{p['name']}** `--provider {p['slug']}`{tag}:")
                if p["models"]:
                    model_strs = ", ".join(f"`{m}`" for m in p["models"])
                    extra = f" (+{p['total_models'] - len(p['models'])} more)" if p["total_models"] > len(p["models"]) else ""
                    lines.append(f" {model_strs}{extra}")
                elif p.get("api_url"):
                    lines.append(f" `{p['api_url']}`")
                lines.append("")
        except Exception:
            pass
        lines.append("`/model <name>` — switch model")
        lines.append("`/model <name> --provider <slug>` — switch provider")
        lines.append("`/model <name> --global` — persist")
        return "\n".join(lines)
    # Perform the switch
    result = _switch_model(
        raw_input=model_input,
        current_provider=current_provider,
        current_model=current_model,
        current_base_url=current_base_url,
        current_api_key=current_api_key,
        is_global=persist_global,
        explicit_provider=explicit_provider,
        user_providers=user_provs,
        custom_providers=custom_provs,
    )
    if not result.success:
        return f"Error: {result.error_message}"
    # If there's a cached agent, update it in-place
    cached_entry = None
    _cache_lock = getattr(self, "_agent_cache_lock", None)
    _cache = getattr(self, "_agent_cache", None)
    if _cache_lock and _cache is not None:
        with _cache_lock:
            cached_entry = _cache.get(session_key)
    if cached_entry and cached_entry[0] is not None:
        try:
            cached_entry[0].switch_model(
                new_model=result.new_model,
                new_provider=result.target_provider,
                api_key=result.api_key,
                base_url=result.base_url,
                api_mode=result.api_mode,
            )
        except Exception as exc:
            logger.warning("In-place model switch failed for cached agent: %s", exc)
    # Store a note to prepend to the next user message so the model
    # knows about the switch (avoids system messages mid-history).
    if not hasattr(self, "_pending_model_notes"):
        self._pending_model_notes = {}
    self._pending_model_notes[session_key] = (
        f"[Note: model was just switched from {current_model} to {result.new_model} "
        f"via {result.provider_label or result.target_provider}. "
        f"Adjust your self-identification accordingly.]"
    )
    # Store session override so next agent creation uses the new model
    self._session_model_overrides[session_key] = {
        "model": result.new_model,
        "provider": result.target_provider,
        "api_key": result.api_key,
        "base_url": result.base_url,
        "api_mode": result.api_mode,
    }
    # Evict cached agent so the next turn creates a fresh agent from the
    # override rather than relying on cache signature mismatch detection.
    self._evict_cached_agent(session_key)
    # Persist to config if --global
    if persist_global:
        try:
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    cfg = yaml.safe_load(f) or {}
            else:
                cfg = {}
            model_cfg = cfg.setdefault("model", {})
            model_cfg["default"] = result.new_model
            model_cfg["provider"] = result.target_provider
            if result.base_url:
                model_cfg["base_url"] = result.base_url
            from hermes_cli.config import save_config
            save_config(cfg)
        except Exception as e:
            logger.warning("Failed to persist model switch: %s", e)
    # Build confirmation message with full metadata
    provider_label = result.provider_label or result.target_provider
    lines = [f"Model switched to `{result.new_model}`"]
    lines.append(f"Provider: {provider_label}")
    # Rich metadata from models.dev
    mi = result.model_info
    if mi:
        if mi.context_window:
            lines.append(f"Context: {mi.context_window:,} tokens")
        if mi.max_output:
            lines.append(f"Max output: {mi.max_output:,} tokens")
        if mi.has_cost_data():
            lines.append(f"Cost: {mi.format_cost()}")
        lines.append(f"Capabilities: {mi.format_capabilities()}")
    else:
        try:
            from agent.model_metadata import get_model_context_length
            ctx = get_model_context_length(
                result.new_model,
                base_url=result.base_url or current_base_url,
                api_key=result.api_key or current_api_key,
                provider=result.target_provider,
            )
            lines.append(f"Context: {ctx:,} tokens")
        except Exception:
            pass
    # Cache notice
    cache_enabled = (
        ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
        or result.api_mode == "anthropic_messages"
    )
    if cache_enabled:
        lines.append("Prompt caching: enabled")
    if result.warning_message:
        lines.append(f"Warning: {result.warning_message}")
    if persist_global:
        lines.append("Saved to config.yaml (`--global`)")
    else:
        lines.append("_(session only -- add `--global` to persist)_")
    return "\n".join(lines)
async def _handle_provider_command(self, event: MessageEvent) -> str:
    """Handle /provider command - show available providers."""
    import yaml
    from hermes_cli.models import (
        list_available_providers,
        normalize_provider,
        _PROVIDER_LABELS,
    )
    # Determine the active provider from config.yaml (default: openrouter).
    active = "openrouter"
    model_section = {}
    cfg_file = _hermes_home / 'config.yaml'
    try:
        if cfg_file.exists():
            with open(cfg_file, encoding="utf-8") as fh:
                data = yaml.safe_load(fh) or {}
            model_section = data.get("model", {})
            if isinstance(model_section, dict):
                active = model_section.get("provider", active)
    except Exception:
        pass
    active = normalize_provider(active)
    if active == "auto":
        # "auto" defers to credential detection; fall back to openrouter.
        try:
            from hermes_cli.auth import resolve_provider as _resolve_provider
            active = _resolve_provider(active)
        except Exception:
            active = "openrouter"
    if active == "openrouter":
        # A base_url pointing away from openrouter.ai means a custom endpoint.
        base = model_section.get("base_url", "") if isinstance(model_section, dict) else ""
        if base and "openrouter.ai" not in base:
            active = "custom"
    label = _PROVIDER_LABELS.get(active, active)
    out = [
        f"🔌 **Current provider:** {label} (`{active}`)",
        "",
        "**Available providers:**",
    ]
    for entry in list_available_providers():
        suffix = " ← active" if entry["id"] == active else ""
        # NOTE(review): both branches are the empty string — a status marker
        # (e.g. an emoji) may have been lost here; confirm against history.
        prefix = "" if entry["authenticated"] else ""
        alias_txt = f" _(also: {', '.join(entry['aliases'])})_" if entry["aliases"] else ""
        out.append(f"{prefix} `{entry['id']}` — {entry['label']}{alias_txt}{suffix}")
    out.append("")
    out.append("Switch: `/model provider:model-name`")
    out.append("Setup: `hermes setup`")
    return "\n".join(out)
async def _handle_personality_command(self, event: MessageEvent) -> str:
    """Handle /personality command - list or set a personality.

    With no argument, lists the personalities configured under
    ``agent.personalities`` in config.yaml. With an argument, writes the
    chosen personality's system prompt into config.yaml and applies it
    in-memory so it takes effect on the next message. ``none`` (or
    ``default``/``neutral``) clears the overlay. Unknown names get a help
    message listing the available choices.
    """
    import yaml
    from hermes_constants import display_hermes_home
    args = event.get_command_args().strip().lower()
    config_path = _hermes_home / 'config.yaml'
    try:
        if config_path.exists():
            with open(config_path, 'r', encoding="utf-8") as f:
                config = yaml.safe_load(f) or {}
            personalities = config.get("agent", {}).get("personalities", {})
        else:
            config = {}
            personalities = {}
    except Exception:
        # Unreadable config: treat as no personalities configured.
        config = {}
        personalities = {}
    if not personalities:
        return f"No personalities configured in `{display_hermes_home()}/config.yaml`"
    if not args:
        # No argument: list all configured personalities with a short preview.
        lines = ["🎭 **Available Personalities**\n"]
        lines.append("• `none` — (no personality overlay)")
        for name, prompt in personalities.items():
            if isinstance(prompt, dict):
                preview = prompt.get("description") or prompt.get("system_prompt", "")[:50]
            else:
                preview = prompt[:50] + "..." if len(prompt) > 50 else prompt
            lines.append(f"• `{name}` — {preview}")
        lines.append("\nUsage: `/personality <name>`")
        return "\n".join(lines)
    def _resolve_prompt(value):
        # Dict personalities may split the prompt into system_prompt/tone/style.
        if isinstance(value, dict):
            parts = [value.get("system_prompt", "")]
            if value.get("tone"):
                parts.append(f'Tone: {value["tone"]}')
            if value.get("style"):
                parts.append(f'Style: {value["style"]}')
            return "\n".join(p for p in parts if p)
        return str(value)
    if args in ("none", "default", "neutral"):
        try:
            if "agent" not in config or not isinstance(config.get("agent"), dict):
                config["agent"] = {}
            config["agent"]["system_prompt"] = ""
            atomic_yaml_write(config_path, config)
        except Exception as e:
            return f"⚠️ Failed to save personality change: {e}"
        self._ephemeral_system_prompt = ""
        return "🎭 Personality cleared — using base agent behavior.\n_(takes effect on next message)_"
    elif args in personalities:
        new_prompt = _resolve_prompt(personalities[args])
        # Write to config.yaml, same pattern as CLI save_config_value.
        try:
            if "agent" not in config or not isinstance(config.get("agent"), dict):
                config["agent"] = {}
            config["agent"]["system_prompt"] = new_prompt
            atomic_yaml_write(config_path, config)
        except Exception as e:
            return f"⚠️ Failed to save personality change: {e}"
        # Update in-memory so it takes effect on the very next message.
        self._ephemeral_system_prompt = new_prompt
        return f"🎭 Personality set to **{args}**\n_(takes effect on next message)_"
    else:
        # Unknown personality name: show the valid options.
        available = "`none`, " + ", ".join(f"`{n}`" for n in personalities)
        return f"Unknown personality: `{args}`\n\nAvailable: {available}"
async def _handle_retry_command(self, event: MessageEvent) -> str:
    """Handle /retry command - re-send the last user message."""
    src = event.source
    entry = self.session_store.get_or_create_session(src)
    transcript = self.session_store.load_transcript(entry.session_id)
    # Walk backwards to locate the most recent user turn.
    idx = next(
        (i for i in range(len(transcript) - 1, -1, -1)
         if transcript[i].get("role") == "user"),
        None,
    )
    last_text = transcript[idx].get("content", "") if idx is not None else None
    if not last_text:
        return "No previous message to retry."
    # Drop the last user turn (and everything after it) from the transcript.
    self.session_store.rewrite_transcript(entry.session_id, transcript[:idx])
    # Transcript shrank, so the cached prompt-token count is stale.
    entry.last_prompt_tokens = 0
    # Replay the old text through the normal pipeline as a fresh event.
    replay = MessageEvent(
        text=last_text,
        message_type=MessageType.TEXT,
        source=src,
        raw_message=event.raw_message,
        channel_prompt=event.channel_prompt,
    )
    return await self._handle_message(replay)
async def _handle_undo_command(self, event: MessageEvent) -> str:
    """Handle /undo command - remove the last user/assistant exchange."""
    entry = self.session_store.get_or_create_session(event.source)
    transcript = self.session_store.load_transcript(entry.session_id)
    # Locate the most recent user turn; everything from there on is undone.
    cut = None
    for pos in reversed(range(len(transcript))):
        if transcript[pos].get("role") == "user":
            cut = pos
            break
    if cut is None:
        return "Nothing to undo."
    removed_text = transcript[cut].get("content", "")
    dropped = len(transcript) - cut
    self.session_store.rewrite_transcript(entry.session_id, transcript[:cut])
    # Transcript shrank, so the cached prompt-token count is stale.
    entry.last_prompt_tokens = 0
    snippet = removed_text[:40] + "..." if len(removed_text) > 40 else removed_text
    return f"↩️ Undid {dropped} message(s).\nRemoved: \"{snippet}\""
async def _handle_set_home_command(self, event: MessageEvent) -> str:
    """Handle /sethome command -- set the current chat as the platform's home channel."""
    src = event.source
    platform_name = src.platform.value if src.platform else "unknown"
    chat_id = src.chat_id
    chat_name = src.chat_name or chat_id
    env_key = f"{platform_name.upper()}_HOME_CHANNEL"
    # Persist the mapping into config.yaml so it survives restarts.
    try:
        import yaml
        config_path = _hermes_home / 'config.yaml'
        cfg = {}
        if config_path.exists():
            with open(config_path, encoding="utf-8") as fh:
                cfg = yaml.safe_load(fh) or {}
        cfg[env_key] = chat_id
        atomic_yaml_write(config_path, cfg)
        # Mirror into the live environment so it applies without a restart.
        os.environ[env_key] = str(chat_id)
    except Exception as e:
        return f"Failed to save home channel: {e}"
    return (
        f"✅ Home channel set to **{chat_name}** (ID: {chat_id}).\n"
        f"Cron jobs and cross-platform messages will be delivered here."
    )
@staticmethod
def _get_guild_id(event: MessageEvent) -> Optional[int]:
    """Extract Discord guild_id from the raw message object."""
    raw = getattr(event, "raw_message", None)
    if raw is None:
        return None
    # Slash-command interactions carry guild_id directly.
    gid = getattr(raw, "guild_id", None)
    if gid:
        return int(gid)
    # Regular messages expose a guild object instead.
    guild = getattr(raw, "guild", None)
    if guild:
        return guild.id
    return None
async def _handle_voice_command(self, event: MessageEvent) -> str:
    """Handle /voice [on|off|tts|channel|leave|status] command."""
    arg = event.get_command_args().strip().lower()
    chat_id = event.source.chat_id
    adapter = self.adapters.get(event.source.platform)

    def _switch(mode: str, tts_disabled: bool) -> None:
        # Record the new mode, persist it, and sync the adapter's auto-TTS flag.
        self._voice_mode[chat_id] = mode
        self._save_voice_modes()
        if adapter:
            self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=tts_disabled)

    if arg in ("on", "enable"):
        _switch("voice_only", False)
        return (
            "Voice mode enabled.\n"
            "I'll reply with voice when you send voice messages.\n"
            "Use /voice tts to get voice replies for all messages."
        )
    if arg in ("off", "disable"):
        _switch("off", True)
        return "Voice mode disabled. Text-only replies."
    if arg == "tts":
        _switch("all", False)
        return (
            "Auto-TTS enabled.\n"
            "All replies will include a voice message."
        )
    if arg in ("channel", "join"):
        return await self._handle_voice_channel_join(event)
    if arg == "leave":
        return await self._handle_voice_channel_leave(event)
    if arg == "status":
        mode = self._voice_mode.get(chat_id, "off")
        labels = {
            "off": "Off (text only)",
            "voice_only": "On (voice reply to voice messages)",
            "all": "TTS (voice reply to all messages)",
        }
        # Include live voice-channel details when connected.
        adapter = self.adapters.get(event.source.platform)
        guild_id = self._get_guild_id(event)
        if guild_id and hasattr(adapter, "get_voice_channel_info"):
            info = adapter.get_voice_channel_info(guild_id)
            if info:
                report = [
                    f"Voice mode: {labels.get(mode, mode)}",
                    f"Voice channel: #{info['channel_name']}",
                    f"Participants: {info['member_count']}",
                ]
                for member in info["members"]:
                    speaking = " (speaking)" if member.get("is_speaking") else ""
                    report.append(f" - {member['display_name']}{speaking}")
                return "\n".join(report)
        return f"Voice mode: {labels.get(mode, mode)}"
    # No recognized argument: toggle between off and voice_only.
    if self._voice_mode.get(chat_id, "off") == "off":
        _switch("voice_only", False)
        return "Voice mode enabled."
    _switch("off", True)
    return "Voice mode disabled."
async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
    """Join the user's current Discord voice channel."""
    adapter = self.adapters.get(event.source.platform)
    if not hasattr(adapter, "join_voice_channel"):
        return "Voice channels are not supported on this platform."
    guild_id = self._get_guild_id(event)
    if not guild_id:
        return "This command only works in a Discord server."
    voice_channel = await adapter.get_user_voice_channel(
        guild_id, event.source.user_id
    )
    if not voice_channel:
        return "You need to be in a voice channel first."
    # Callbacks are wired BEFORE joining so voice input that arrives right
    # after the connection is established is not lost.
    if hasattr(adapter, "_voice_input_callback"):
        adapter._voice_input_callback = self._handle_voice_channel_input
    if hasattr(adapter, "_on_voice_disconnect"):
        adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup
    try:
        joined = await adapter.join_voice_channel(voice_channel)
    except Exception as e:
        logger.warning("Failed to join voice channel: %s", e)
        adapter._voice_input_callback = None
        lowered = str(e).lower()
        # Missing native voice deps produce characteristic error text.
        if any(token in lowered for token in ("pynacl", "nacl", "davey")):
            return (
                "Voice dependencies are missing (PyNaCl / davey). "
                "Install or reinstall Hermes with the messaging extra, e.g. "
                "`pip install hermes-agent[messaging]`."
            )
        return f"Failed to join voice channel: {e}"
    if joined:
        adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
        if hasattr(adapter, "_voice_sources"):
            adapter._voice_sources[guild_id] = event.source.to_dict()
        self._voice_mode[event.source.chat_id] = "all"
        self._save_voice_modes()
        self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
        return (
            f"Joined voice channel **{voice_channel.name}**.\n"
            f"I'll speak my replies and listen to you. Use /voice leave to disconnect."
        )
    # Join returned falsy — unwire the callback again.
    adapter._voice_input_callback = None
    return "Failed to join voice channel. Check bot permissions (Connect + Speak)."
async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
    """Leave the Discord voice channel."""
    adapter = self.adapters.get(event.source.platform)
    gid = self._get_guild_id(event)
    # Short-circuit order matches the original checks: guild known,
    # adapter supports leaving, and we are actually connected.
    connected = (
        gid
        and hasattr(adapter, "leave_voice_channel")
        and hasattr(adapter, "is_in_voice_channel")
        and adapter.is_in_voice_channel(gid)
    )
    if not connected:
        return "Not in a voice channel."
    try:
        await adapter.leave_voice_channel(gid)
    except Exception as e:
        logger.warning("Error leaving voice channel: %s", e)
    # Clean up runner-side state unconditionally, even if leaving raised.
    chat = event.source.chat_id
    self._voice_mode[chat] = "off"
    self._save_voice_modes()
    self._set_adapter_auto_tts_disabled(adapter, chat, disabled=True)
    if hasattr(adapter, "_voice_input_callback"):
        adapter._voice_input_callback = None
    return "Left voice channel."
def _handle_voice_timeout_cleanup(self, chat_id: str) -> None:
    """Called by the adapter when a voice channel times out.

    Cleans up runner-side voice_mode state that the adapter cannot reach.
    """
    self._voice_mode[chat_id] = "off"
    self._save_voice_modes()
    discord_adapter = self.adapters.get(Platform.DISCORD)
    self._set_adapter_auto_tts_disabled(discord_adapter, chat_id, disabled=True)
async def _handle_voice_channel_input(
    self, guild_id: int, user_id: int, transcript: str
):
    """Handle transcribed voice from a user in a voice channel.

    Creates a synthetic MessageEvent and processes it through the
    adapter's full message pipeline (session, typing, agent, TTS reply).
    """
    adapter = self.adapters.get(Platform.DISCORD)
    if not adapter:
        return
    text_channel_id = adapter._voice_text_channels.get(guild_id)
    if not text_channel_id:
        return
    # Prefer the linked text channel's stored source metadata so voice
    # input shares a session with the bound text conversation.
    stored = getattr(adapter, "_voice_sources", {}).get(guild_id)
    if stored:
        source = SessionSource.from_dict(stored)
        source.user_id = str(user_id)
        source.user_name = str(user_id)
    else:
        source = SessionSource(
            platform=Platform.DISCORD,
            chat_id=str(text_channel_id),
            user_id=str(user_id),
            user_name=str(user_id),
            chat_type="channel",
        )
    # Authorization gate — silently drop input from unknown users.
    if not self._is_user_authorized(source):
        logger.debug("Unauthorized voice input from user %d, ignoring", user_id)
        return
    # Echo the transcript into the text channel (post-auth, mentions sanitized).
    try:
        channel = adapter._client.get_channel(text_channel_id)
        if channel:
            safe_text = transcript[:2000].replace("@everyone", "@\u200beveryone").replace("@here", "@\u200bhere")
            await channel.send(f"**[Voice]** <@{user_id}>: {safe_text}")
    except Exception:
        pass
    # A SimpleNamespace raw_message lets _get_guild_id() find the guild so
    # _send_voice_reply() can play audio in the voice channel.
    from types import SimpleNamespace
    synthetic = MessageEvent(
        source=source,
        text=transcript,
        message_type=MessageType.VOICE,
        raw_message=SimpleNamespace(guild_id=guild_id, guild=None),
    )
    await adapter.handle_message(synthetic)
def _should_send_voice_reply(
    self,
    event: MessageEvent,
    response: str,
    agent_messages: list,
    already_sent: bool = False,
) -> bool:
    """Decide whether the runner should send a TTS voice reply.

    Returns False when:
    - voice_mode is off for this chat
    - response is empty or an error
    - agent already called text_to_speech tool (dedup)
    - voice input and base adapter auto-TTS already handled it (skip_double)
      UNLESS streaming already consumed the response (already_sent=True),
      in which case the base adapter won't have text for auto-TTS so the
      runner must handle it.
    """
    if not response or response.startswith("Error:"):
        return False
    mode = self._voice_mode.get(event.source.chat_id, "off")
    voice_input = event.message_type == MessageType.VOICE
    wants_reply = mode == "all" or (mode == "voice_only" and voice_input)
    if not wants_reply:
        return False
    # Dedup: if any assistant turn already invoked the text_to_speech tool,
    # the agent has spoken for itself.
    for msg in agent_messages:
        if msg.get("role") != "assistant":
            continue
        for call in (msg.get("tool_calls") or []):
            if call.get("function", {}).get("name") == "text_to_speech":
                return False
    # Dedup: for voice input the base adapter's auto-TTS handles playback —
    # unless streaming already consumed the text (already_sent=True), in
    # which case the adapter has nothing to speak and the runner takes over.
    if voice_input and not already_sent:
        return False
    return True
async def _send_voice_reply(self, event: MessageEvent, text: str) -> None:
    """Generate TTS audio and send as a voice message before the text reply."""
    import uuid as _uuid
    requested_path = None
    produced_path = None
    try:
        from tools.tts_tool import text_to_speech_tool, _strip_markdown_for_tts
        speech_text = _strip_markdown_for_tts(text[:4000])
        if not speech_text:
            return
        # Request an .mp3 so the edge-tts conversion to opus works correctly;
        # the tool may convert to .ogg, so trust file_path in its result.
        requested_path = os.path.join(
            tempfile.gettempdir(), "hermes_voice",
            f"tts_reply_{_uuid.uuid4().hex[:12]}.mp3",
        )
        os.makedirs(os.path.dirname(requested_path), exist_ok=True)
        raw_result = await asyncio.to_thread(
            text_to_speech_tool, text=speech_text, output_path=requested_path
        )
        outcome = json.loads(raw_result)
        # The actual file path may differ after opus conversion.
        produced_path = outcome.get("file_path", requested_path)
        if not outcome.get("success") or not os.path.isfile(produced_path):
            logger.warning("Auto voice reply TTS failed: %s", outcome.get("error"))
            return
        adapter = self.adapters.get(event.source.platform)
        guild_id = self._get_guild_id(event)
        # Prefer in-channel playback when connected to a voice channel.
        if (guild_id
                and hasattr(adapter, "play_in_voice_channel")
                and hasattr(adapter, "is_in_voice_channel")
                and adapter.is_in_voice_channel(guild_id)):
            await adapter.play_in_voice_channel(guild_id, produced_path)
        elif adapter and hasattr(adapter, "send_voice"):
            voice_kwargs: Dict[str, Any] = {
                "chat_id": event.source.chat_id,
                "audio_path": produced_path,
                "reply_to": event.message_id,
            }
            if event.source.thread_id:
                voice_kwargs["metadata"] = {"thread_id": event.source.thread_id}
            await adapter.send_voice(**voice_kwargs)
    except Exception as e:
        logger.warning("Auto voice reply failed: %s", e, exc_info=True)
    finally:
        # Best-effort removal of both candidate temp files; a second unlink
        # of the same path just raises OSError, which is swallowed.
        for path in (requested_path, produced_path):
            if path is None:
                continue
            try:
                os.unlink(path)
            except OSError:
                pass
async def _deliver_media_from_response(
    self,
    response: str,
    event: MessageEvent,
    adapter,
) -> None:
    """Extract MEDIA: tags and local file paths from a response and deliver them.

    Called after streaming has already sent the text to the user, so the
    text itself is already delivered — this only handles file attachments
    that the normal _process_message_background path would have caught.
    """
    from pathlib import Path
    try:
        media_files, _ = adapter.extract_media(response)
        _, cleaned = adapter.extract_images(response)
        local_files, _ = adapter.extract_local_files(cleaned)
        meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
        audio_exts = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
        video_exts = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
        image_exts = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
        # MEDIA: tags — route each file to the right sender by extension.
        for media_path, _is_voice in media_files:
            try:
                suffix = Path(media_path).suffix.lower()
                if suffix in audio_exts:
                    await adapter.send_voice(
                        chat_id=event.source.chat_id,
                        audio_path=media_path,
                        metadata=meta,
                    )
                elif suffix in video_exts:
                    await adapter.send_video(
                        chat_id=event.source.chat_id,
                        video_path=media_path,
                        metadata=meta,
                    )
                elif suffix in image_exts:
                    await adapter.send_image_file(
                        chat_id=event.source.chat_id,
                        image_path=media_path,
                        metadata=meta,
                    )
                else:
                    await adapter.send_document(
                        chat_id=event.source.chat_id,
                        file_path=media_path,
                        metadata=meta,
                    )
            except Exception as e:
                logger.warning("[%s] Post-stream media delivery failed: %s", adapter.name, e)
        # Bare local paths — images display inline, everything else is a document.
        for file_path in local_files:
            try:
                suffix = Path(file_path).suffix.lower()
                if suffix in image_exts:
                    await adapter.send_image_file(
                        chat_id=event.source.chat_id,
                        image_path=file_path,
                        metadata=meta,
                    )
                else:
                    await adapter.send_document(
                        chat_id=event.source.chat_id,
                        file_path=file_path,
                        metadata=meta,
                    )
            except Exception as e:
                logger.warning("[%s] Post-stream file delivery failed: %s", adapter.name, e)
    except Exception as e:
        logger.warning("Post-stream media extraction failed: %s", e)
async def _handle_rollback_command(self, event: MessageEvent) -> str:
    """Handle /rollback command — list or restore filesystem checkpoints."""
    from tools.checkpoint_manager import CheckpointManager, format_checkpoint_list
    # Pull checkpoint settings out of config.yaml (bool shorthand allowed).
    settings = {}
    try:
        import yaml as _y
        cfg_file = _hermes_home / "config.yaml"
        if cfg_file.exists():
            with open(cfg_file, encoding="utf-8") as fh:
                loaded = _y.safe_load(fh) or {}
            settings = loaded.get("checkpoints", {})
            if isinstance(settings, bool):
                settings = {"enabled": settings}
    except Exception:
        pass
    if not settings.get("enabled", False):
        return (
            "Checkpoints are not enabled.\n"
            "Enable in config.yaml:\n```\ncheckpoints:\n enabled: true\n```"
        )
    manager = CheckpointManager(
        enabled=True,
        max_snapshots=settings.get("max_snapshots", 50),
    )
    workdir = os.getenv("TERMINAL_CWD", str(Path.home()))
    selector = event.get_command_args().strip()
    if not selector:
        # No argument: just list what's available for the working directory.
        return format_checkpoint_list(manager.list_checkpoints(workdir), workdir)
    # Restore by 1-based index or by raw hash.
    snapshots = manager.list_checkpoints(workdir)
    if not snapshots:
        return f"No checkpoints found for {workdir}"
    try:
        pos = int(selector) - 1
    except ValueError:
        chosen = selector
    else:
        if not (0 <= pos < len(snapshots)):
            return f"Invalid checkpoint number. Use 1-{len(snapshots)}."
        chosen = snapshots[pos]["hash"]
    outcome = manager.restore(workdir, chosen)
    if outcome["success"]:
        return (
            f"✅ Restored to checkpoint {outcome['restored_to']}: {outcome['reason']}\n"
            f"A pre-rollback snapshot was saved automatically."
        )
    return f"{outcome['error']}"
async def _handle_background_command(self, event: MessageEvent) -> str:
    """Handle /background <prompt> — run a prompt in a separate background session.

    Spawns a new AIAgent in a background thread with its own session.
    When it completes, sends the result back to the same chat without
    modifying the active session's conversation history.
    """
    prompt = event.get_command_args().strip()
    if not prompt:
        return (
            "Usage: /background <prompt>\n"
            "Example: /background Summarize the top HN stories today\n\n"
            "Runs the prompt in a separate session. "
            "You can keep chatting — the result will appear here when done."
        )
    task_id = f"bg_{datetime.now().strftime('%H%M%S')}_{os.urandom(3).hex()}"
    # Fire-and-forget; hold a strong reference so the task isn't
    # garbage-collected before it finishes.
    task = asyncio.create_task(
        self._run_background_task(prompt, event.source, task_id)
    )
    self._background_tasks.add(task)
    task.add_done_callback(self._background_tasks.discard)
    shortened = prompt[:60] + ("..." if len(prompt) > 60 else "")
    return f'🔄 Background task started: "{shortened}"\nTask ID: {task_id}\nYou can keep chatting — results will appear when done.'
async def _run_background_task(
self, prompt: str, source: "SessionSource", task_id: str
) -> None:
"""Execute a background agent task and deliver the result to the chat."""
from run_agent import AIAgent
adapter = self.adapters.get(source.platform)
if not adapter:
logger.warning("No adapter for platform %s in background task %s", source.platform, task_id)
return
_thread_metadata = {"thread_id": source.thread_id} if source.thread_id else None
try:
user_config = _load_gateway_config()
model, runtime_kwargs = self._resolve_session_agent_runtime(
source=source,
user_config=user_config,
)
if not runtime_kwargs.get("api_key"):
await adapter.send(
source.chat_id,
f"❌ Background task {task_id} failed: no provider credentials configured.",
metadata=_thread_metadata,
)
return
platform_key = _platform_config_key(source.platform)
from hermes_cli.tools_config import _get_platform_tools
enabled_toolsets = sorted(_get_platform_tools(user_config, platform_key))
pr = self._provider_routing
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
reasoning_config = self._load_reasoning_config()
self._reasoning_config = reasoning_config
self._service_tier = self._load_service_tier()
fix: hermes update causes dual gateways on macOS (launchd) (#1567) * feat: add optional smart model routing Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow. * fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s * fix(gateway): avoid recursive ExecStop in user systemd unit * fix: extend ExecStop removal and TimeoutStopSec=60 to system unit The cherry-picked PR #1448 fix only covered the user systemd unit. The system unit had the same TimeoutStopSec=15 and could benefit from the same 60s timeout for clean shutdown. Also adds a regression test for the system unit. --------- Co-authored-by: Ninja <ninja@local> * feat(skills): add blender-mcp optional skill for 3D modeling Control a running Blender instance from Hermes via socket connection to the blender-mcp addon (port 9876). Supports creating 3D objects, materials, animations, and running arbitrary bpy code. Placed in optional-skills/ since it requires Blender 4.3+ desktop with a third-party addon manually started each session. * feat(acp): support slash commands in ACP adapter (#1532) Adds /help, /model, /tools, /context, /reset, /compact, /version to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled directly in the server without instantiating the TUI — each command queries agent/session state and returns plain text. Unrecognized /commands fall through to the LLM as normal messages. /model uses detect_provider_for_model() for auto-detection when switching models, matching the CLI and gateway behavior. 
Fixes #1402 * fix(logging): improve error logging in session search tool (#1533) * fix(gateway): restart on retryable startup failures (#1517) * feat(email): add skip_attachments option via config.yaml * feat(email): add skip_attachments option via config.yaml Adds a config.yaml-driven option to skip email attachments in the gateway email adapter. Useful for malware protection and bandwidth savings. Configure in config.yaml: platforms: email: skip_attachments: true Based on PR #1521 by @an420eth, changed from env var to config.yaml (via PlatformConfig.extra) to match the project's config-first pattern. * docs: document skip_attachments option for email adapter * fix(telegram): retry on transient TLS failures during connect and send Add exponential-backoff retry (3 attempts) around initialize() to handle transient TLS resets during gateway startup. Also catches TimedOut and OSError in addition to NetworkError. Add exponential-backoff retry (3 attempts) around send_message() for NetworkError during message delivery, wrapping the existing Markdown fallback logic. Both imports are guarded with try/except ImportError for test environments where telegram is mocked. Based on PR #1527 by cmd8. Closes #1526. * feat: permissive block_anchor thresholds and unicode normalization (#1539) Salvaged from PR #1528 by an420eth. Closes #517. Improves _strategy_block_anchor in fuzzy_match.py: - Add unicode normalization (smart quotes, em/en-dashes, ellipsis, non-breaking spaces → ASCII) so LLM-produced unicode artifacts don't break anchor line matching - Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for multiple candidates — if first/last lines match exactly, the block is almost certainly correct - Use original (non-normalized) content for offset calculation to preserve correct character positions Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space anchors, very-low-similarity unique matches), zero regressions on all 9 existing fuzzy match tests. 
Co-authored-by: an420eth <an420eth@users.noreply.github.com> * feat(cli): add file path autocomplete in the input prompt (#1545) When typing a path-like token (./ ../ ~/ / or containing /), the CLI now shows filesystem completions in the dropdown menu. Directories show a trailing slash and 'dir' label; files show their size. Completions are case-insensitive and capped at 30 entries. Triggered by tokens like: edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ... check ~/doc → shows ~/docs/, ~/documents/, ... read /etc/hos → shows /etc/hosts, /etc/hostname, ... open tools/reg → shows tools/registry.py Slash command autocomplete (/help, /model, etc.) is unaffected — it still triggers when the input starts with /. Inspired by OpenCode PR #145 (file path completion menu). Implementation: - hermes_cli/commands.py: _extract_path_word() detects path-like tokens, _path_completions() yields filesystem Completions with size labels, get_completions() routes to paths vs slash commands - tests/hermes_cli/test_path_completion.py: 26 tests covering path extraction, prefix filtering, directory markers, home expansion, case-insensitivity, integration with slash commands * feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled Add privacy.redact_pii config option (boolean, default false). When enabled, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM provider: - Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256> - User IDs → hashed to user_<sha256> - Chat IDs → numeric portion hashed, platform prefix preserved - Home channel IDs → hashed - Names/usernames → NOT affected (user-chosen, publicly visible) Hashes are deterministic (same user → same hash) so the model can still distinguish users in group chats. Routing and delivery use the original values internally — redaction only affects LLM context. Inspired by OpenClaw PR #47959. 
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs) Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM needs the real ID to tag users. Redaction now only applies to WhatsApp, Signal, and Telegram where IDs are pure routing metadata. Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack. * feat: smart approvals + /stop command (inspired by OpenAI Codex) * feat: smart approvals — LLM-based risk assessment for dangerous commands Adds a 'smart' approval mode that uses the auxiliary LLM to assess whether a flagged command is genuinely dangerous or a false positive, auto-approving low-risk commands without prompting the user. Inspired by OpenAI Codex's Smart Approvals guardian subagent (openai/codex#13860). Config (config.yaml): approvals: mode: manual # manual (default), smart, off Modes: - manual — current behavior, always prompt the user - smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block), or ESCALATE (fall through to manual prompt) - off — skip all approval prompts (equivalent to --yolo) When smart mode auto-approves, the pattern gets session-level approval so subsequent uses of the same pattern don't trigger another LLM call. When it denies, the command is blocked without user prompt. When uncertain, it escalates to the normal manual approval flow. The LLM prompt is carefully scoped: it sees only the command text and the flagged reason, assesses actual risk vs false positive, and returns a single-word verdict. * feat: make smart approval model configurable via config.yaml Adds auxiliary.approval section to config.yaml with the same provider/model/base_url/api_key pattern as other aux tasks (vision, web_extract, compression, etc.). Config: auxiliary: approval: provider: auto model: '' # fast/cheap model recommended base_url: '' api_key: '' Bridged to env vars in both CLI and gateway paths so the aux client picks them up automatically. 
* feat: add /stop command to kill all background processes Adds a /stop slash command that kills all running background processes at once. Currently users have to process(list) then process(kill) for each one individually. Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current turn) from /stop (cleans up background processes). See openai/codex#14602. Ctrl+C continues to only interrupt the active agent turn — background dev servers, watchers, etc. are preserved. /stop is the explicit way to clean them all up. * feat: first-class plugin architecture + hide status bar cost by default (#1544) The persistent status bar now shows context %, token counts, and duration but NOT $ cost by default. Cost display is opt-in via: display: show_cost: true in config.yaml, or: hermes config set display.show_cost true The /usage command still shows full cost breakdown since the user explicitly asked for it — this only affects the always-visible bar. Status bar without cost: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m Status bar with show_cost: true: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m * feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex) * feat: improve memory prioritization — user preferences over procedural knowledge Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493) which focus memory writes on user preferences and recurring patterns rather than procedural task details. Key insight: 'Optimize for reducing future user steering — the most valuable memory prevents the user from having to repeat themselves.' 
Changes: - MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy and the core principle about reducing user steering - MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put corrections first, added explicit PRIORITY guidance - Memory nudge (run_agent.py): now asks specifically about preferences, corrections, and workflow patterns instead of generic 'anything' - Memory flush (run_agent.py): now instructs to prioritize user preferences and corrections over task-specific details * feat: more aggressive skill creation and update prompting Press harder on skill updates — the agent should proactively patch skills when it encounters issues during use, not wait to be asked. Changes: - SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction to patch skills immediately when found outdated/wrong - Skills header: added instruction to update loaded skills before finishing if they had missing steps or wrong commands - Skill nudge: more assertive ('save the approach' not 'consider saving'), now also prompts for updating existing skills used in the task - Skill nudge interval: lowered default from 15 to 10 iterations - skill_manage schema: added 'patch it immediately' to update triggers * feat: first-class plugin architecture (#1555) Plugin system for extending Hermes with custom tools, hooks, and integrations — no source code changes required. 
Core system (hermes_cli/plugins.py): - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and pip entry_points (hermes_agent.plugins group) - PluginContext with register_tool() and register_hook() - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call, on_session_start/end - Namespace package handling for relative imports in plugins - Graceful error isolation — broken plugins never crash the agent Integration (model_tools.py): - Plugin discovery runs after built-in + MCP tools - Plugin tools bypass toolset filter via get_plugin_tool_names() - Pre/post tool call hooks fire in handle_function_call() CLI: - /plugins command shows loaded plugins, tool counts, status - Added to COMMANDS dict for autocomplete Docs: - Getting started guide (build-a-hermes-plugin.md) — full tutorial building a calculator plugin step by step - Reference page (features/plugins.md) — quick overview + tables - Covers: file structure, schemas, handlers, hooks, data files, bundled skills, env var gating, pip distribution, common mistakes Tests: 16 tests covering discovery, loading, hooks, tool visibility. * fix: hermes update causes dual gateways on macOS (launchd) Three bugs worked together to create the dual-gateway problem: 1. cmd_update only checked systemd for gateway restart, completely ignoring launchd on macOS. After killing the PID it would print 'Restart it with: hermes gateway run' even when launchd was about to auto-respawn the process. 2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway after SIGTERM (non-zero exit), so the user's manual restart created a second instance. 3. The launchd plist lacked --replace (systemd had it), so the respawned gateway didn't kill stale instances on startup. 
Fixes: - Add --replace to launchd ProgramArguments (matches systemd) - Add launchd detection to cmd_update's auto-restart logic - Print 'auto-restart via launchd' instead of manual restart hint * fix: add launchd plist auto-refresh + explicit restart in cmd_update Two integration issues with the initial fix: 1. Existing macOS users with old plist (no --replace) would never get the fix until manual uninstall/reinstall. Added refresh_launchd_plist_if_needed() — mirrors the existing refresh_systemd_unit_if_needed(). Called from launchd_start(), launchd_restart(), and cmd_update. 2. cmd_update relied on KeepAlive respawn after SIGTERM rather than explicit launchctl stop/start. This caused races: launchd would respawn the old process before the PID file was cleaned up. Now does explicit stop+start (matching how systemd gets an explicit systemctl restart), with plist refresh first so the new --replace flag is picked up. --------- Co-authored-by: Ninja <ninja@local> Co-authored-by: alireza78a <alireza78a@users.noreply.github.com> Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com> Co-authored-by: JP Lew <polydegen@protonmail.com> Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs)
def run_sync():
agent = AIAgent(
fix: hermes update causes dual gateways on macOS (launchd) (#1567) * feat: add optional smart model routing Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow. * fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s * fix(gateway): avoid recursive ExecStop in user systemd unit * fix: extend ExecStop removal and TimeoutStopSec=60 to system unit The cherry-picked PR #1448 fix only covered the user systemd unit. The system unit had the same TimeoutStopSec=15 and could benefit from the same 60s timeout for clean shutdown. Also adds a regression test for the system unit. --------- Co-authored-by: Ninja <ninja@local> * feat(skills): add blender-mcp optional skill for 3D modeling Control a running Blender instance from Hermes via socket connection to the blender-mcp addon (port 9876). Supports creating 3D objects, materials, animations, and running arbitrary bpy code. Placed in optional-skills/ since it requires Blender 4.3+ desktop with a third-party addon manually started each session. * feat(acp): support slash commands in ACP adapter (#1532) Adds /help, /model, /tools, /context, /reset, /compact, /version to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled directly in the server without instantiating the TUI — each command queries agent/session state and returns plain text. Unrecognized /commands fall through to the LLM as normal messages. /model uses detect_provider_for_model() for auto-detection when switching models, matching the CLI and gateway behavior. 
Fixes #1402 * fix(logging): improve error logging in session search tool (#1533) * fix(gateway): restart on retryable startup failures (#1517) * feat(email): add skip_attachments option via config.yaml * feat(email): add skip_attachments option via config.yaml Adds a config.yaml-driven option to skip email attachments in the gateway email adapter. Useful for malware protection and bandwidth savings. Configure in config.yaml: platforms: email: skip_attachments: true Based on PR #1521 by @an420eth, changed from env var to config.yaml (via PlatformConfig.extra) to match the project's config-first pattern. * docs: document skip_attachments option for email adapter * fix(telegram): retry on transient TLS failures during connect and send Add exponential-backoff retry (3 attempts) around initialize() to handle transient TLS resets during gateway startup. Also catches TimedOut and OSError in addition to NetworkError. Add exponential-backoff retry (3 attempts) around send_message() for NetworkError during message delivery, wrapping the existing Markdown fallback logic. Both imports are guarded with try/except ImportError for test environments where telegram is mocked. Based on PR #1527 by cmd8. Closes #1526. * feat: permissive block_anchor thresholds and unicode normalization (#1539) Salvaged from PR #1528 by an420eth. Closes #517. Improves _strategy_block_anchor in fuzzy_match.py: - Add unicode normalization (smart quotes, em/en-dashes, ellipsis, non-breaking spaces → ASCII) so LLM-produced unicode artifacts don't break anchor line matching - Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for multiple candidates — if first/last lines match exactly, the block is almost certainly correct - Use original (non-normalized) content for offset calculation to preserve correct character positions Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space anchors, very-low-similarity unique matches), zero regressions on all 9 existing fuzzy match tests. 
Co-authored-by: an420eth <an420eth@users.noreply.github.com> * feat(cli): add file path autocomplete in the input prompt (#1545) When typing a path-like token (./ ../ ~/ / or containing /), the CLI now shows filesystem completions in the dropdown menu. Directories show a trailing slash and 'dir' label; files show their size. Completions are case-insensitive and capped at 30 entries. Triggered by tokens like: edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ... check ~/doc → shows ~/docs/, ~/documents/, ... read /etc/hos → shows /etc/hosts, /etc/hostname, ... open tools/reg → shows tools/registry.py Slash command autocomplete (/help, /model, etc.) is unaffected — it still triggers when the input starts with /. Inspired by OpenCode PR #145 (file path completion menu). Implementation: - hermes_cli/commands.py: _extract_path_word() detects path-like tokens, _path_completions() yields filesystem Completions with size labels, get_completions() routes to paths vs slash commands - tests/hermes_cli/test_path_completion.py: 26 tests covering path extraction, prefix filtering, directory markers, home expansion, case-insensitivity, integration with slash commands * feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled Add privacy.redact_pii config option (boolean, default false). When enabled, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM provider: - Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256> - User IDs → hashed to user_<sha256> - Chat IDs → numeric portion hashed, platform prefix preserved - Home channel IDs → hashed - Names/usernames → NOT affected (user-chosen, publicly visible) Hashes are deterministic (same user → same hash) so the model can still distinguish users in group chats. Routing and delivery use the original values internally — redaction only affects LLM context. Inspired by OpenClaw PR #47959. 
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs) Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM needs the real ID to tag users. Redaction now only applies to WhatsApp, Signal, and Telegram where IDs are pure routing metadata. Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack. * feat: smart approvals + /stop command (inspired by OpenAI Codex) * feat: smart approvals — LLM-based risk assessment for dangerous commands Adds a 'smart' approval mode that uses the auxiliary LLM to assess whether a flagged command is genuinely dangerous or a false positive, auto-approving low-risk commands without prompting the user. Inspired by OpenAI Codex's Smart Approvals guardian subagent (openai/codex#13860). Config (config.yaml): approvals: mode: manual # manual (default), smart, off Modes: - manual — current behavior, always prompt the user - smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block), or ESCALATE (fall through to manual prompt) - off — skip all approval prompts (equivalent to --yolo) When smart mode auto-approves, the pattern gets session-level approval so subsequent uses of the same pattern don't trigger another LLM call. When it denies, the command is blocked without user prompt. When uncertain, it escalates to the normal manual approval flow. The LLM prompt is carefully scoped: it sees only the command text and the flagged reason, assesses actual risk vs false positive, and returns a single-word verdict. * feat: make smart approval model configurable via config.yaml Adds auxiliary.approval section to config.yaml with the same provider/model/base_url/api_key pattern as other aux tasks (vision, web_extract, compression, etc.). Config: auxiliary: approval: provider: auto model: '' # fast/cheap model recommended base_url: '' api_key: '' Bridged to env vars in both CLI and gateway paths so the aux client picks them up automatically. 
* feat: add /stop command to kill all background processes Adds a /stop slash command that kills all running background processes at once. Currently users have to process(list) then process(kill) for each one individually. Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current turn) from /stop (cleans up background processes). See openai/codex#14602. Ctrl+C continues to only interrupt the active agent turn — background dev servers, watchers, etc. are preserved. /stop is the explicit way to clean them all up. * feat: first-class plugin architecture + hide status bar cost by default (#1544) The persistent status bar now shows context %, token counts, and duration but NOT $ cost by default. Cost display is opt-in via: display: show_cost: true in config.yaml, or: hermes config set display.show_cost true The /usage command still shows full cost breakdown since the user explicitly asked for it — this only affects the always-visible bar. Status bar without cost: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m Status bar with show_cost: true: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m * feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex) * feat: improve memory prioritization — user preferences over procedural knowledge Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493) which focus memory writes on user preferences and recurring patterns rather than procedural task details. Key insight: 'Optimize for reducing future user steering — the most valuable memory prevents the user from having to repeat themselves.' 
Changes: - MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy and the core principle about reducing user steering - MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put corrections first, added explicit PRIORITY guidance - Memory nudge (run_agent.py): now asks specifically about preferences, corrections, and workflow patterns instead of generic 'anything' - Memory flush (run_agent.py): now instructs to prioritize user preferences and corrections over task-specific details * feat: more aggressive skill creation and update prompting Press harder on skill updates — the agent should proactively patch skills when it encounters issues during use, not wait to be asked. Changes: - SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction to patch skills immediately when found outdated/wrong - Skills header: added instruction to update loaded skills before finishing if they had missing steps or wrong commands - Skill nudge: more assertive ('save the approach' not 'consider saving'), now also prompts for updating existing skills used in the task - Skill nudge interval: lowered default from 15 to 10 iterations - skill_manage schema: added 'patch it immediately' to update triggers * feat: first-class plugin architecture (#1555) Plugin system for extending Hermes with custom tools, hooks, and integrations — no source code changes required. 
Core system (hermes_cli/plugins.py): - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and pip entry_points (hermes_agent.plugins group) - PluginContext with register_tool() and register_hook() - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call, on_session_start/end - Namespace package handling for relative imports in plugins - Graceful error isolation — broken plugins never crash the agent Integration (model_tools.py): - Plugin discovery runs after built-in + MCP tools - Plugin tools bypass toolset filter via get_plugin_tool_names() - Pre/post tool call hooks fire in handle_function_call() CLI: - /plugins command shows loaded plugins, tool counts, status - Added to COMMANDS dict for autocomplete Docs: - Getting started guide (build-a-hermes-plugin.md) — full tutorial building a calculator plugin step by step - Reference page (features/plugins.md) — quick overview + tables - Covers: file structure, schemas, handlers, hooks, data files, bundled skills, env var gating, pip distribution, common mistakes Tests: 16 tests covering discovery, loading, hooks, tool visibility. * fix: hermes update causes dual gateways on macOS (launchd) Three bugs worked together to create the dual-gateway problem: 1. cmd_update only checked systemd for gateway restart, completely ignoring launchd on macOS. After killing the PID it would print 'Restart it with: hermes gateway run' even when launchd was about to auto-respawn the process. 2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway after SIGTERM (non-zero exit), so the user's manual restart created a second instance. 3. The launchd plist lacked --replace (systemd had it), so the respawned gateway didn't kill stale instances on startup. 
Fixes: - Add --replace to launchd ProgramArguments (matches systemd) - Add launchd detection to cmd_update's auto-restart logic - Print 'auto-restart via launchd' instead of manual restart hint * fix: add launchd plist auto-refresh + explicit restart in cmd_update Two integration issues with the initial fix: 1. Existing macOS users with old plist (no --replace) would never get the fix until manual uninstall/reinstall. Added refresh_launchd_plist_if_needed() — mirrors the existing refresh_systemd_unit_if_needed(). Called from launchd_start(), launchd_restart(), and cmd_update. 2. cmd_update relied on KeepAlive respawn after SIGTERM rather than explicit launchctl stop/start. This caused races: launchd would respawn the old process before the PID file was cleaned up. Now does explicit stop+start (matching how systemd gets an explicit systemctl restart), with plist refresh first so the new --replace flag is picked up. --------- Co-authored-by: Ninja <ninja@local> Co-authored-by: alireza78a <alireza78a@users.noreply.github.com> Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com> Co-authored-by: JP Lew <polydegen@protonmail.com> Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
model=turn_route["model"],
**turn_route["runtime"],
max_iterations=max_iterations,
quiet_mode=True,
verbose_logging=False,
enabled_toolsets=enabled_toolsets,
reasoning_config=reasoning_config,
service_tier=self._service_tier,
request_overrides=turn_route.get("request_overrides"),
providers_allowed=pr.get("only"),
providers_ignored=pr.get("ignore"),
providers_order=pr.get("order"),
provider_sort=pr.get("sort"),
provider_require_parameters=pr.get("require_parameters", False),
provider_data_collection=pr.get("data_collection"),
session_id=task_id,
platform=platform_key,
user_id=source.user_id,
session_db=self._session_db,
fallback_model=self._fallback_model,
)
try:
return agent.run_conversation(
user_message=prompt,
task_id=task_id,
)
finally:
self._cleanup_agent_resources(agent)
result = await self._run_in_executor_with_context(run_sync)
response = result.get("final_response", "") if result else ""
if not response and result and result.get("error"):
response = f"Error: {result['error']}"
# Extract media files from the response
if response:
media_files, response = adapter.extract_media(response)
images, text_content = adapter.extract_images(response)
preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
header = f'✅ Background task complete\nPrompt: "{preview}"\n\n'
if text_content:
await adapter.send(
chat_id=source.chat_id,
content=header + text_content,
metadata=_thread_metadata,
)
elif not images and not media_files:
await adapter.send(
chat_id=source.chat_id,
content=header + "(No response generated)",
metadata=_thread_metadata,
)
# Send extracted images
for image_url, alt_text in (images or []):
try:
await adapter.send_image(
chat_id=source.chat_id,
image_url=image_url,
caption=alt_text,
)
except Exception:
pass
# Send media files
for media_path in (media_files or []):
try:
fix: background task media delivery + vision download timeout (#3919) * feat(telegram): add webhook mode as alternative to polling When TELEGRAM_WEBHOOK_URL is set, the adapter starts an HTTP webhook server (via python-telegram-bot's start_webhook()) instead of long polling. This enables cloud platforms like Fly.io and Railway to auto-wake suspended machines on inbound HTTP traffic. Polling remains the default — no behavior change unless the env var is set. Env vars: TELEGRAM_WEBHOOK_URL Public HTTPS URL for Telegram to push to TELEGRAM_WEBHOOK_PORT Local listen port (default 8443) TELEGRAM_WEBHOOK_SECRET Secret token for update verification Cherry-picked and adapted from PR #2022 by SHL0MS. Preserved all current main enhancements (network error recovery, polling conflict detection, DM topics setup). Co-authored-by: SHL0MS <SHL0MS@users.noreply.github.com> * fix: send_document call in background task delivery + vision download timeout Two fixes salvaged from PR #2269 by amethystani: 1. gateway/run.py: adapter.send_file() → adapter.send_document() send_file() doesn't exist on BasePlatformAdapter. Background task media files were silently never delivered (AttributeError swallowed by except Exception: pass). 2. tools/vision_tools.py: configurable image download timeout via HERMES_VISION_DOWNLOAD_TIMEOUT env var (default 30s), plus guard against raise None when max_retries=0. The third fix in #2269 (opencode-go auth config) was already resolved on main. Co-authored-by: amethystani <amethystani@users.noreply.github.com> --------- Co-authored-by: SHL0MS <SHL0MS@users.noreply.github.com> Co-authored-by: amethystani <amethystani@users.noreply.github.com>
2026-03-30 02:59:39 -07:00
await adapter.send_document(
chat_id=source.chat_id,
file_path=media_path,
)
except Exception:
pass
else:
preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
await adapter.send(
chat_id=source.chat_id,
content=f'✅ Background task complete\nPrompt: "{preview}"\n\n(No response generated)',
metadata=_thread_metadata,
)
except Exception as e:
logger.exception("Background task %s failed", task_id)
try:
await adapter.send(
chat_id=source.chat_id,
content=f"❌ Background task {task_id} failed: {e}",
metadata=_thread_metadata,
)
except Exception:
pass
async def _handle_btw_command(self, event: MessageEvent) -> str:
"""Handle /btw <question> — ephemeral side question in the same chat."""
question = event.get_command_args().strip()
if not question:
return (
"Usage: /btw <question>\n"
"Example: /btw what module owns session title sanitization?\n\n"
"Answers using session context. No tools, not persisted."
)
source = event.source
session_key = self._session_key_for_source(source)
# Guard: one /btw at a time per session
existing = getattr(self, "_active_btw_tasks", {}).get(session_key)
if existing and not existing.done():
return "A /btw is already running for this chat. Wait for it to finish."
if not hasattr(self, "_active_btw_tasks"):
self._active_btw_tasks: dict = {}
import uuid as _uuid
task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{_uuid.uuid4().hex[:6]}"
_task = asyncio.create_task(self._run_btw_task(question, source, session_key, task_id))
self._background_tasks.add(_task)
self._active_btw_tasks[session_key] = _task
def _cleanup(task):
self._background_tasks.discard(task)
if self._active_btw_tasks.get(session_key) is task:
self._active_btw_tasks.pop(session_key, None)
_task.add_done_callback(_cleanup)
preview = question[:60] + ("..." if len(question) > 60 else "")
return f'💬 /btw: "{preview}"\nReply will appear here shortly.'
async def _run_btw_task(
self, question: str, source, session_key: str, task_id: str,
) -> None:
"""Execute an ephemeral /btw side question and deliver the answer."""
from run_agent import AIAgent
adapter = self.adapters.get(source.platform)
if not adapter:
logger.warning("No adapter for platform %s in /btw task %s", source.platform, task_id)
return
_thread_meta = {"thread_id": source.thread_id} if source.thread_id else None
try:
user_config = _load_gateway_config()
model, runtime_kwargs = self._resolve_session_agent_runtime(
source=source,
session_key=session_key,
user_config=user_config,
)
if not runtime_kwargs.get("api_key"):
await adapter.send(
source.chat_id,
"❌ /btw failed: no provider credentials configured.",
metadata=_thread_meta,
)
return
platform_key = _platform_config_key(source.platform)
reasoning_config = self._load_reasoning_config()
self._service_tier = self._load_service_tier()
turn_route = self._resolve_turn_agent_config(question, model, runtime_kwargs)
pr = self._provider_routing
# Snapshot history from running agent or stored transcript
running_agent = self._running_agents.get(session_key)
if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
history_snapshot = list(getattr(running_agent, "_session_messages", []) or [])
else:
session_entry = self.session_store.get_or_create_session(source)
history_snapshot = self.session_store.load_transcript(session_entry.session_id)
btw_prompt = (
"[Ephemeral /btw side question. Answer using the conversation "
"context. No tools available. Be direct and concise.]\n\n"
+ question
)
def run_sync():
agent = AIAgent(
model=turn_route["model"],
**turn_route["runtime"],
max_iterations=8,
quiet_mode=True,
verbose_logging=False,
enabled_toolsets=[],
reasoning_config=reasoning_config,
service_tier=self._service_tier,
request_overrides=turn_route.get("request_overrides"),
providers_allowed=pr.get("only"),
providers_ignored=pr.get("ignore"),
providers_order=pr.get("order"),
provider_sort=pr.get("sort"),
provider_require_parameters=pr.get("require_parameters", False),
provider_data_collection=pr.get("data_collection"),
session_id=task_id,
platform=platform_key,
session_db=None,
fallback_model=self._fallback_model,
skip_memory=True,
skip_context_files=True,
persist_session=False,
)
try:
return agent.run_conversation(
user_message=btw_prompt,
conversation_history=history_snapshot,
task_id=task_id,
)
finally:
self._cleanup_agent_resources(agent)
result = await self._run_in_executor_with_context(run_sync)
response = (result.get("final_response") or "") if result else ""
if not response and result and result.get("error"):
response = f"Error: {result['error']}"
if not response:
response = "(No response generated)"
media_files, response = adapter.extract_media(response)
images, text_content = adapter.extract_images(response)
preview = question[:60] + ("..." if len(question) > 60 else "")
header = f'💬 /btw: "{preview}"\n\n'
if text_content:
await adapter.send(
chat_id=source.chat_id,
content=header + text_content,
metadata=_thread_meta,
)
elif not images and not media_files:
await adapter.send(
chat_id=source.chat_id,
content=header + "(No response generated)",
metadata=_thread_meta,
)
for image_url, alt_text in (images or []):
try:
await adapter.send_image(chat_id=source.chat_id, image_url=image_url, caption=alt_text)
except Exception:
pass
for media_path in (media_files or []):
try:
await adapter.send_file(chat_id=source.chat_id, file_path=media_path)
except Exception:
pass
except Exception as e:
logger.exception("/btw task %s failed", task_id)
try:
await adapter.send(
chat_id=source.chat_id,
content=f"❌ /btw failed: {e}",
metadata=_thread_meta,
)
except Exception:
pass
async def _handle_reasoning_command(self, event: MessageEvent) -> str:
    """Handle /reasoning command — manage reasoning effort and display toggle.

    Usage:
        /reasoning              Show current effort level and display state
        /reasoning <level>      Set reasoning effort (none, minimal, low, medium, high, xhigh)
        /reasoning show|on      Show model reasoning in responses
        /reasoning hide|off     Hide model reasoning from responses

    Returns a user-facing markdown string describing the result.
    """
    import yaml

    args = event.get_command_args().strip().lower()
    config_path = _hermes_home / "config.yaml"
    # Refresh cached state from config so we report/modify the latest values.
    self._reasoning_config = self._load_reasoning_config()
    self._show_reasoning = self._load_show_reasoning()

    def _save_config_key(key_path: str, value):
        """Save a dot-separated key to config.yaml."""
        try:
            user_config = {}
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    user_config = yaml.safe_load(f) or {}
            keys = key_path.split(".")
            current = user_config
            # Walk/create intermediate dicts for the dotted path.
            for k in keys[:-1]:
                if k not in current or not isinstance(current[k], dict):
                    current[k] = {}
                current = current[k]
            current[keys[-1]] = value
            atomic_yaml_write(config_path, user_config)
            return True
        except Exception as e:
            logger.error("Failed to save config key %s: %s", key_path, e)
            return False

    if not args:
        # No argument: show current state.
        rc = self._reasoning_config
        if rc is None:
            level = "medium (default)"
        elif rc.get("enabled") is False:
            level = "none (disabled)"
        else:
            level = rc.get("effort", "medium")
        display_state = "on ✓" if self._show_reasoning else "off"
        return (
            "🧠 **Reasoning Settings**\n\n"
            f"**Effort:** `{level}`\n"
            f"**Display:** {display_state}\n\n"
            "_Usage:_ `/reasoning <none|minimal|low|medium|high|xhigh|show|hide>`"
        )

    # Display toggle — saved per platform under display.platforms.<platform>.
    platform_key = _platform_config_key(event.source.platform)
    if args in ("show", "on"):
        self._show_reasoning = True
        _save_config_key(f"display.platforms.{platform_key}.show_reasoning", True)
        return (
            "🧠 ✓ Reasoning display: **ON**\n"
            f"Model thinking will be shown before each response on **{platform_key}**."
        )
    if args in ("hide", "off"):
        self._show_reasoning = False
        _save_config_key(f"display.platforms.{platform_key}.show_reasoning", False)
        return f"🧠 ✓ Reasoning display: **OFF** for **{platform_key}**"

    # Effort level change (global, saved under agent.reasoning_effort).
    effort = args.strip()
    if effort == "none":
        parsed = {"enabled": False}
    elif effort in ("minimal", "low", "medium", "high", "xhigh"):
        parsed = {"enabled": True, "effort": effort}
    else:
        return (
            f"⚠️ Unknown argument: `{effort}`\n\n"
            "**Valid levels:** none, minimal, low, medium, high, xhigh\n"
            "**Display:** show, hide"
        )
    self._reasoning_config = parsed
    if _save_config_key("agent.reasoning_effort", effort):
        return f"🧠 ✓ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_"
    else:
        return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"
async def _handle_fast_command(self, event: MessageEvent) -> str:
    """Handle /fast — mirror the CLI Priority Processing toggle in gateway chats."""
    import yaml
    from hermes_cli.models import model_supports_fast_mode

    command_args = event.get_command_args().strip().lower()
    config_path = _hermes_home / "config.yaml"
    # Re-read the persisted tier so the toggle reflects on-disk state.
    self._service_tier = self._load_service_tier()
    gateway_config = _load_gateway_config()
    active_model = _resolve_gateway_model(gateway_config)
    if not model_supports_fast_mode(active_model):
        return "⚡ /fast is only available for OpenAI models that support Priority Processing."

    def _persist_key(key_path: str, value):
        """Save a dot-separated key to config.yaml."""
        try:
            user_config = {}
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    user_config = yaml.safe_load(f) or {}
            *parents, leaf = key_path.split(".")
            node = user_config
            for part in parents:
                if part not in node or not isinstance(node[part], dict):
                    node[part] = {}
                node = node[part]
            node[leaf] = value
            atomic_yaml_write(config_path, user_config)
            return True
        except Exception as e:
            logger.error("Failed to save config key %s: %s", key_path, e)
            return False

    if not command_args or command_args == "status":
        status = "fast" if self._service_tier == "priority" else "normal"
        return (
            "⚡ Priority Processing\n\n"
            f"Current mode: `{status}`\n\n"
            "_Usage:_ `/fast <normal|fast|status>`"
        )

    if command_args in ("fast", "on"):
        self._service_tier, saved_value, label = "priority", "fast", "FAST"
    elif command_args in ("normal", "off"):
        self._service_tier, saved_value, label = None, "normal", "NORMAL"
    else:
        return (
            f"⚠️ Unknown argument: `{command_args}`\n\n"
            "**Valid options:** normal, fast, status"
        )

    if _persist_key("agent.service_tier", saved_value):
        return f"⚡ ✓ Priority Processing: **{label}** (saved to config)\n_(takes effect on next message)_"
    return f"⚡ ✓ Priority Processing: **{label}** (this session only)"
async def _handle_yolo_command(self, event: MessageEvent) -> str:
    """Handle /yolo — toggle dangerous command approval bypass for this session only."""
    from tools.approval import (
        disable_session_yolo,
        enable_session_yolo,
        is_session_yolo_enabled,
    )

    key = self._session_key_for_source(event.source)
    # Flip the per-session flag: enabled -> disabled, disabled -> enabled.
    if not is_session_yolo_enabled(key):
        enable_session_yolo(key)
        return "⚡ YOLO mode **ON** for this session — all commands auto-approved. Use with caution."
    disable_session_yolo(key)
    return "⚠️ YOLO mode **OFF** for this session — dangerous commands will require approval."
feat: config-gated /verbose command for messaging gateway (#3262) * feat: config-gated /verbose command for messaging gateway Add gateway_config_gate field to CommandDef, allowing cli_only commands to be conditionally available in the gateway based on a config value. - CommandDef gains gateway_config_gate: str | None — a config dotpath that, when truthy, overrides cli_only for gateway surfaces - /verbose uses gateway_config_gate='display.tool_progress_command' - Default is off (cli_only behavior preserved) - When enabled, /verbose cycles tool_progress mode (off/new/all/verbose) in the gateway, saving to config.yaml — same cycle as the CLI - Gateway helpers (help, telegram menus, slack mapping) dynamically check config to include/exclude config-gated commands - GATEWAY_KNOWN_COMMANDS always includes config-gated commands so the gateway recognizes them and can respond appropriately - Handles YAML 1.1 bool coercion (bare 'off' parses as False) - 8 new tests for the config gate mechanism + gateway handler * docs: document gateway_config_gate and /verbose messaging support - AGENTS.md: add gateway_config_gate to CommandDef fields - slash-commands.md: note /verbose can be enabled for messaging, update Notes - configuration.md: add tool_progress_command to display section + usage note - cli.md: cross-link to config docs for messaging enablement - messaging/index.md: show tool_progress_command in config snippet - plugins.md: add gateway_config_gate to register_command parameter table
2026-03-26 14:41:04 -07:00
async def _handle_verbose_command(self, event: MessageEvent) -> str:
"""Handle /verbose command — cycle tool progress display mode.
Gated by ``display.tool_progress_command`` in config.yaml (default off).
When enabled, cycles the tool progress mode through off new all
feat: per-platform display verbosity configuration (#8006) Add display.platforms section to config.yaml for per-platform overrides of display settings (tool_progress, show_reasoning, streaming, tool_preview_length). Each platform gets sensible built-in defaults based on capability tier: - High (telegram, discord): tool_progress=all, streaming follows global - Medium (slack, mattermost, matrix, feishu): tool_progress=new - Low (signal, whatsapp, bluebubbles, wecom, etc.): tool_progress=off, streaming=false - Minimal (email, sms, webhook, homeassistant): tool_progress=off, streaming=false Example config: display: platforms: telegram: tool_progress: all show_reasoning: true slack: tool_progress: off Resolution order: platform override > global setting > built-in platform default. Changes: - New gateway/display_config.py: resolver module with tier-based platform defaults - gateway/run.py: tool_progress, tool_preview_length, streaming, show_reasoning all resolve per-platform via the new resolver - /verbose command: now cycles tool_progress per-platform (saves to display.platforms.<platform>.tool_progress instead of global) - /reasoning show|hide: now saves show_reasoning per-platform - Config version 15 -> 16: migrates tool_progress_overrides into display.platforms - Backward compat: legacy tool_progress_overrides still read as fallback - 27 new tests for resolver, normalization, migration, backward compat - Updated verbose command tests for per-platform behavior Addresses community request for per-channel verbosity control (Guillaume Meyer, Nathan Danielsen) — high verbosity on backchannel Telegram, low on customer-facing Slack, none on email.
2026-04-11 17:20:34 -07:00
verbose off for the *current platform*. The setting is saved to
``display.platforms.<platform>.tool_progress`` so each channel can
have its own verbosity level independently.
feat: config-gated /verbose command for messaging gateway (#3262) * feat: config-gated /verbose command for messaging gateway Add gateway_config_gate field to CommandDef, allowing cli_only commands to be conditionally available in the gateway based on a config value. - CommandDef gains gateway_config_gate: str | None — a config dotpath that, when truthy, overrides cli_only for gateway surfaces - /verbose uses gateway_config_gate='display.tool_progress_command' - Default is off (cli_only behavior preserved) - When enabled, /verbose cycles tool_progress mode (off/new/all/verbose) in the gateway, saving to config.yaml — same cycle as the CLI - Gateway helpers (help, telegram menus, slack mapping) dynamically check config to include/exclude config-gated commands - GATEWAY_KNOWN_COMMANDS always includes config-gated commands so the gateway recognizes them and can respond appropriately - Handles YAML 1.1 bool coercion (bare 'off' parses as False) - 8 new tests for the config gate mechanism + gateway handler * docs: document gateway_config_gate and /verbose messaging support - AGENTS.md: add gateway_config_gate to CommandDef fields - slash-commands.md: note /verbose can be enabled for messaging, update Notes - configuration.md: add tool_progress_command to display section + usage note - cli.md: cross-link to config docs for messaging enablement - messaging/index.md: show tool_progress_command in config snippet - plugins.md: add gateway_config_gate to register_command parameter table
2026-03-26 14:41:04 -07:00
"""
import yaml
config_path = _hermes_home / "config.yaml"
feat: per-platform display verbosity configuration (#8006) Add display.platforms section to config.yaml for per-platform overrides of display settings (tool_progress, show_reasoning, streaming, tool_preview_length). Each platform gets sensible built-in defaults based on capability tier: - High (telegram, discord): tool_progress=all, streaming follows global - Medium (slack, mattermost, matrix, feishu): tool_progress=new - Low (signal, whatsapp, bluebubbles, wecom, etc.): tool_progress=off, streaming=false - Minimal (email, sms, webhook, homeassistant): tool_progress=off, streaming=false Example config: display: platforms: telegram: tool_progress: all show_reasoning: true slack: tool_progress: off Resolution order: platform override > global setting > built-in platform default. Changes: - New gateway/display_config.py: resolver module with tier-based platform defaults - gateway/run.py: tool_progress, tool_preview_length, streaming, show_reasoning all resolve per-platform via the new resolver - /verbose command: now cycles tool_progress per-platform (saves to display.platforms.<platform>.tool_progress instead of global) - /reasoning show|hide: now saves show_reasoning per-platform - Config version 15 -> 16: migrates tool_progress_overrides into display.platforms - Backward compat: legacy tool_progress_overrides still read as fallback - 27 new tests for resolver, normalization, migration, backward compat - Updated verbose command tests for per-platform behavior Addresses community request for per-channel verbosity control (Guillaume Meyer, Nathan Danielsen) — high verbosity on backchannel Telegram, low on customer-facing Slack, none on email.
2026-04-11 17:20:34 -07:00
platform_key = _platform_config_key(event.source.platform)
feat: config-gated /verbose command for messaging gateway (#3262) * feat: config-gated /verbose command for messaging gateway Add gateway_config_gate field to CommandDef, allowing cli_only commands to be conditionally available in the gateway based on a config value. - CommandDef gains gateway_config_gate: str | None — a config dotpath that, when truthy, overrides cli_only for gateway surfaces - /verbose uses gateway_config_gate='display.tool_progress_command' - Default is off (cli_only behavior preserved) - When enabled, /verbose cycles tool_progress mode (off/new/all/verbose) in the gateway, saving to config.yaml — same cycle as the CLI - Gateway helpers (help, telegram menus, slack mapping) dynamically check config to include/exclude config-gated commands - GATEWAY_KNOWN_COMMANDS always includes config-gated commands so the gateway recognizes them and can respond appropriately - Handles YAML 1.1 bool coercion (bare 'off' parses as False) - 8 new tests for the config gate mechanism + gateway handler * docs: document gateway_config_gate and /verbose messaging support - AGENTS.md: add gateway_config_gate to CommandDef fields - slash-commands.md: note /verbose can be enabled for messaging, update Notes - configuration.md: add tool_progress_command to display section + usage note - cli.md: cross-link to config docs for messaging enablement - messaging/index.md: show tool_progress_command in config snippet - plugins.md: add gateway_config_gate to register_command parameter table
2026-03-26 14:41:04 -07:00
# --- check config gate ------------------------------------------------
try:
user_config = {}
if config_path.exists():
with open(config_path, encoding="utf-8") as f:
user_config = yaml.safe_load(f) or {}
gate_enabled = user_config.get("display", {}).get("tool_progress_command", False)
except Exception:
gate_enabled = False
if not gate_enabled:
return (
"The `/verbose` command is not enabled for messaging platforms.\n\n"
"Enable it in `config.yaml`:\n```yaml\n"
"display:\n tool_progress_command: true\n```"
)
feat: per-platform display verbosity configuration (#8006) Add display.platforms section to config.yaml for per-platform overrides of display settings (tool_progress, show_reasoning, streaming, tool_preview_length). Each platform gets sensible built-in defaults based on capability tier: - High (telegram, discord): tool_progress=all, streaming follows global - Medium (slack, mattermost, matrix, feishu): tool_progress=new - Low (signal, whatsapp, bluebubbles, wecom, etc.): tool_progress=off, streaming=false - Minimal (email, sms, webhook, homeassistant): tool_progress=off, streaming=false Example config: display: platforms: telegram: tool_progress: all show_reasoning: true slack: tool_progress: off Resolution order: platform override > global setting > built-in platform default. Changes: - New gateway/display_config.py: resolver module with tier-based platform defaults - gateway/run.py: tool_progress, tool_preview_length, streaming, show_reasoning all resolve per-platform via the new resolver - /verbose command: now cycles tool_progress per-platform (saves to display.platforms.<platform>.tool_progress instead of global) - /reasoning show|hide: now saves show_reasoning per-platform - Config version 15 -> 16: migrates tool_progress_overrides into display.platforms - Backward compat: legacy tool_progress_overrides still read as fallback - 27 new tests for resolver, normalization, migration, backward compat - Updated verbose command tests for per-platform behavior Addresses community request for per-channel verbosity control (Guillaume Meyer, Nathan Danielsen) — high verbosity on backchannel Telegram, low on customer-facing Slack, none on email.
2026-04-11 17:20:34 -07:00
# --- cycle mode (per-platform) ----------------------------------------
feat: config-gated /verbose command for messaging gateway (#3262) * feat: config-gated /verbose command for messaging gateway Add gateway_config_gate field to CommandDef, allowing cli_only commands to be conditionally available in the gateway based on a config value. - CommandDef gains gateway_config_gate: str | None — a config dotpath that, when truthy, overrides cli_only for gateway surfaces - /verbose uses gateway_config_gate='display.tool_progress_command' - Default is off (cli_only behavior preserved) - When enabled, /verbose cycles tool_progress mode (off/new/all/verbose) in the gateway, saving to config.yaml — same cycle as the CLI - Gateway helpers (help, telegram menus, slack mapping) dynamically check config to include/exclude config-gated commands - GATEWAY_KNOWN_COMMANDS always includes config-gated commands so the gateway recognizes them and can respond appropriately - Handles YAML 1.1 bool coercion (bare 'off' parses as False) - 8 new tests for the config gate mechanism + gateway handler * docs: document gateway_config_gate and /verbose messaging support - AGENTS.md: add gateway_config_gate to CommandDef fields - slash-commands.md: note /verbose can be enabled for messaging, update Notes - configuration.md: add tool_progress_command to display section + usage note - cli.md: cross-link to config docs for messaging enablement - messaging/index.md: show tool_progress_command in config snippet - plugins.md: add gateway_config_gate to register_command parameter table
2026-03-26 14:41:04 -07:00
cycle = ["off", "new", "all", "verbose"]
descriptions = {
"off": "⚙️ Tool progress: **OFF** — no tool activity shown.",
"new": "⚙️ Tool progress: **NEW** — shown when tool changes (preview length: `display.tool_preview_length`, default 40).",
"all": "⚙️ Tool progress: **ALL** — every tool call shown (preview length: `display.tool_preview_length`, default 40).",
"verbose": "⚙️ Tool progress: **VERBOSE** — every tool call with full arguments.",
feat: config-gated /verbose command for messaging gateway (#3262) * feat: config-gated /verbose command for messaging gateway Add gateway_config_gate field to CommandDef, allowing cli_only commands to be conditionally available in the gateway based on a config value. - CommandDef gains gateway_config_gate: str | None — a config dotpath that, when truthy, overrides cli_only for gateway surfaces - /verbose uses gateway_config_gate='display.tool_progress_command' - Default is off (cli_only behavior preserved) - When enabled, /verbose cycles tool_progress mode (off/new/all/verbose) in the gateway, saving to config.yaml — same cycle as the CLI - Gateway helpers (help, telegram menus, slack mapping) dynamically check config to include/exclude config-gated commands - GATEWAY_KNOWN_COMMANDS always includes config-gated commands so the gateway recognizes them and can respond appropriately - Handles YAML 1.1 bool coercion (bare 'off' parses as False) - 8 new tests for the config gate mechanism + gateway handler * docs: document gateway_config_gate and /verbose messaging support - AGENTS.md: add gateway_config_gate to CommandDef fields - slash-commands.md: note /verbose can be enabled for messaging, update Notes - configuration.md: add tool_progress_command to display section + usage note - cli.md: cross-link to config docs for messaging enablement - messaging/index.md: show tool_progress_command in config snippet - plugins.md: add gateway_config_gate to register_command parameter table
2026-03-26 14:41:04 -07:00
}
feat: per-platform display verbosity configuration (#8006) Add display.platforms section to config.yaml for per-platform overrides of display settings (tool_progress, show_reasoning, streaming, tool_preview_length). Each platform gets sensible built-in defaults based on capability tier: - High (telegram, discord): tool_progress=all, streaming follows global - Medium (slack, mattermost, matrix, feishu): tool_progress=new - Low (signal, whatsapp, bluebubbles, wecom, etc.): tool_progress=off, streaming=false - Minimal (email, sms, webhook, homeassistant): tool_progress=off, streaming=false Example config: display: platforms: telegram: tool_progress: all show_reasoning: true slack: tool_progress: off Resolution order: platform override > global setting > built-in platform default. Changes: - New gateway/display_config.py: resolver module with tier-based platform defaults - gateway/run.py: tool_progress, tool_preview_length, streaming, show_reasoning all resolve per-platform via the new resolver - /verbose command: now cycles tool_progress per-platform (saves to display.platforms.<platform>.tool_progress instead of global) - /reasoning show|hide: now saves show_reasoning per-platform - Config version 15 -> 16: migrates tool_progress_overrides into display.platforms - Backward compat: legacy tool_progress_overrides still read as fallback - 27 new tests for resolver, normalization, migration, backward compat - Updated verbose command tests for per-platform behavior Addresses community request for per-channel verbosity control (Guillaume Meyer, Nathan Danielsen) — high verbosity on backchannel Telegram, low on customer-facing Slack, none on email.
2026-04-11 17:20:34 -07:00
# Read current effective mode for this platform via the resolver
from gateway.display_config import resolve_display_setting
current = resolve_display_setting(user_config, platform_key, "tool_progress", "all")
feat: config-gated /verbose command for messaging gateway (#3262) * feat: config-gated /verbose command for messaging gateway Add gateway_config_gate field to CommandDef, allowing cli_only commands to be conditionally available in the gateway based on a config value. - CommandDef gains gateway_config_gate: str | None — a config dotpath that, when truthy, overrides cli_only for gateway surfaces - /verbose uses gateway_config_gate='display.tool_progress_command' - Default is off (cli_only behavior preserved) - When enabled, /verbose cycles tool_progress mode (off/new/all/verbose) in the gateway, saving to config.yaml — same cycle as the CLI - Gateway helpers (help, telegram menus, slack mapping) dynamically check config to include/exclude config-gated commands - GATEWAY_KNOWN_COMMANDS always includes config-gated commands so the gateway recognizes them and can respond appropriately - Handles YAML 1.1 bool coercion (bare 'off' parses as False) - 8 new tests for the config gate mechanism + gateway handler * docs: document gateway_config_gate and /verbose messaging support - AGENTS.md: add gateway_config_gate to CommandDef fields - slash-commands.md: note /verbose can be enabled for messaging, update Notes - configuration.md: add tool_progress_command to display section + usage note - cli.md: cross-link to config docs for messaging enablement - messaging/index.md: show tool_progress_command in config snippet - plugins.md: add gateway_config_gate to register_command parameter table
2026-03-26 14:41:04 -07:00
if current not in cycle:
current = "all"
idx = (cycle.index(current) + 1) % len(cycle)
new_mode = cycle[idx]
feat: per-platform display verbosity configuration (#8006) Add display.platforms section to config.yaml for per-platform overrides of display settings (tool_progress, show_reasoning, streaming, tool_preview_length). Each platform gets sensible built-in defaults based on capability tier: - High (telegram, discord): tool_progress=all, streaming follows global - Medium (slack, mattermost, matrix, feishu): tool_progress=new - Low (signal, whatsapp, bluebubbles, wecom, etc.): tool_progress=off, streaming=false - Minimal (email, sms, webhook, homeassistant): tool_progress=off, streaming=false Example config: display: platforms: telegram: tool_progress: all show_reasoning: true slack: tool_progress: off Resolution order: platform override > global setting > built-in platform default. Changes: - New gateway/display_config.py: resolver module with tier-based platform defaults - gateway/run.py: tool_progress, tool_preview_length, streaming, show_reasoning all resolve per-platform via the new resolver - /verbose command: now cycles tool_progress per-platform (saves to display.platforms.<platform>.tool_progress instead of global) - /reasoning show|hide: now saves show_reasoning per-platform - Config version 15 -> 16: migrates tool_progress_overrides into display.platforms - Backward compat: legacy tool_progress_overrides still read as fallback - 27 new tests for resolver, normalization, migration, backward compat - Updated verbose command tests for per-platform behavior Addresses community request for per-channel verbosity control (Guillaume Meyer, Nathan Danielsen) — high verbosity on backchannel Telegram, low on customer-facing Slack, none on email.
2026-04-11 17:20:34 -07:00
# Save to display.platforms.<platform>.tool_progress
feat: config-gated /verbose command for messaging gateway (#3262) * feat: config-gated /verbose command for messaging gateway Add gateway_config_gate field to CommandDef, allowing cli_only commands to be conditionally available in the gateway based on a config value. - CommandDef gains gateway_config_gate: str | None — a config dotpath that, when truthy, overrides cli_only for gateway surfaces - /verbose uses gateway_config_gate='display.tool_progress_command' - Default is off (cli_only behavior preserved) - When enabled, /verbose cycles tool_progress mode (off/new/all/verbose) in the gateway, saving to config.yaml — same cycle as the CLI - Gateway helpers (help, telegram menus, slack mapping) dynamically check config to include/exclude config-gated commands - GATEWAY_KNOWN_COMMANDS always includes config-gated commands so the gateway recognizes them and can respond appropriately - Handles YAML 1.1 bool coercion (bare 'off' parses as False) - 8 new tests for the config gate mechanism + gateway handler * docs: document gateway_config_gate and /verbose messaging support - AGENTS.md: add gateway_config_gate to CommandDef fields - slash-commands.md: note /verbose can be enabled for messaging, update Notes - configuration.md: add tool_progress_command to display section + usage note - cli.md: cross-link to config docs for messaging enablement - messaging/index.md: show tool_progress_command in config snippet - plugins.md: add gateway_config_gate to register_command parameter table
2026-03-26 14:41:04 -07:00
try:
if "display" not in user_config or not isinstance(user_config.get("display"), dict):
user_config["display"] = {}
feat: per-platform display verbosity configuration (#8006) Add display.platforms section to config.yaml for per-platform overrides of display settings (tool_progress, show_reasoning, streaming, tool_preview_length). Each platform gets sensible built-in defaults based on capability tier: - High (telegram, discord): tool_progress=all, streaming follows global - Medium (slack, mattermost, matrix, feishu): tool_progress=new - Low (signal, whatsapp, bluebubbles, wecom, etc.): tool_progress=off, streaming=false - Minimal (email, sms, webhook, homeassistant): tool_progress=off, streaming=false Example config: display: platforms: telegram: tool_progress: all show_reasoning: true slack: tool_progress: off Resolution order: platform override > global setting > built-in platform default. Changes: - New gateway/display_config.py: resolver module with tier-based platform defaults - gateway/run.py: tool_progress, tool_preview_length, streaming, show_reasoning all resolve per-platform via the new resolver - /verbose command: now cycles tool_progress per-platform (saves to display.platforms.<platform>.tool_progress instead of global) - /reasoning show|hide: now saves show_reasoning per-platform - Config version 15 -> 16: migrates tool_progress_overrides into display.platforms - Backward compat: legacy tool_progress_overrides still read as fallback - 27 new tests for resolver, normalization, migration, backward compat - Updated verbose command tests for per-platform behavior Addresses community request for per-channel verbosity control (Guillaume Meyer, Nathan Danielsen) — high verbosity on backchannel Telegram, low on customer-facing Slack, none on email.
2026-04-11 17:20:34 -07:00
display = user_config["display"]
if "platforms" not in display or not isinstance(display.get("platforms"), dict):
display["platforms"] = {}
if platform_key not in display["platforms"] or not isinstance(display["platforms"].get(platform_key), dict):
display["platforms"][platform_key] = {}
display["platforms"][platform_key]["tool_progress"] = new_mode
atomic_yaml_write(config_path, user_config)
feat: per-platform display verbosity configuration (#8006) Add display.platforms section to config.yaml for per-platform overrides of display settings (tool_progress, show_reasoning, streaming, tool_preview_length). Each platform gets sensible built-in defaults based on capability tier: - High (telegram, discord): tool_progress=all, streaming follows global - Medium (slack, mattermost, matrix, feishu): tool_progress=new - Low (signal, whatsapp, bluebubbles, wecom, etc.): tool_progress=off, streaming=false - Minimal (email, sms, webhook, homeassistant): tool_progress=off, streaming=false Example config: display: platforms: telegram: tool_progress: all show_reasoning: true slack: tool_progress: off Resolution order: platform override > global setting > built-in platform default. Changes: - New gateway/display_config.py: resolver module with tier-based platform defaults - gateway/run.py: tool_progress, tool_preview_length, streaming, show_reasoning all resolve per-platform via the new resolver - /verbose command: now cycles tool_progress per-platform (saves to display.platforms.<platform>.tool_progress instead of global) - /reasoning show|hide: now saves show_reasoning per-platform - Config version 15 -> 16: migrates tool_progress_overrides into display.platforms - Backward compat: legacy tool_progress_overrides still read as fallback - 27 new tests for resolver, normalization, migration, backward compat - Updated verbose command tests for per-platform behavior Addresses community request for per-channel verbosity control (Guillaume Meyer, Nathan Danielsen) — high verbosity on backchannel Telegram, low on customer-facing Slack, none on email.
2026-04-11 17:20:34 -07:00
return (
f"{descriptions[new_mode]}\n"
f"_(saved for **{platform_key}** — takes effect on next message)_"
)
feat: config-gated /verbose command for messaging gateway (#3262) * feat: config-gated /verbose command for messaging gateway Add gateway_config_gate field to CommandDef, allowing cli_only commands to be conditionally available in the gateway based on a config value. - CommandDef gains gateway_config_gate: str | None — a config dotpath that, when truthy, overrides cli_only for gateway surfaces - /verbose uses gateway_config_gate='display.tool_progress_command' - Default is off (cli_only behavior preserved) - When enabled, /verbose cycles tool_progress mode (off/new/all/verbose) in the gateway, saving to config.yaml — same cycle as the CLI - Gateway helpers (help, telegram menus, slack mapping) dynamically check config to include/exclude config-gated commands - GATEWAY_KNOWN_COMMANDS always includes config-gated commands so the gateway recognizes them and can respond appropriately - Handles YAML 1.1 bool coercion (bare 'off' parses as False) - 8 new tests for the config gate mechanism + gateway handler * docs: document gateway_config_gate and /verbose messaging support - AGENTS.md: add gateway_config_gate to CommandDef fields - slash-commands.md: note /verbose can be enabled for messaging, update Notes - configuration.md: add tool_progress_command to display section + usage note - cli.md: cross-link to config docs for messaging enablement - messaging/index.md: show tool_progress_command in config snippet - plugins.md: add gateway_config_gate to register_command parameter table
2026-03-26 14:41:04 -07:00
except Exception as e:
logger.warning("Failed to save tool_progress mode: %s", e)
return f"{descriptions[new_mode]}\n_(could not save to config: {e})_"
async def _handle_compress_command(self, event: MessageEvent) -> str:
    """Handle /compress command -- manually compress conversation context.

    Accepts an optional focus topic: ``/compress <focus>`` guides the
    summariser to preserve information related to *focus* while being
    more aggressive about discarding everything else.

    Returns:
        A human-readable status string for the messaging platform.
    """
    source = event.source
    session_entry = self.session_store.get_or_create_session(source)
    history = self.session_store.load_transcript(session_entry.session_id)
    if not history or len(history) < 4:
        return "Not enough conversation to compress (need at least 4 messages)."
    # Extract optional focus topic from command args
    focus_topic = (event.get_command_args() or "").strip() or None
    try:
        from run_agent import AIAgent
        from agent.manual_compression_feedback import summarize_manual_compression
        from agent.model_metadata import estimate_messages_tokens_rough

        session_key = self._session_key_for_source(source)
        model, runtime_kwargs = self._resolve_session_agent_runtime(
            source=source,
            session_key=session_key,
        )
        if not runtime_kwargs.get("api_key"):
            return "No provider configured -- cannot compress."
        # Only user/assistant turns with non-empty content participate.
        msgs = [
            {"role": m.get("role"), "content": m.get("content")}
            for m in history
            if m.get("role") in ("user", "assistant") and m.get("content")
        ]
        approx_tokens = estimate_messages_tokens_rough(msgs)
        # Throwaway agent used only for its context compressor; skip_memory
        # prevents it from creating orphan memory-provider sessions.
        tmp_agent = AIAgent(
            **runtime_kwargs,
            model=model,
            max_iterations=4,
            quiet_mode=True,
            skip_memory=True,
            enabled_toolsets=["memory"],
            session_id=session_entry.session_id,
        )
        try:
            tmp_agent._print_fn = lambda *a, **kw: None  # silence console output
            compressor = tmp_agent.context_compressor
            compress_start = compressor.protect_first_n
            compress_start = compressor._align_boundary_forward(msgs, compress_start)
            compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start)
            if compress_start >= compress_end:
                return "Nothing to compress yet (the transcript is still all protected context)."
            # Compression is blocking (LLM call) -- run it off the event loop.
            loop = asyncio.get_running_loop()
            compressed, _ = await loop.run_in_executor(
                None,
                lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic)
            )
            # _compress_context already calls end_session() on the old session
            # (preserving its full transcript in SQLite) and creates a new
            # session_id for the continuation. Write the compressed messages
            # into the NEW session so the original history stays searchable.
            new_session_id = tmp_agent.session_id
            if new_session_id != session_entry.session_id:
                session_entry.session_id = new_session_id
                self.session_store._save()
            self.session_store.rewrite_transcript(new_session_id, compressed)
            # Reset stored token count — transcript changed, old value is stale
            self.session_store.update_session(
                session_entry.session_key, last_prompt_tokens=0
            )
            new_tokens = estimate_messages_tokens_rough(compressed)
            summary = summarize_manual_compression(
                msgs,
                compressed,
                approx_tokens,
                new_tokens,
            )
        finally:
            self._cleanup_agent_resources(tmp_agent)
        lines = [f"🗜️ {summary['headline']}"]
        if focus_topic:
            lines.append(f"Focus: \"{focus_topic}\"")
        lines.append(summary["token_line"])
        if summary["note"]:
            lines.append(summary["note"])
        return "\n".join(lines)
    except Exception as e:
        logger.warning("Manual compress failed: %s", e)
        return f"Compression failed: {e}"
async def _handle_title_command(self, event: MessageEvent) -> str:
    """Handle /title command — set or show the current session's title."""
    entry = self.session_store.get_or_create_session(event.source)
    sid = entry.session_id
    db = self._session_db
    if not db:
        return "Session database not available."
    # Lazily register the session in SQLite: it may only exist in the
    # session store when /title is the first command of a new session.
    if db.get_session_title(sid) is None:
        try:
            db.create_session(
                session_id=sid,
                source=event.source.platform.value if event.source.platform else "unknown",
                user_id=event.source.user_id,
            )
        except Exception:
            pass  # Session might already exist, ignore errors
    requested = event.get_command_args().strip()
    if not requested:
        # No argument: show the current title (if any) and session ID.
        current = db.get_session_title(sid)
        if not current:
            return f"📌 Session: `{sid}`\nNo title set. Usage: `/title My Session Name`"
        return f"📌 Session: `{sid}`\nTitle: **{current}**"
    # Argument supplied: sanitize, then store it as the new title.
    try:
        cleaned = db.sanitize_title(requested)
    except ValueError as e:
        return f"⚠️ {e}"
    if not cleaned:
        return "⚠️ Title is empty after cleanup. Please use printable characters."
    try:
        if not db.set_session_title(sid, cleaned):
            return "Session not found in database."
    except ValueError as e:
        return f"⚠️ {e}"
    return f"✏️ Session title set: **{cleaned}**"
async def _handle_resume_command(self, event: MessageEvent) -> str:
    """Handle /resume command — switch to a previously-named session.

    With no arguments, lists up to 10 recent titled sessions for this
    platform. With a name, resolves it to a session ID, flushes memories
    for the session being left, evicts any running agent, and points the
    session store at the resolved session.
    """
    if not self._session_db:
        return "Session database not available."
    source = event.source
    session_key = self._session_key_for_source(source)
    name = event.get_command_args().strip()
    if not name:
        # List recent titled sessions for this user/platform
        try:
            user_source = source.platform.value if source.platform else None
            sessions = self._session_db.list_sessions_rich(
                source=user_source, limit=10
            )
            # Only sessions the user explicitly titled are resumable by name.
            titled = [s for s in sessions if s.get("title")]
            if not titled:
                return (
                    "No named sessions found.\n"
                    "Use `/title My Session` to name your current session, "
                    "then `/resume My Session` to return to it later."
                )
            lines = ["📋 **Named Sessions**\n"]
            for s in titled[:10]:
                title = s["title"]
                preview = s.get("preview", "")[:40]
                preview_part = f" — _{preview}_" if preview else ""
                lines.append(f"• **{title}**{preview_part}")
            lines.append("\nUsage: `/resume <session name>`")
            return "\n".join(lines)
        except Exception as e:
            logger.debug("Failed to list titled sessions: %s", e)
            return f"Could not list sessions: {e}"
    # Resolve the name to a session ID
    target_id = self._session_db.resolve_session_by_title(name)
    if not target_id:
        return (
            f"No session found matching '**{name}**'.\n"
            "Use `/resume` with no arguments to see available sessions."
        )
    # Check if already on that session
    current_entry = self.session_store.get_or_create_session(source)
    if current_entry.session_id == target_id:
        return f"📌 Already on session **{name}**."
    # Flush memories for current session before switching
    try:
        _flush_task = asyncio.create_task(
            self._async_flush_memories(current_entry.session_id, session_key)
        )
        # Keep a strong reference so the flush task isn't garbage-collected
        # mid-flight; the done-callback removes it once finished.
        self._background_tasks.add(_flush_task)
        _flush_task.add_done_callback(self._background_tasks.discard)
    except Exception as e:
        logger.debug("Memory flush on resume failed: %s", e)
    # Clear any running agent for this session key
    if session_key in self._running_agents:
        del self._running_agents[session_key]
    # Switch the session entry to point at the old session
    new_entry = self.session_store.switch_session(session_key, target_id)
    if not new_entry:
        return "Failed to switch session."
    # Get the title for confirmation
    title = self._session_db.get_session_title(target_id) or name
    # Count messages for context
    history = self.session_store.load_transcript(target_id)
    msg_count = len([m for m in history if m.get("role") == "user"]) if history else 0
    msg_part = f" ({msg_count} message{'s' if msg_count != 1 else ''})" if msg_count else ""
    return f"↻ Resumed session **{title}**{msg_part}. Conversation restored."
async def _handle_branch_command(self, event: MessageEvent) -> str:
    """Handle /branch [name] — fork the current session into a new independent copy.

    Duplicates the conversation history into a fresh session so the user
    can explore a different direction without losing the original.

    Inspired by Claude Code's /branch command.
    """
    import uuid as _uuid
    from datetime import datetime as _dt

    if not self._session_db:
        return "Session database not available."
    db = self._session_db
    src = event.source
    key = self._session_key_for_source(src)
    entry = self.session_store.get_or_create_session(src)
    transcript = self.session_store.load_transcript(entry.session_id)
    if not transcript:
        return "No conversation to branch — send a message first."
    requested_name = event.get_command_args().strip()
    # New session ID: timestamp plus a short random suffix for uniqueness.
    fork_id = "{}_{}".format(
        _dt.now().strftime("%Y%m%d_%H%M%S"), _uuid.uuid4().hex[:6]
    )
    # Branch title: an explicit name wins; otherwise derive the next title
    # in the lineage of the current session's title (or plain "branch").
    if requested_name:
        fork_title = requested_name
    else:
        base_title = db.get_session_title(entry.session_id) or "branch"
        fork_title = db.get_next_title_in_lineage(base_title)
    parent_id = entry.session_id
    # Create the fork with a parent link so lineage stays traceable.
    try:
        db.create_session(
            session_id=fork_id,
            source=src.platform.value if src.platform else "gateway",
            model=(self.config.get("model", {}) or {}).get("default") if isinstance(self.config, dict) else None,
            parent_session_id=parent_id,
        )
    except Exception as e:
        logger.error("Failed to create branch session: %s", e)
        return f"Failed to create branch: {e}"
    # Best-effort copy of each message; individual failures are skipped.
    for record in transcript:
        try:
            db.append_message(
                session_id=fork_id,
                role=record.get("role", "user"),
                content=record.get("content"),
                tool_name=record.get("tool_name") or record.get("name"),
                tool_calls=record.get("tool_calls"),
                tool_call_id=record.get("tool_call_id"),
                reasoning=record.get("reasoning"),
            )
        except Exception:
            pass
    # Title the fork (best-effort).
    try:
        db.set_session_title(fork_id, fork_title)
    except Exception:
        pass
    # Point the session store at the fork, then drop any cached agent.
    if not self.session_store.switch_session(key, fork_id):
        return "Branch created but failed to switch to it."
    self._evict_cached_agent(key)
    user_msgs = sum(1 for m in transcript if m.get("role") == "user")
    return (
        f"⑂ Branched to **{fork_title}**"
        f" ({user_msgs} message{'s' if user_msgs != 1 else ''} copied)\n"
        f"Original: `{parent_id}`\n"
        f"Branch: `{fork_id}`\n"
        f"Use `/resume` to switch back to the original."
    )
async def _handle_usage_command(self, event: MessageEvent) -> str:
    """Handle /usage command -- show token usage for the current session.

    Checks both _running_agents (mid-turn) and _agent_cache (between turns)
    so that rate limits, cost estimates, and detailed token breakdowns are
    available whenever the user asks, not only while the agent is running.
    Falls back to a rough transcript-based estimate when no agent exists.
    """
    source = event.source
    session_key = self._session_key_for_source(source)
    # Try running agent first (mid-turn), then cached agent (between turns)
    agent = self._running_agents.get(session_key)
    if not agent or agent is _AGENT_PENDING_SENTINEL:
        # Cache attributes may not exist on older instances; probe defensively.
        _cache_lock = getattr(self, "_agent_cache_lock", None)
        _cache = getattr(self, "_agent_cache", None)
        if _cache_lock and _cache is not None:
            with _cache_lock:
                cached = _cache.get(session_key)
                if cached:
                    # NOTE(review): cache entry looks like a tuple with the
                    # agent as its first element — confirm against the cache
                    # writer before relying on this shape elsewhere.
                    agent = cached[0]
    if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
        lines = []
        # Rate limits (when available from provider headers)
        rl_state = agent.get_rate_limit_state()
        if rl_state and rl_state.has_data:
            from agent.rate_limit_tracker import format_rate_limit_compact
            lines.append(f"⏱️ **Rate Limits:** {format_rate_limit_compact(rl_state)}")
            lines.append("")
        # Session token usage — detailed breakdown matching CLI
        input_tokens = getattr(agent, "session_input_tokens", 0) or 0
        output_tokens = getattr(agent, "session_output_tokens", 0) or 0
        cache_read = getattr(agent, "session_cache_read_tokens", 0) or 0
        cache_write = getattr(agent, "session_cache_write_tokens", 0) or 0
        lines.append("📊 **Session Token Usage**")
        lines.append(f"Model: `{agent.model}`")
        lines.append(f"Input tokens: {input_tokens:,}")
        if cache_read:
            lines.append(f"Cache read tokens: {cache_read:,}")
        if cache_write:
            lines.append(f"Cache write tokens: {cache_write:,}")
        lines.append(f"Output tokens: {output_tokens:,}")
        lines.append(f"Total: {agent.session_total_tokens:,}")
        lines.append(f"API calls: {agent.session_api_calls}")
        # Cost estimation
        try:
            from agent.usage_pricing import CanonicalUsage, estimate_usage_cost
            cost_result = estimate_usage_cost(
                agent.model,
                CanonicalUsage(
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                    cache_read_tokens=cache_read,
                    cache_write_tokens=cache_write,
                ),
                provider=getattr(agent, "provider", None),
                base_url=getattr(agent, "base_url", None),
            )
            if cost_result.amount_usd is not None:
                # "~" prefix marks an estimate rather than an exact figure.
                prefix = "~" if cost_result.status == "estimated" else ""
                lines.append(f"Cost: {prefix}${float(cost_result.amount_usd):.4f}")
            elif cost_result.status == "included":
                lines.append("Cost: included")
        except Exception:
            pass
        # Context window and compressions
        ctx = agent.context_compressor
        if ctx.last_prompt_tokens:
            pct = min(100, ctx.last_prompt_tokens / ctx.context_length * 100) if ctx.context_length else 0
            lines.append(f"Context: {ctx.last_prompt_tokens:,} / {ctx.context_length:,} ({pct:.0f}%)")
        if ctx.compression_count:
            lines.append(f"Compressions: {ctx.compression_count}")
        return "\n".join(lines)
    # No agent at all -- check session history for a rough count
    session_entry = self.session_store.get_or_create_session(source)
    history = self.session_store.load_transcript(session_entry.session_id)
    if history:
        from agent.model_metadata import estimate_messages_tokens_rough
        msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")]
        approx = estimate_messages_tokens_rough(msgs)
        return (
            f"📊 **Session Info**\n"
            f"Messages: {len(msgs)}\n"
            f"Estimated context: ~{approx:,} tokens\n"
            f"_(Detailed usage available after the first agent response)_"
        )
    return "No usage data available for this session."
async def _handle_insights_command(self, event: MessageEvent) -> str:
    """Handle /insights command -- show usage insights and analytics."""
    import asyncio as _asyncio
    import re as _re

    raw = event.get_command_args().strip()
    # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
    raw = _re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', raw)
    window_days = 30
    source_filter = None
    # Tiny hand-rolled parser: accepts a bare day count, "--days N",
    # and "--source X" in any order; unknown tokens are ignored.
    tokens = raw.split() if raw else []
    pos = 0
    while pos < len(tokens):
        tok = tokens[pos]
        if tok == "--days" and pos + 1 < len(tokens):
            try:
                window_days = int(tokens[pos + 1])
            except ValueError:
                return f"Invalid --days value: {tokens[pos + 1]}"
            pos += 2
        elif tok == "--source" and pos + 1 < len(tokens):
            source_filter = tokens[pos + 1]
            pos += 2
        else:
            if tok.isdigit():
                window_days = int(tok)
            pos += 1
    try:
        from hermes_state import SessionDB
        from agent.insights import InsightsEngine

        def _build_report():
            # Runs in a worker thread: SessionDB does blocking SQLite I/O.
            db = SessionDB()
            engine = InsightsEngine(db)
            report = engine.generate(days=window_days, source=source_filter)
            formatted = engine.format_gateway(report)
            db.close()
            return formatted

        return await _asyncio.get_running_loop().run_in_executor(None, _build_report)
    except Exception as e:
        logger.error("Insights command error: %s", e, exc_info=True)
        return f"Error generating insights: {e}"
async def _handle_reload_mcp_command(self, event: MessageEvent) -> str:
"""Handle /reload-mcp command -- disconnect and reconnect all MCP servers."""
loop = asyncio.get_running_loop()
try:
refactor: remove dead code — 1,784 lines across 77 files (#9180) Deep scan with vulture, pyflakes, and manual cross-referencing identified: - 41 dead functions/methods (zero callers in production) - 7 production-dead functions (only test callers, tests deleted) - 5 dead constants/variables - ~35 unused imports across agent/, hermes_cli/, tools/, gateway/ Categories of dead code removed: - Refactoring leftovers: _set_default_model, _setup_copilot_reasoning_selection, rebuild_lookups, clear_session_context, get_logs_dir, clear_session - Unused API surface: search_models_dev, get_pricing, skills_categories, get_read_files_summary, clear_read_tracker, menu_labels, get_spinner_list - Dead compatibility wrappers: schedule_cronjob, list_cronjobs, remove_cronjob - Stale debug helpers: get_debug_session_info copies in 4 tool files (centralized version in debug_helpers.py already exists) - Dead gateway methods: send_emote, send_notice (matrix), send_reaction (bluebubbles), _normalize_inbound_text (feishu), fetch_room_history (matrix), _start_typing_indicator (signal), parse_feishu_post_content - Dead constants: NOUS_API_BASE_URL, SKILLS_TOOL_DESCRIPTION, FILE_TOOLS, VALID_ASPECT_RATIOS, MEMORY_DIR - Unused UI code: _interactive_provider_selection, _interactive_model_selection (superseded by prompt_toolkit picker) Test suite verified: 609 tests covering affected files all pass. Tests for removed functions deleted. Tests using removed utilities (clear_read_tracker, MEMORY_DIR) updated to use internal APIs directly.
2026-04-13 16:32:04 -07:00
from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _servers, _lock
# Capture old server names before shutdown
with _lock:
old_servers = set(_servers.keys())
# Read new config before shutting down, so we know what will be added/removed
# Shutdown existing connections
await loop.run_in_executor(None, shutdown_mcp_servers)
# Reconnect by discovering tools (reads config.yaml fresh)
new_tools = await loop.run_in_executor(None, discover_mcp_tools)
# Compute what changed
with _lock:
connected_servers = set(_servers.keys())
added = connected_servers - old_servers
removed = old_servers - connected_servers
reconnected = connected_servers & old_servers
lines = ["🔄 **MCP Servers Reloaded**\n"]
if reconnected:
lines.append(f"♻️ Reconnected: {', '.join(sorted(reconnected))}")
if added:
lines.append(f" Added: {', '.join(sorted(added))}")
if removed:
lines.append(f" Removed: {', '.join(sorted(removed))}")
if not connected_servers:
lines.append("No MCP servers connected.")
else:
lines.append(f"\n🔧 {len(new_tools)} tool(s) available from {len(connected_servers)} server(s)")
# Inject a message at the END of the session history so the
# model knows tools changed on its next turn. Appended after
# all existing messages to preserve prompt-cache for the prefix.
change_parts = []
if added:
change_parts.append(f"Added servers: {', '.join(sorted(added))}")
if removed:
change_parts.append(f"Removed servers: {', '.join(sorted(removed))}")
if reconnected:
change_parts.append(f"Reconnected servers: {', '.join(sorted(reconnected))}")
tool_summary = f"{len(new_tools)} MCP tool(s) now available" if new_tools else "No MCP tools available"
change_detail = ". ".join(change_parts) + ". " if change_parts else ""
reload_msg = {
"role": "user",
"content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
}
try:
session_entry = self.session_store.get_or_create_session(event.source)
self.session_store.append_to_transcript(
session_entry.session_id, reload_msg
)
except Exception:
pass # Best-effort; don't fail the reload over a transcript write
return "\n".join(lines)
except Exception as e:
logger.warning("MCP reload failed: %s", e)
return f"❌ MCP reload failed: {e}"
# ------------------------------------------------------------------
# /approve & /deny — explicit dangerous-command approval
# ------------------------------------------------------------------
_APPROVAL_TIMEOUT_SECONDS = 300 # 5 minutes
async def _handle_approve_command(self, event: MessageEvent) -> Optional[str]:
"""Handle /approve command — unblock waiting agent thread(s).
The agent thread(s) are blocked inside tools/approval.py waiting for
the user to respond. This handler signals the event so the agent
resumes and the terminal_tool executes the command inline the same
flow as the CLI's synchronous input() approval.
Supports multiple concurrent approvals (parallel subagents,
execute_code). ``/approve`` resolves the oldest pending command;
``/approve all`` resolves every pending command at once.
Usage:
/approve approve oldest pending command once
/approve all approve ALL pending commands at once
/approve session approve oldest + remember for session
/approve all session approve all + remember for session
/approve always approve oldest + remember permanently
/approve all always approve all + remember permanently
"""
source = event.source
session_key = self._session_key_for_source(source)
from tools.approval import (
resolve_gateway_approval, has_blocking_approval,
)
if not has_blocking_approval(session_key):
if session_key in self._pending_approvals:
self._pending_approvals.pop(session_key)
return "⚠️ Approval expired (agent is no longer waiting). Ask the agent to try again."
return "No pending command to approve."
# Parse args: support "all", "all session", "all always", "session", "always"
args = event.get_command_args().strip().lower().split()
resolve_all = "all" in args
remaining = [a for a in args if a != "all"]
if any(a in ("always", "permanent", "permanently") for a in remaining):
choice = "always"
scope_msg = " (pattern approved permanently)"
elif any(a in ("session", "ses") for a in remaining):
choice = "session"
scope_msg = " (pattern approved for this session)"
else:
choice = "once"
scope_msg = ""
count = resolve_gateway_approval(session_key, choice, resolve_all=resolve_all)
if not count:
return "No pending command to approve."
# Resume typing indicator — agent is about to continue processing.
_adapter = self.adapters.get(source.platform)
if _adapter:
_adapter.resume_typing_for_chat(source.chat_id)
count_msg = f" ({count} commands)" if count > 1 else ""
logger.info("User approved %d dangerous command(s) via /approve%s", count, scope_msg)
return f"✅ Command{'s' if count > 1 else ''} approved{scope_msg}{count_msg}. The agent is resuming..."
async def _handle_deny_command(self, event: MessageEvent) -> str:
"""Handle /deny command — reject pending dangerous command(s).
Signals blocked agent thread(s) with a 'deny' result so they receive
a definitive BLOCKED message, same as the CLI deny flow.
``/deny`` denies the oldest; ``/deny all`` denies everything.
"""
source = event.source
session_key = self._session_key_for_source(source)
from tools.approval import (
resolve_gateway_approval, has_blocking_approval,
)
if not has_blocking_approval(session_key):
if session_key in self._pending_approvals:
self._pending_approvals.pop(session_key)
return "❌ Command denied (approval was stale)."
return "No pending command to deny."
args = event.get_command_args().strip().lower()
resolve_all = "all" in args
count = resolve_gateway_approval(session_key, "deny", resolve_all=resolve_all)
if not count:
return "No pending command to deny."
# Resume typing indicator — agent continues (with BLOCKED result).
_adapter = self.adapters.get(source.platform)
if _adapter:
_adapter.resume_typing_for_chat(source.chat_id)
count_msg = f" ({count} commands)" if count > 1 else ""
logger.info("User denied %d dangerous command(s) via /deny", count)
return f"❌ Command{'s' if count > 1 else ''} denied{count_msg}."
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
# Platforms where /update is allowed. ACP, API server, and webhooks are
# programmatic interfaces that should not trigger system updates.
_UPDATE_ALLOWED_PLATFORMS = frozenset({
Platform.TELEGRAM, Platform.DISCORD, Platform.SLACK, Platform.WHATSAPP,
Platform.SIGNAL, Platform.MATTERMOST, Platform.MATRIX,
Platform.HOMEASSISTANT, Platform.EMAIL, Platform.SMS, Platform.DINGTALK,
Platform.FEISHU, Platform.WECOM, Platform.WECOM_CALLBACK, Platform.WEIXIN, Platform.BLUEBUBBLES, Platform.QQBOT, Platform.LOCAL,
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
})
async def _handle_debug_command(self, event: MessageEvent) -> str:
"""Handle /debug — upload debug report (summary only) and return paste URLs.
Gateway uploads ONLY the summary report (system info + log tails),
NOT full log files, to protect conversation privacy. Users who need
full log uploads should use ``hermes debug share`` from the CLI.
"""
import asyncio
from hermes_cli.debug import (
_capture_dump, collect_debug_report,
upload_to_pastebin, _schedule_auto_delete,
_GATEWAY_PRIVACY_NOTICE,
)
loop = asyncio.get_running_loop()
# Run blocking I/O (dump capture, log reads, uploads) in a thread.
def _collect_and_upload():
dump_text = _capture_dump()
report = collect_debug_report(log_lines=200, dump_text=dump_text)
urls = {}
try:
urls["Report"] = upload_to_pastebin(report)
except Exception as exc:
return f"✗ Failed to upload debug report: {exc}"
# Schedule auto-deletion after 6 hours
_schedule_auto_delete(list(urls.values()))
lines = [_GATEWAY_PRIVACY_NOTICE, "", "**Debug report uploaded:**", ""]
label_width = max(len(k) for k in urls)
for label, url in urls.items():
lines.append(f"`{label:<{label_width}}` {url}")
lines.append("")
lines.append("⏱ Pastes will auto-delete in 6 hours.")
lines.append("For full log uploads, use `hermes debug share` from the CLI.")
lines.append("Share these links with the Hermes team for support.")
return "\n".join(lines)
return await loop.run_in_executor(None, _collect_and_upload)
async def _handle_update_command(self, event: MessageEvent) -> str:
"""Handle /update command — update Hermes Agent to the latest version.
Spawns ``hermes update`` in a detached session (via ``setsid``) so it
survives the gateway restart that ``hermes update`` may trigger. Marker
files are written so either the current gateway process or the next one
can notify the user when the update finishes.
"""
import json
import shutil
import subprocess
from datetime import datetime
from hermes_cli.config import is_managed, format_managed_message
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
# Block non-messaging platforms (API server, webhooks, ACP)
platform = event.source.platform
if platform not in self._UPDATE_ALLOWED_PLATFORMS:
return "✗ /update is only available from messaging platforms. Run `hermes update` from the terminal."
if is_managed():
return f"{format_managed_message('update Hermes Agent')}"
project_root = Path(__file__).parent.parent.resolve()
git_dir = project_root / '.git'
if not git_dir.exists():
return "✗ Not a git repository — cannot update."
hermes_cmd = _resolve_hermes_bin()
if not hermes_cmd:
return (
"✗ Could not locate the `hermes` command. "
"Hermes is running, but the update command could not find the "
"executable on PATH or via the current Python interpreter. "
"Try running `hermes update` manually in your terminal."
)
pending_path = _hermes_home / ".update_pending.json"
output_path = _hermes_home / ".update_output.txt"
exit_code_path = _hermes_home / ".update_exit_code"
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
session_key = self._session_key_for_source(event.source)
pending = {
"platform": event.source.platform.value,
"chat_id": event.source.chat_id,
"user_id": event.source.user_id,
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
"session_key": session_key,
"timestamp": datetime.now().isoformat(),
}
_tmp_pending = pending_path.with_suffix(".tmp")
_tmp_pending.write_text(json.dumps(pending))
_tmp_pending.replace(pending_path)
exit_code_path.unlink(missing_ok=True)
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
# Spawn `hermes update --gateway` detached so it survives gateway restart.
# --gateway enables file-based IPC for interactive prompts (stash
# restore, config migration) so the gateway can forward them to the
# user instead of silently skipping them.
# Use setsid for portable session detach (works under system services
# where systemd-run --user fails due to missing D-Bus session).
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
# PYTHONUNBUFFERED ensures output is flushed line-by-line so the
# gateway can stream it to the messenger in near-real-time.
hermes_cmd_str = " ".join(shlex.quote(part) for part in hermes_cmd)
update_cmd = (
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
f"PYTHONUNBUFFERED=1 {hermes_cmd_str} update --gateway"
f" > {shlex.quote(str(output_path))} 2>&1; "
f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}"
)
try:
setsid_bin = shutil.which("setsid")
if setsid_bin:
# Preferred: setsid creates a new session, fully detached
subprocess.Popen(
[setsid_bin, "bash", "-c", update_cmd],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
start_new_session=True,
)
else:
# Fallback: start_new_session=True calls os.setsid() in child
subprocess.Popen(
["bash", "-c", update_cmd],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
start_new_session=True,
)
except Exception as e:
pending_path.unlink(missing_ok=True)
exit_code_path.unlink(missing_ok=True)
return f"✗ Failed to start update: {e}"
self._schedule_update_notification_watch()
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
return "⚕ Starting Hermes update… I'll stream progress here."
def _schedule_update_notification_watch(self) -> None:
    """Start the background update-progress watcher if one isn't running.

    Reuses the existing watcher task when it is still active; otherwise
    schedules ``_watch_update_progress`` on the current event loop. When
    no loop is running (e.g. during tests or shutdown), the watch is
    silently skipped and will be retried on the next call.
    """
    current = getattr(self, "_update_notification_task", None)
    if current is not None and not current.done():
        # A watcher is already active; avoid spawning a duplicate.
        return
    try:
        task = asyncio.create_task(self._watch_update_progress())
    except RuntimeError:
        # asyncio.create_task raises RuntimeError outside a running loop.
        logger.debug("Skipping update notification watcher: no running event loop")
    else:
        self._update_notification_task = task
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
async def _watch_update_progress(
self,
poll_interval: float = 2.0,
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
stream_interval: float = 4.0,
timeout: float = 1800.0,
) -> None:
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
"""Watch ``hermes update --gateway``, streaming output + forwarding prompts.
Polls ``.update_output.txt`` for new content and sends chunks to the
user periodically. Detects ``.update_prompt.json`` (written by the
update process when it needs user input) and forwards the prompt to
the messenger. The user's next message is intercepted by
``_handle_message`` and written to ``.update_response``.
"""
import json
import re as _re
pending_path = _hermes_home / ".update_pending.json"
claimed_path = _hermes_home / ".update_pending.claimed.json"
output_path = _hermes_home / ".update_output.txt"
exit_code_path = _hermes_home / ".update_exit_code"
prompt_path = _hermes_home / ".update_prompt.json"
loop = asyncio.get_running_loop()
deadline = loop.time() + timeout
# Resolve the adapter and chat_id for sending messages
adapter = None
chat_id = None
session_key = None
for path in (claimed_path, pending_path):
if path.exists():
try:
pending = json.loads(path.read_text())
platform_str = pending.get("platform")
chat_id = pending.get("chat_id")
session_key = pending.get("session_key")
if platform_str and chat_id:
platform = Platform(platform_str)
adapter = self.adapters.get(platform)
# Fallback session key if not stored (old pending files)
if not session_key:
session_key = f"{platform_str}:{chat_id}"
break
except Exception:
pass
if not adapter or not chat_id:
logger.warning("Update watcher: cannot resolve adapter/chat_id, falling back to completion-only")
# Fall back to old behavior: wait for exit code and send final notification
while (pending_path.exists() or claimed_path.exists()) and loop.time() < deadline:
if exit_code_path.exists():
await self._send_update_notification()
return
await asyncio.sleep(poll_interval)
if (pending_path.exists() or claimed_path.exists()) and not exit_code_path.exists():
exit_code_path.write_text("124")
await self._send_update_notification()
return
def _strip_ansi(text: str) -> str:
return _re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text)
bytes_sent = 0
last_stream_time = loop.time()
buffer = ""
async def _flush_buffer() -> None:
"""Send buffered output to the user."""
nonlocal buffer, last_stream_time
if not buffer.strip():
buffer = ""
return
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
# Chunk to fit message limits (Telegram: 4096, others: generous)
clean = _strip_ansi(buffer).strip()
buffer = ""
last_stream_time = loop.time()
if not clean:
return
# Split into chunks if too long
max_chunk = 3500
chunks = [clean[i:i + max_chunk] for i in range(0, len(clean), max_chunk)]
for chunk in chunks:
try:
await adapter.send(chat_id, f"```\n{chunk}\n```")
except Exception as e:
logger.debug("Update stream send failed: %s", e)
while loop.time() < deadline:
# Check for completion
if exit_code_path.exists():
# Read any remaining output
if output_path.exists():
try:
content = output_path.read_text()
if len(content) > bytes_sent:
buffer += content[bytes_sent:]
bytes_sent = len(content)
except OSError:
pass
await _flush_buffer()
# Send final status
try:
exit_code_raw = exit_code_path.read_text().strip() or "1"
exit_code = int(exit_code_raw)
if exit_code == 0:
await adapter.send(chat_id, "✅ Hermes update finished.")
else:
await adapter.send(chat_id, "❌ Hermes update failed (exit code {}).".format(exit_code))
logger.info("Update finished (exit=%s), notified %s", exit_code, session_key)
except Exception as e:
logger.warning("Update final notification failed: %s", e)
# Cleanup
for p in (pending_path, claimed_path, output_path,
exit_code_path, prompt_path):
p.unlink(missing_ok=True)
(_hermes_home / ".update_response").unlink(missing_ok=True)
self._update_prompt_pending.pop(session_key, None)
return
# Check for new output
if output_path.exists():
try:
content = output_path.read_text()
if len(content) > bytes_sent:
buffer += content[bytes_sent:]
bytes_sent = len(content)
except OSError:
pass
# Flush buffer periodically
if buffer.strip() and (loop.time() - last_stream_time) >= stream_interval:
await _flush_buffer()
# Check for prompts — only forward if we haven't already sent
# one that's still awaiting a response. Without this guard the
# watcher would re-read the same .update_prompt.json every poll
# cycle and spam the user with duplicate prompt messages.
if (prompt_path.exists() and session_key
and not self._update_prompt_pending.get(session_key)):
try:
prompt_data = json.loads(prompt_path.read_text())
prompt_text = prompt_data.get("prompt", "")
default = prompt_data.get("default", "")
if prompt_text:
# Flush any buffered output first so the user sees
# context before the prompt
await _flush_buffer()
# Try platform-native buttons first (Discord, Telegram)
sent_buttons = False
if getattr(type(adapter), "send_update_prompt", None) is not None:
try:
await adapter.send_update_prompt(
chat_id=chat_id,
prompt=prompt_text,
default=default,
session_key=session_key,
)
sent_buttons = True
except Exception as btn_err:
logger.debug("Button-based update prompt failed: %s", btn_err)
if not sent_buttons:
default_hint = f" (default: {default})" if default else ""
await adapter.send(
chat_id,
f"⚕ **Update needs your input:**\n\n"
f"{prompt_text}{default_hint}\n\n"
f"Reply `/approve` (yes) or `/deny` (no), "
f"or type your answer directly."
)
self._update_prompt_pending[session_key] = True
# Remove the prompt file so it isn't re-read on the
# next poll cycle. The update process only needs
# .update_response to continue — it doesn't re-check
# .update_prompt.json while waiting.
prompt_path.unlink(missing_ok=True)
logger.info("Forwarded update prompt to %s: %s", session_key, prompt_text[:80])
except (json.JSONDecodeError, OSError) as e:
logger.debug("Failed to read update prompt: %s", e)
await asyncio.sleep(poll_interval)
# Timeout
if not exit_code_path.exists():
logger.warning("Update watcher timed out after %.0fs", timeout)
exit_code_path.write_text("124")
feat(gateway): live-stream /update output + interactive prompt buttons (#5180) * feat(gateway): live-stream /update output + forward interactive prompts Adds real-time output streaming and interactive prompt forwarding for the gateway /update command, so users on Telegram/Discord/etc see the full update progress and can respond to prompts (stash restore, config migration) without needing terminal access. Changes: hermes_cli/main.py: - Add --gateway flag to 'hermes update' argparse - Add _gateway_prompt() file-based IPC function that writes .update_prompt.json and polls for .update_response - Modify _restore_stashed_changes() to accept optional input_fn parameter for gateway mode prompt forwarding - cmd_update() uses _gateway_prompt when --gateway is set, enabling interactive stash restore and config migration prompts gateway/run.py: - _handle_update_command: spawn with --gateway flag and PYTHONUNBUFFERED=1 for real-time output flushing - Store session_key in .update_pending.json for cross-restart session matching - Add _update_prompt_pending dict to track sessions awaiting update prompt responses - Replace _watch_for_update_completion with _watch_update_progress: streams output chunks every ~4s, detects .update_prompt.json and forwards prompts to the user, handles completion/failure/timeout - Add update prompt interception in _handle_message: when a prompt is pending, the user's next message is written to .update_response instead of being processed normally - Preserve _send_update_notification as legacy fallback for post-restart cases where adapter isn't available yet File-based IPC protocol: - .update_prompt.json: written by update process with prompt text, default value, and unique ID - .update_response: written by gateway with user's answer - .update_output.txt: existing, now streamed in real-time - .update_exit_code: existing completion marker Tests: 16 new tests covering _gateway_prompt IPC, output streaming, prompt detection/forwarding, message interception, 
and cleanup. * feat: interactive buttons for update prompts (Telegram + Discord) Telegram: Inline keyboard with ✓ Yes / ✗ No buttons. Clicking a button answers the callback query, edits the message to show the choice, and writes .update_response directly. CallbackQueryHandler registered on the update_prompt: prefix. Discord: UpdatePromptView (discord.ui.View) with green Yes / red No buttons. Follows the ExecApprovalView pattern — auth check, embed color update, disabled-after-click. Writes .update_response on click. All platforms: /approve and /deny (and /yes, /no) now work as shorthand for yes/no when an update prompt is pending. The text fallback message instructs users to use these commands. Raw message interception still works as a fallback for non-command responses. Gateway watcher checks adapter for send_update_prompt method (class-level check to avoid MagicMock false positives) and falls back to text prompt with /approve instructions when unavailable. * fix: block /update on non-messaging platforms (API, webhooks, ACP) Add _UPDATE_ALLOWED_PLATFORMS frozenset that explicitly lists messaging platforms where /update is permitted. API server, webhook, and ACP platforms get a clear error directing them to run hermes update from the terminal instead. ACP and API server already don't reach _handle_message (separate codepaths), and webhooks have distinct session keys that can't collide with messaging sessions. This guard is belt-and-suspenders.
2026-04-05 00:28:58 -07:00
await _flush_buffer()
try:
await adapter.send(chat_id, "❌ Hermes update timed out after 30 minutes.")
except Exception:
pass
for p in (pending_path, claimed_path, output_path,
exit_code_path, prompt_path):
p.unlink(missing_ok=True)
(_hermes_home / ".update_response").unlink(missing_ok=True)
self._update_prompt_pending.pop(session_key, None)
async def _send_update_notification(self) -> bool:
    """If an update finished, notify the user.

    Returns False when the update is still running so a caller can retry
    later. Returns True after a definitive send/skip decision.

    This is the legacy notification path used when the streaming watcher
    cannot resolve the adapter (e.g. after a gateway restart where the
    platform hasn't reconnected yet).
    """
    import json
    import re as _re
    pending_path = _hermes_home / ".update_pending.json"
    claimed_path = _hermes_home / ".update_pending.claimed.json"
    output_path = _hermes_home / ".update_output.txt"
    exit_code_path = _hermes_home / ".update_exit_code"
    if not pending_path.exists() and not claimed_path.exists():
        return False
    cleanup = True
    active_pending_path = claimed_path
    try:
        # Atomically claim the pending marker (rename) so two concurrent
        # callers cannot both send the notification.
        if pending_path.exists():
            try:
                pending_path.replace(claimed_path)
            except FileNotFoundError:
                # Another caller claimed it between exists() and replace().
                if not claimed_path.exists():
                    return True
        elif not claimed_path.exists():
            return True
        pending = json.loads(claimed_path.read_text())
        platform_str = pending.get("platform")
        chat_id = pending.get("chat_id")
        if not exit_code_path.exists():
            # Update still running: un-claim the marker and ask the
            # caller to retry later.
            logger.info("Update notification deferred: update still running")
            cleanup = False
            active_pending_path = pending_path
            claimed_path.replace(pending_path)
            return False
        # Missing/empty exit code is treated as failure ("1").
        exit_code_raw = exit_code_path.read_text().strip() or "1"
        exit_code = int(exit_code_raw)
        # Read the captured update output
        output = ""
        if output_path.exists():
            output = output_path.read_text()
        # Resolve adapter
        platform = Platform(platform_str)
        adapter = self.adapters.get(platform)
        if adapter and chat_id:
            # Strip ANSI escape codes for clean display
            output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
            if output:
                if len(output) > 3500:
                    # Keep only the tail and mark the truncation.
                    # (The previous code prepended an empty string here,
                    # which was a no-op — the ellipsis was lost.)
                    output = "…" + output[-3500:]
                if exit_code == 0:
                    msg = f"✅ Hermes update finished.\n\n```\n{output}\n```"
                else:
                    msg = f"❌ Hermes update failed.\n\n```\n{output}\n```"
            else:
                if exit_code == 0:
                    msg = "✅ Hermes update finished successfully."
                else:
                    msg = "❌ Hermes update failed. Check the gateway logs or run `hermes update` manually for details."
            await adapter.send(chat_id, msg)
            logger.info(
                "Sent post-update notification to %s:%s (exit=%s)",
                platform_str,
                chat_id,
                exit_code,
            )
    except Exception as e:
        # Best-effort: never let a notification failure break the gateway.
        logger.warning("Post-update notification failed: %s", e)
    finally:
        if cleanup:
            # Remove all IPC files so the next /update starts clean.
            active_pending_path.unlink(missing_ok=True)
            claimed_path.unlink(missing_ok=True)
            output_path.unlink(missing_ok=True)
            exit_code_path.unlink(missing_ok=True)
    return True
async def _send_restart_notification(self) -> None:
    """Notify the chat that initiated /restart that the gateway is back."""
    import json as _json
    notify_path = _hermes_home / ".restart_notify.json"
    if not notify_path.exists():
        return
    try:
        payload = _json.loads(notify_path.read_text())
        platform_str = payload.get("platform")
        chat_id = payload.get("chat_id")
        thread_id = payload.get("thread_id")
        # Without routing info there is nobody to notify.
        if not platform_str or not chat_id:
            return
        adapter = self.adapters.get(Platform(platform_str))
        if not adapter:
            logger.debug(
                "Restart notification skipped: %s adapter not connected",
                platform_str,
            )
            return
        metadata = {"thread_id": thread_id} if thread_id else None
        await adapter.send(
            chat_id,
            "♻ Gateway restarted successfully. Your session continues.",
            metadata=metadata,
        )
        logger.info(
            "Sent restart notification to %s:%s",
            platform_str,
            chat_id,
        )
    except Exception as e:
        logger.warning("Restart notification failed: %s", e)
    finally:
        # One-shot marker: always consume it, even on failure.
        notify_path.unlink(missing_ok=True)
def _set_session_env(self, context: SessionContext) -> list:
    """Bind session context variables for the current async task.

    Uses ``contextvars`` rather than ``os.environ`` so concurrent
    gateway messages cannot clobber each other's session state.

    Returns the reset tokens; hand them to ``_clear_session_env``
    in a ``finally`` block.
    """
    from gateway.session_context import set_session_vars
    src = context.source
    return set_session_vars(
        platform=src.platform.value,
        chat_id=src.chat_id,
        chat_name=src.chat_name or "",
        thread_id=str(src.thread_id) if src.thread_id else "",
        user_id=str(src.user_id) if src.user_id else "",
        user_name=str(src.user_name) if src.user_name else "",
        session_key=context.session_key,
    )
def _clear_session_env(self, tokens: list) -> None:
    """Undo ``_set_session_env`` by resetting each contextvar token."""
    from gateway.session_context import clear_session_vars
    clear_session_vars(tokens)
async def _run_in_executor_with_context(self, func, *args):
"""Run blocking work in the thread pool while preserving session contextvars."""
loop = asyncio.get_running_loop()
ctx = copy_context()
return await loop.run_in_executor(None, ctx.run, func, *args)
async def _enrich_message_with_vision(
    self,
    user_text: str,
    image_paths: List[str],
) -> str:
    """
    Auto-analyze user-attached images with the vision tool and prepend
    the descriptions to the message text.

    Every image gets a general-purpose analysis pass. Both the resulting
    description *and* the local cache path are injected so the model can:

    1. Immediately understand what the user sent (no extra tool call).
    2. Re-examine the image with vision_analyze if it needs more detail.

    Args:
        user_text: The user's original caption / message text.
        image_paths: Local file paths to cached images.

    Returns:
        The enriched message with vision descriptions prepended.
    """
    from tools.vision_tools import vision_analyze_tool
    import json as _json
    analysis_prompt = (
        "Describe everything visible in this image in thorough detail. "
        "Include any text, code, data, objects, people, layout, colors, "
        "and any other notable visual information."
    )
    descriptions = []
    for image_path in image_paths:
        try:
            logger.debug("Auto-analyzing user image: %s", image_path)
            raw = await vision_analyze_tool(
                image_url=image_path,
                user_prompt=analysis_prompt,
            )
            parsed = _json.loads(raw)
            if parsed.get("success"):
                seen = parsed.get("analysis", "")
                descriptions.append(
                    f"[The user sent an image~ Here's what I can see:\n{seen}]\n"
                    f"[If you need a closer look, use vision_analyze with "
                    f"image_url: {image_path} ~]"
                )
            else:
                descriptions.append(
                    "[The user sent an image but I couldn't quite see it "
                    "this time (>_<) You can try looking at it yourself "
                    f"with vision_analyze using image_url: {image_path}]"
                )
        except Exception as e:
            logger.error("Vision auto-analysis error: %s", e)
            descriptions.append(
                f"[The user sent an image but something went wrong when I "
                f"tried to look at it~ You can try examining it yourself "
                f"with vision_analyze using image_url: {image_path}]"
            )
    # Vision descriptions go first, then the user's original text.
    if not descriptions:
        return user_text
    prefix = "\n\n".join(descriptions)
    return f"{prefix}\n\n{user_text}" if user_text else prefix
async def _enrich_message_with_transcription(
    self,
    user_text: str,
    audio_paths: List[str],
) -> str:
    """
    Auto-transcribe user voice/audio messages with the configured STT
    provider and prepend the transcripts to the message text.

    Args:
        user_text: The user's original caption / message text.
        audio_paths: Local file paths to cached audio files.

    Returns:
        The enriched message with transcription notes prepended.
    """
    # STT can be globally disabled in config; emit a note instead.
    if not getattr(self.config, "stt_enabled", True):
        disabled_note = "[The user sent voice message(s), but transcription is disabled in config."
        if self._has_setup_skill():
            disabled_note += (
                " You have a skill called hermes-agent-setup that can help "
                "users configure Hermes features including voice, tools, and more."
            )
        disabled_note += "]"
        return f"{disabled_note}\n\n{user_text}" if user_text else disabled_note
    from tools.transcription_tools import transcribe_audio
    import asyncio
    notes = []
    for audio_path in audio_paths:
        try:
            logger.debug("Transcribing user voice: %s", audio_path)
            outcome = await asyncio.to_thread(transcribe_audio, audio_path)
            if outcome["success"]:
                heard = outcome["transcript"]
                notes.append(
                    f'[The user sent a voice message~ '
                    f'Here\'s what they said: "{heard}"]'
                )
            else:
                reason = outcome.get("error", "unknown error")
                provider_missing = (
                    "No STT provider" in reason
                    or reason.startswith("Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set")
                )
                if provider_missing:
                    missing_note = (
                        "[The user sent a voice message but I can't listen "
                        "to it right now — no STT provider is configured. "
                        "A direct message has already been sent to the user "
                        "with setup instructions."
                    )
                    if self._has_setup_skill():
                        missing_note += (
                            " You have a skill called hermes-agent-setup "
                            "that can help users configure Hermes features "
                            "including voice, tools, and more."
                        )
                    missing_note += "]"
                    notes.append(missing_note)
                else:
                    notes.append(
                        "[The user sent a voice message but I had trouble "
                        f"transcribing it~ ({reason})]"
                    )
        except Exception as e:
            logger.error("Transcription error: %s", e)
            notes.append(
                "[The user sent a voice message but something went wrong "
                "when I tried to listen to it~ Let them know!]"
            )
    if not notes:
        return user_text
    prefix = "\n\n".join(notes)
    # The Discord adapter inserts a placeholder for empty-content
    # messages; once we have a transcript it is redundant — drop it.
    _placeholder = "(The user sent a message with no text content)"
    if user_text and user_text.strip() == _placeholder:
        return prefix
    return f"{prefix}\n\n{user_text}" if user_text else prefix
def _build_process_event_source(self, evt: dict):
    """Resolve the canonical source for a synthetic background-process event.

    Prefer the persisted session-store origin for the event's session key;
    falling back to the currently active foreground event is what causes
    cross-topic bleed, so that is never done here.
    """
    from gateway.session import SessionSource
    session_key = str(evt.get("session_key") or "").strip()
    fallback_platform = ""
    fallback_chat_type = ""
    fallback_chat_id = ""
    if session_key:
        # First choice: the origin recorded in the session store.
        try:
            self.session_store._ensure_loaded()
            entry = self.session_store._entries.get(session_key)
            if entry and getattr(entry, "origin", None):
                return entry.origin
        except Exception as exc:
            logger.debug(
                "Synthetic process-event session-store lookup failed for %s: %s",
                session_key,
                exc,
            )
        # Second choice: routing fields parsed from the key itself.
        parsed = _parse_session_key(session_key)
        if parsed:
            fallback_platform = parsed["platform"]
            fallback_chat_type = parsed["chat_type"]
            fallback_chat_id = parsed["chat_id"]
    platform_name = str(evt.get("platform") or fallback_platform or "").strip().lower()
    chat_type = str(evt.get("chat_type") or fallback_chat_type or "").strip().lower()
    chat_id = str(evt.get("chat_id") or fallback_chat_id or "").strip()
    # All three fields are required to route the synthetic event.
    if not (platform_name and chat_type and chat_id):
        return None
    try:
        platform = Platform(platform_name)
    except Exception:
        logger.warning(
            "Synthetic process event has invalid platform metadata: %r",
            platform_name,
        )
        return None
    return SessionSource(
        platform=platform,
        chat_id=chat_id,
        chat_type=chat_type,
        thread_id=str(evt.get("thread_id") or "").strip() or None,
        user_id=str(evt.get("user_id") or "").strip() or None,
        user_name=str(evt.get("user_name") or "").strip() or None,
    )
async def _inject_watch_notification(self, synth_text: str, evt: dict) -> None:
    """Inject a watch-pattern notification as a synthetic message event.

    Routing must come from the queued watch event itself, not from
    whatever foreground message happened to be active when the queue
    was drained.
    """
    source = self._build_process_event_source(evt)
    if not source:
        logger.warning(
            "Dropping watch notification with no routing metadata for process %s",
            evt.get("session_id", "unknown"),
        )
        return
    platform_name = source.platform.value if hasattr(source.platform, "value") else str(source.platform)
    adapter = next(
        (a for p, a in self.adapters.items() if p.value == platform_name),
        None,
    )
    if not adapter:
        # Platform not connected — nothing to deliver to.
        return
    try:
        from gateway.platforms.base import MessageEvent, MessageType
        synth_event = MessageEvent(
            text=synth_text,
            message_type=MessageType.TEXT,
            source=source,
            internal=True,
        )
        logger.info(
            "Watch pattern notification — injecting for %s chat=%s thread=%s",
            platform_name,
            source.chat_id,
            source.thread_id,
        )
        await adapter.handle_message(synth_event)
    except Exception as e:
        logger.error("Watch notification injection error: %s", e)
async def _run_process_watcher(self, watcher: dict) -> None:
    """
    Periodically check a background process and push updates to the user.

    Runs as an asyncio task. Stays silent when nothing changed.
    Auto-removes when the process exits or is killed.

    Notification mode (from ``display.background_process_notifications``):
    - ``all``    running-output updates + final message
    - ``result`` final completion message only
    - ``error``  final message only when exit code != 0
    - ``off``    no messages at all
    """
    from tools.process_registry import process_registry
    # Routing metadata captured when the watcher was registered.
    session_id = watcher["session_id"]
    interval = watcher["check_interval"]
    session_key = watcher.get("session_key", "")
    platform_name = watcher.get("platform", "")
    chat_id = watcher.get("chat_id", "")
    thread_id = watcher.get("thread_id", "")
    user_id = watcher.get("user_id", "")
    user_name = watcher.get("user_name", "")
    # agent_notify: inject a synthetic message so the AGENT gets a turn,
    # rather than (or in addition to) a plain text message to the user.
    agent_notify = watcher.get("notify_on_complete", False)
    notify_mode = self._load_background_notifications_mode()
    logger.debug("Process watcher started: %s (every %ss, notify=%s, agent_notify=%s)",
        session_id, interval, notify_mode, agent_notify)
    if notify_mode == "off" and not agent_notify:
        # Still wait for the process to exit so we can log it, but don't
        # push any messages to the user.
        while True:
            await asyncio.sleep(interval)
            session = process_registry.get(session_id)
            if session is None or session.exited:
                break
        logger.debug("Process watcher ended (silent): %s", session_id)
        return
    last_output_len = 0
    while True:
        await asyncio.sleep(interval)
        session = process_registry.get(session_id)
        if session is None:
            # Process was removed from the registry (e.g. killed) — stop.
            break
        # Track whether the output buffer grew since the last poll.
        current_output_len = len(session.output_buffer)
        has_new_output = current_output_len > last_output_len
        last_output_len = current_output_len
        if session.exited:
            # --- Agent-triggered completion: inject synthetic message ---
            # Skip if the agent already consumed the result via wait/poll/log
            from tools.process_registry import process_registry as _pr_check
            if agent_notify and not _pr_check.is_completion_consumed(session_id):
                from tools.ansi_strip import strip_ansi
                # Include up to the last 2000 chars of output, ANSI-stripped.
                _out = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else ""
                synth_text = (
                    f"[SYSTEM: Background process {session_id} completed "
                    f"(exit code {session.exit_code}).\n"
                    f"Command: {session.command}\n"
                    f"Output:\n{_out}]"
                )
                # Resolve canonical routing from the stored session metadata.
                source = self._build_process_event_source({
                    "session_id": session_id,
                    "session_key": session_key,
                    "platform": platform_name,
                    "chat_id": chat_id,
                    "thread_id": thread_id,
                    "user_id": user_id,
                    "user_name": user_name,
                })
                if not source:
                    logger.warning(
                        "Dropping completion notification with no routing metadata for process %s",
                        session_id,
                    )
                    break
                adapter = None
                for p, a in self.adapters.items():
                    if p == source.platform:
                        adapter = a
                        break
                if adapter and source.chat_id:
                    try:
                        from gateway.platforms.base import MessageEvent, MessageType
                        # internal=True marks this as a synthetic event, not
                        # a real user message.
                        synth_event = MessageEvent(
                            text=synth_text,
                            message_type=MessageType.TEXT,
                            source=source,
                            internal=True,
                        )
                        logger.info(
                            "Process %s finished — injecting agent notification for session %s chat=%s thread=%s",
                            session_id,
                            session_key,
                            source.chat_id,
                            source.thread_id,
                        )
                        await adapter.handle_message(synth_event)
                    except Exception as e:
                        logger.error("Agent notify injection error: %s", e)
                break
            # --- Normal text-only notification ---
            # Decide whether to notify based on mode
            should_notify = (
                notify_mode in ("all", "result")
                or (notify_mode == "error" and session.exit_code not in (0, None))
            )
            if should_notify:
                # Final message carries up to the last 1000 chars of output.
                new_output = session.output_buffer[-1000:] if session.output_buffer else ""
                message_text = (
                    f"[Background process {session_id} finished with exit code {session.exit_code}~ "
                    f"Here's the final output:\n{new_output}]"
                )
                adapter = None
                for p, a in self.adapters.items():
                    if p.value == platform_name:
                        adapter = a
                        break
                if adapter and chat_id:
                    try:
                        send_meta = {"thread_id": thread_id} if thread_id else None
                        await adapter.send(chat_id, message_text, metadata=send_meta)
                    except Exception as e:
                        logger.error("Watcher delivery error: %s", e)
            break
        elif has_new_output and notify_mode == "all" and not agent_notify:
            # New output available -- deliver status update (only in "all" mode)
            # Skip periodic updates for agent_notify watchers (they only care about completion)
            new_output = session.output_buffer[-500:] if session.output_buffer else ""
            message_text = (
                f"[Background process {session_id} is still running~ "
                f"New output:\n{new_output}]"
            )
            adapter = None
            for p, a in self.adapters.items():
                if p.value == platform_name:
                    adapter = a
                    break
            if adapter and chat_id:
                try:
                    send_meta = {"thread_id": thread_id} if thread_id else None
                    await adapter.send(chat_id, message_text, metadata=send_meta)
                except Exception as e:
                    logger.error("Watcher delivery error: %s", e)
    logger.debug("Process watcher ended: %s", session_id)
fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552) Long messages sent via send_message tool or cron delivery silently failed when exceeding platform limits. Gateway adapters handle this via truncate_message(), but the standalone senders in send_message_tool bypassed that entirely. - Apply truncate_message() chunking in _send_to_platform() before dispatching to individual platform senders - Remove naive message[i:i+2000] character split in _send_discord() in favor of centralized smart splitting - Attach media files to last chunk only for Telegram - Add regression tests for chunking and media placement Cherry-picked from PR #1557 by llbn. * fix(approval): show full command in dangerous command approval (#1553) Previously the command was truncated to 80 chars in CLI (with a [v]iew full option), 500 chars in Discord embeds, and missing entirely in Telegram/Slack approval messages. Now the full command is always displayed everywhere: - CLI: removed 80-char truncation and [v]iew full menu option - Gateway (TG/Slack): approval_required message includes full command in a code block - Discord: embed shows full command up to 4096-char limit - Windows: skip SIGALRM-based test timeout (Unix-only) - Updated tests: replaced view-flow tests with direct approval tests Cherry-picked from PR #1566 by crazywriter1. * fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624) The interrupt polling loop in chat() waited on the queue without invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy buffer only flushed on input events, causing the CLI to appear frozen during tool execution until the user typed a key. Fix: call _invalidate() on each queue timeout (every ~100ms, throttled to 150ms) to force the renderer to flush buffered agent output. 
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580) When --migrate-secrets is not passed (the default), API keys like OPENROUTER_API_KEY are silently skipped with no warning. Users don't realize their keys weren't migrated until the agent fails to connect. Add a post-migration warning with actionable instructions: either re-run with --migrate-secrets or add the key manually via hermes config set. Cherry-picked from PR #1593 by ygd58. * fix(security): block sandbox backend creds from subprocess env (#1264) Add Modal and Daytona sandbox credentials to the subprocess env blocklist so they're not leaked to agent terminal sessions via printenv/env. Cherry-picked from PR #1571 by ygd58. * fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816) When a user sends multiple messages while the agent keeps failing, _run_agent() calls itself recursively with no depth limit. This can exhaust stack/memory if the agent is in a failure loop. Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is logged and the current result is returned instead of recursing deeper. The log handler duplication bug described in #816 was already fixed separately (AIAgent.__init__ deduplicates handlers). --------- Co-authored-by: buray <ygd58@users.noreply.github.com> Co-authored-by: lbn <llbn@users.noreply.github.com> Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
2026-03-17 02:23:07 -07:00
_MAX_INTERRUPT_DEPTH = 3  # Cap recursive interrupt handling (#816): beyond this depth the pending message is logged and the current result returned instead of recursing
@staticmethod
def _agent_config_signature(
model: str,
runtime: dict,
enabled_toolsets: list,
ephemeral_prompt: str,
) -> str:
"""Compute a stable string key from agent config values.
When this signature changes between messages, the cached AIAgent is
discarded and rebuilt. When it stays the same, the cached agent is
reused preserving the frozen system prompt and tool schemas for
prompt cache hits.
"""
import hashlib, json as _j
# Fingerprint the FULL credential string instead of using a short
# prefix. OAuth/JWT-style tokens frequently share a common prefix
# (e.g. "eyJhbGci"), which can cause false cache hits across auth
# switches if only the first few characters are considered.
_api_key = str(runtime.get("api_key", "") or "")
_api_key_fingerprint = hashlib.sha256(_api_key.encode()).hexdigest() if _api_key else ""
blob = _j.dumps(
[
model,
_api_key_fingerprint,
runtime.get("base_url", ""),
runtime.get("provider", ""),
runtime.get("api_mode", ""),
sorted(enabled_toolsets) if enabled_toolsets else [],
# reasoning_config excluded — it's set per-message on the
# cached agent and doesn't affect system prompt or tools.
ephemeral_prompt or "",
],
sort_keys=True,
default=str,
)
return hashlib.sha256(blob.encode()).hexdigest()[:16]
def _apply_session_model_override(
self, session_key: str, model: str, runtime_kwargs: dict
) -> tuple:
"""Apply /model session overrides if present, returning (model, runtime_kwargs).
The gateway /model command stores per-session overrides in
``_session_model_overrides``. These must take precedence over
config.yaml defaults so the switched model is actually used for
subsequent messages. Fields with ``None`` values are skipped so
partial overrides don't clobber valid config defaults.
"""
override = self._session_model_overrides.get(session_key)
if not override:
return model, runtime_kwargs
model = override.get("model", model)
for key in ("provider", "api_key", "base_url", "api_mode"):
val = override.get(key)
if val is not None:
runtime_kwargs[key] = val
return model, runtime_kwargs
def _is_intentional_model_switch(self, session_key: str, agent_model: str) -> bool:
"""Return True if *agent_model* matches an active /model session override."""
override = self._session_model_overrides.get(session_key)
return override is not None and override.get("model") == agent_model
def _evict_cached_agent(self, session_key: str) -> None:
"""Remove a cached agent for a session (called on /new, /model, etc)."""
_lock = getattr(self, "_agent_cache_lock", None)
if _lock:
with _lock:
self._agent_cache.pop(session_key, None)
feat: gateway proxy mode — forward messages to remote API server When GATEWAY_PROXY_URL (or gateway.proxy_url in config.yaml) is set, the gateway becomes a thin relay: it handles platform I/O (encryption, threading, media) and delegates all agent work to a remote Hermes API server via POST /v1/chat/completions with SSE streaming. This enables the primary use case of running a Matrix E2EE gateway in Docker on Linux while the actual agent runs on the host (e.g. macOS) with full access to local files, memory, skills, and a unified session store. Works for any platform adapter, not just Matrix. Configuration: - GATEWAY_PROXY_URL env var (Docker-friendly) - gateway.proxy_url in config.yaml - GATEWAY_PROXY_KEY env var for API auth (matches API_SERVER_KEY) - X-Hermes-Session-Id header for session continuity Architecture: - _get_proxy_url() checks env var first, then config.yaml - _run_agent_via_proxy() handles HTTP forwarding with SSE streaming - _run_agent() delegates to proxy path when URL is configured - Platform streaming (GatewayStreamConsumer) works through proxy - Returns compatible result dict for session store recording Files changed: - gateway/run.py: proxy mode implementation (~250 lines) - hermes_cli/config.py: GATEWAY_PROXY_URL + GATEWAY_PROXY_KEY env vars - tests/gateway/test_proxy_mode.py: 17 tests covering config resolution, dispatch, HTTP forwarding, error handling, message filtering, and result shape validation Closes discussion from Cars29 re: Matrix gateway mixed-mode issue.
2026-04-14 10:33:02 -07:00
# ------------------------------------------------------------------
# Proxy mode: forward messages to a remote Hermes API server
# ------------------------------------------------------------------
def _get_proxy_url(self) -> Optional[str]:
"""Return the proxy URL if proxy mode is configured, else None.
Checks GATEWAY_PROXY_URL env var first (convenient for Docker),
then ``gateway.proxy_url`` in config.yaml.
"""
url = os.getenv("GATEWAY_PROXY_URL", "").strip()
if url:
return url.rstrip("/")
cfg = _load_gateway_config()
url = (cfg.get("gateway") or {}).get("proxy_url", "").strip()
if url:
return url.rstrip("/")
return None
async def _run_agent_via_proxy(
self,
message: str,
context_prompt: str,
history: List[Dict[str, Any]],
source: "SessionSource",
session_id: str,
session_key: str = None,
event_message_id: Optional[str] = None,
) -> Dict[str, Any]:
"""Forward the message to a remote Hermes API server instead of
running a local AIAgent.
When ``GATEWAY_PROXY_URL`` (or ``gateway.proxy_url`` in config.yaml)
is set, the gateway becomes a thin relay: it handles platform I/O
(encryption, threading, media) and delegates all agent work to the
remote server via ``POST /v1/chat/completions`` with SSE streaming.
This lets a Docker container handle Matrix E2EE while the actual
agent runs on the host with full access to local files, memory,
skills, and a unified session store.
"""
try:
from aiohttp import ClientSession as _AioClientSession, ClientTimeout
except ImportError:
return {
"final_response": "⚠️ Proxy mode requires aiohttp. Install with: pip install aiohttp",
"messages": [],
"api_calls": 0,
"tools": [],
}
proxy_url = self._get_proxy_url()
if not proxy_url:
return {
"final_response": "⚠️ Proxy URL not configured (GATEWAY_PROXY_URL or gateway.proxy_url)",
"messages": [],
"api_calls": 0,
"tools": [],
}
proxy_key = os.getenv("GATEWAY_PROXY_KEY", "").strip()
# Build messages in OpenAI chat format --------------------------
#
# The remote api_server can maintain session continuity via
# X-Hermes-Session-Id, so it loads its own history. We only
# need to send the current user message. If the remote has
# no history for this session yet, include what we have locally
# so the first exchange has context.
#
# We always include the current message. For history, send a
# compact version (text-only user/assistant turns) — the remote
# handles tool replay and system prompts.
api_messages: List[Dict[str, str]] = []
if context_prompt:
api_messages.append({"role": "system", "content": context_prompt})
for msg in history:
role = msg.get("role")
content = msg.get("content")
if role in ("user", "assistant") and content:
api_messages.append({"role": role, "content": content})
api_messages.append({"role": "user", "content": message})
# HTTP headers ---------------------------------------------------
headers: Dict[str, str] = {"Content-Type": "application/json"}
if proxy_key:
headers["Authorization"] = f"Bearer {proxy_key}"
if session_id:
headers["X-Hermes-Session-Id"] = session_id
body = {
"model": "hermes-agent",
"messages": api_messages,
"stream": True,
}
# Set up platform streaming if available -------------------------
_stream_consumer = None
_scfg = getattr(getattr(self, "config", None), "streaming", None)
if _scfg is None:
from gateway.config import StreamingConfig
_scfg = StreamingConfig()
platform_key = _platform_config_key(source.platform)
user_config = _load_gateway_config()
from gateway.display_config import resolve_display_setting
_plat_streaming = resolve_display_setting(
user_config, platform_key, "streaming"
)
_streaming_enabled = (
_scfg.enabled and _scfg.transport != "off"
if _plat_streaming is None
else bool(_plat_streaming)
)
if source.thread_id:
_thread_metadata: Optional[Dict[str, Any]] = {"thread_id": source.thread_id}
else:
_thread_metadata = None
if _streaming_enabled:
try:
from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
from gateway.config import Platform
_adapter = self.adapters.get(source.platform)
if _adapter:
_adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
_effective_cursor = _scfg.cursor if _adapter_supports_edit else ""
_buffer_only = False
if source.platform == Platform.MATRIX:
_effective_cursor = ""
_buffer_only = True
_consumer_cfg = StreamConsumerConfig(
edit_interval=_scfg.edit_interval,
buffer_threshold=_scfg.buffer_threshold,
cursor=_effective_cursor,
buffer_only=_buffer_only,
)
_stream_consumer = GatewayStreamConsumer(
adapter=_adapter,
chat_id=source.chat_id,
config=_consumer_cfg,
metadata=_thread_metadata,
)
except Exception as _sc_err:
logger.debug("Proxy: could not set up stream consumer: %s", _sc_err)
# Run the stream consumer task in the background
stream_task = None
if _stream_consumer:
stream_task = asyncio.create_task(_stream_consumer.run())
# Send typing indicator
_adapter = self.adapters.get(source.platform)
if _adapter:
try:
await _adapter.send_typing(source.chat_id, metadata=_thread_metadata)
except Exception:
pass
# Make the HTTP request with SSE streaming -----------------------
full_response = ""
_start = time.time()
try:
_timeout = ClientTimeout(total=0, sock_read=1800)
async with _AioClientSession(timeout=_timeout) as session:
async with session.post(
f"{proxy_url}/v1/chat/completions",
json=body,
headers=headers,
) as resp:
if resp.status != 200:
error_text = await resp.text()
logger.warning(
"Proxy error (%d) from %s: %s",
resp.status, proxy_url, error_text[:500],
)
return {
"final_response": f"⚠️ Proxy error ({resp.status}): {error_text[:300]}",
"messages": [],
"api_calls": 0,
"tools": [],
}
# Parse SSE stream
buffer = ""
async for chunk in resp.content.iter_any():
text = chunk.decode("utf-8", errors="replace")
buffer += text
# Process complete SSE lines
while "\n" in buffer:
line, buffer = buffer.split("\n", 1)
line = line.strip()
if not line:
continue
if line.startswith("data: "):
data = line[6:]
if data.strip() == "[DONE]":
break
try:
obj = json.loads(data)
choices = obj.get("choices", [])
if choices:
delta = choices[0].get("delta", {})
content = delta.get("content", "")
if content:
full_response += content
if _stream_consumer:
_stream_consumer.on_delta(content)
except json.JSONDecodeError:
pass
except asyncio.CancelledError:
raise
except Exception as e:
logger.error("Proxy connection error to %s: %s", proxy_url, e)
if not full_response:
return {
"final_response": f"⚠️ Proxy connection error: {e}",
"messages": [],
"api_calls": 0,
"tools": [],
}
# Partial response — return what we got
finally:
# Finalize stream consumer
if _stream_consumer:
_stream_consumer.finish()
if stream_task:
try:
await asyncio.wait_for(stream_task, timeout=5.0)
except (asyncio.TimeoutError, asyncio.CancelledError):
stream_task.cancel()
_elapsed = time.time() - _start
logger.info(
"proxy response: url=%s session=%s time=%.1fs response=%d chars",
proxy_url, (session_id or "")[:20], _elapsed, len(full_response),
)
return {
"final_response": full_response or "(No response from remote agent)",
"messages": [
{"role": "user", "content": message},
{"role": "assistant", "content": full_response},
],
"api_calls": 1,
"tools": [],
"history_offset": len(history),
"session_id": session_id,
"response_previewed": _stream_consumer is not None and bool(full_response),
}
# ------------------------------------------------------------------
async def _run_agent(
self,
message: str,
context_prompt: str,
history: List[Dict[str, Any]],
source: SessionSource,
session_id: str,
session_key: str = None,
_interrupt_depth: int = 0,
event_message_id: Optional[str] = None,
channel_prompt: Optional[str] = None,
) -> Dict[str, Any]:
"""
Run the agent with the given message and context.
Returns the full result dict from run_conversation, including:
- "final_response": str (the text to send back)
- "messages": list (full conversation including tool calls)
- "api_calls": int
- "completed": bool
This is run in a thread pool to not block the event loop.
Supports interruption via new messages.
"""
# ---- Proxy mode: delegate to remote API server ----
if self._get_proxy_url():
return await self._run_agent_via_proxy(
message=message,
context_prompt=context_prompt,
history=history,
source=source,
session_id=session_id,
session_key=session_key,
event_message_id=event_message_id,
)
from run_agent import AIAgent
import queue
user_config = _load_gateway_config()
platform_key = _platform_config_key(source.platform)
from hermes_cli.tools_config import _get_platform_tools
enabled_toolsets = sorted(_get_platform_tools(user_config, platform_key))
display_config = user_config.get("display", {})
if not isinstance(display_config, dict):
display_config = {}
# Per-platform display settings — resolve via display_config module
# which checks display.platforms.<platform>.<key> first, then
# display.<key> global, then built-in platform defaults.
from gateway.display_config import resolve_display_setting
# Apply tool preview length config (0 = no limit)
try:
from agent.display import set_tool_preview_max_len
feat: per-platform display verbosity configuration (#8006) Add display.platforms section to config.yaml for per-platform overrides of display settings (tool_progress, show_reasoning, streaming, tool_preview_length). Each platform gets sensible built-in defaults based on capability tier: - High (telegram, discord): tool_progress=all, streaming follows global - Medium (slack, mattermost, matrix, feishu): tool_progress=new - Low (signal, whatsapp, bluebubbles, wecom, etc.): tool_progress=off, streaming=false - Minimal (email, sms, webhook, homeassistant): tool_progress=off, streaming=false Example config: display: platforms: telegram: tool_progress: all show_reasoning: true slack: tool_progress: off Resolution order: platform override > global setting > built-in platform default. Changes: - New gateway/display_config.py: resolver module with tier-based platform defaults - gateway/run.py: tool_progress, tool_preview_length, streaming, show_reasoning all resolve per-platform via the new resolver - /verbose command: now cycles tool_progress per-platform (saves to display.platforms.<platform>.tool_progress instead of global) - /reasoning show|hide: now saves show_reasoning per-platform - Config version 15 -> 16: migrates tool_progress_overrides into display.platforms - Backward compat: legacy tool_progress_overrides still read as fallback - 27 new tests for resolver, normalization, migration, backward compat - Updated verbose command tests for per-platform behavior Addresses community request for per-channel verbosity control (Guillaume Meyer, Nathan Danielsen) — high verbosity on backchannel Telegram, low on customer-facing Slack, none on email.
2026-04-11 17:20:34 -07:00
_tpl = resolve_display_setting(user_config, platform_key, "tool_preview_length", 0)
set_tool_preview_max_len(int(_tpl) if _tpl else 0)
except Exception:
pass
feat: per-platform display verbosity configuration (#8006) Add display.platforms section to config.yaml for per-platform overrides of display settings (tool_progress, show_reasoning, streaming, tool_preview_length). Each platform gets sensible built-in defaults based on capability tier: - High (telegram, discord): tool_progress=all, streaming follows global - Medium (slack, mattermost, matrix, feishu): tool_progress=new - Low (signal, whatsapp, bluebubbles, wecom, etc.): tool_progress=off, streaming=false - Minimal (email, sms, webhook, homeassistant): tool_progress=off, streaming=false Example config: display: platforms: telegram: tool_progress: all show_reasoning: true slack: tool_progress: off Resolution order: platform override > global setting > built-in platform default. Changes: - New gateway/display_config.py: resolver module with tier-based platform defaults - gateway/run.py: tool_progress, tool_preview_length, streaming, show_reasoning all resolve per-platform via the new resolver - /verbose command: now cycles tool_progress per-platform (saves to display.platforms.<platform>.tool_progress instead of global) - /reasoning show|hide: now saves show_reasoning per-platform - Config version 15 -> 16: migrates tool_progress_overrides into display.platforms - Backward compat: legacy tool_progress_overrides still read as fallback - 27 new tests for resolver, normalization, migration, backward compat - Updated verbose command tests for per-platform behavior Addresses community request for per-channel verbosity control (Guillaume Meyer, Nathan Danielsen) — high verbosity on backchannel Telegram, low on customer-facing Slack, none on email.
2026-04-11 17:20:34 -07:00
# Tool progress mode — resolved per-platform with env var fallback
_resolved_tp = resolve_display_setting(user_config, platform_key, "tool_progress")
progress_mode = (
feat: per-platform display verbosity configuration (#8006) Add display.platforms section to config.yaml for per-platform overrides of display settings (tool_progress, show_reasoning, streaming, tool_preview_length). Each platform gets sensible built-in defaults based on capability tier: - High (telegram, discord): tool_progress=all, streaming follows global - Medium (slack, mattermost, matrix, feishu): tool_progress=new - Low (signal, whatsapp, bluebubbles, wecom, etc.): tool_progress=off, streaming=false - Minimal (email, sms, webhook, homeassistant): tool_progress=off, streaming=false Example config: display: platforms: telegram: tool_progress: all show_reasoning: true slack: tool_progress: off Resolution order: platform override > global setting > built-in platform default. Changes: - New gateway/display_config.py: resolver module with tier-based platform defaults - gateway/run.py: tool_progress, tool_preview_length, streaming, show_reasoning all resolve per-platform via the new resolver - /verbose command: now cycles tool_progress per-platform (saves to display.platforms.<platform>.tool_progress instead of global) - /reasoning show|hide: now saves show_reasoning per-platform - Config version 15 -> 16: migrates tool_progress_overrides into display.platforms - Backward compat: legacy tool_progress_overrides still read as fallback - 27 new tests for resolver, normalization, migration, backward compat - Updated verbose command tests for per-platform behavior Addresses community request for per-channel verbosity control (Guillaume Meyer, Nathan Danielsen) — high verbosity on backchannel Telegram, low on customer-facing Slack, none on email.
2026-04-11 17:20:34 -07:00
_resolved_tp
or os.getenv("HERMES_TOOL_PROGRESS_MODE")
or "all"
)
# Disable tool progress for webhooks - they don't support message editing,
# so each progress line would be sent as a separate message.
from gateway.config import Platform
tool_progress_enabled = progress_mode != "off" and source.platform != Platform.WEBHOOK
# Natural assistant status messages are intentionally independent from
# tool progress and token streaming. Users can keep tool_progress quiet
# in chat platforms while opting into concise mid-turn updates.
interim_assistant_messages_enabled = (
source.platform != Platform.WEBHOOK
and is_truthy_value(
display_config.get("interim_assistant_messages"),
default=True,
)
)
# Queue for progress messages (thread-safe)
progress_queue = queue.Queue() if tool_progress_enabled else None
last_tool = [None] # Mutable container for tracking in closure
last_progress_msg = [None] # Track last message for dedup
repeat_count = [0] # How many times the same message repeated
def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs):
    """Receive tool lifecycle events from the agent and enqueue progress lines."""
    if not progress_queue:
        return
    # React only to tool.started; completions, reasoning events, etc. are ignored.
    if event_type != "tool.started":
        return
    # "new" mode suppresses consecutive reports for the same tool.
    if progress_mode == "new" and tool_name == last_tool[0]:
        return
    last_tool[0] = tool_name

    from agent.display import get_tool_emoji
    emoji = get_tool_emoji(tool_name, default="⚙️")

    # Verbose mode: show detailed arguments, respects tool_preview_length.
    if progress_mode == "verbose":
        if args:
            from agent.display import get_tool_preview_max_len
            import json as _json
            limit = get_tool_preview_max_len()
            serialized = _json.dumps(args, ensure_ascii=False, default=str)
            # A limit of 0 (the default) means no truncation in verbose
            # mode — the user explicitly asked for full detail, and
            # platform message-length limits handle the rest.
            if limit > 0 and len(serialized) > limit:
                serialized = serialized[:limit - 3] + "..."
            msg = f"{emoji} {tool_name}({list(args.keys())})\n{serialized}"
        elif preview:
            msg = f"{emoji} {tool_name}: \"{preview}\""
        else:
            msg = f"{emoji} {tool_name}..."
        progress_queue.put(msg)
        return

    # "all" / "new" modes: short preview. Defaults to a 40-char cap when
    # tool_preview_length is unset — unlike CLI spinners, these lines
    # persist as permanent chat messages, so keep them compact.
    if preview:
        from agent.display import get_tool_preview_max_len
        configured = get_tool_preview_max_len()
        cap = configured if configured > 0 else 40
        if len(preview) > cap:
            preview = preview[:cap - 3] + "..."
        msg = f"{emoji} {tool_name}: \"{preview}\""
    else:
        msg = f"{emoji} {tool_name}..."

    # Collapse consecutive identical progress lines into one with a
    # repeat counter. Common with execute_code, where models iterate on
    # the same boilerplate (identical previews each time).
    if msg == last_progress_msg[0]:
        repeat_count[0] += 1
        # Special "dedup" queue message updates the last rendered line.
        progress_queue.put(("__dedup__", msg, repeat_count[0]))
        return
    last_progress_msg[0] = msg
    repeat_count[0] = 0
    progress_queue.put(msg)
# Background task to send progress messages accumulates tool lines into
# a single message that gets edited in place.
#
# Threading metadata is platform-specific:
# - Slack DM threading needs event_message_id fallback (reply thread)
# - Telegram uses message_thread_id only for forum topics; passing a
#   normal DM/group message id as thread_id causes send failures
# - Other platforms should use explicit source.thread_id only
_progress_thread_id = (
    (source.thread_id or event_message_id)
    if source.platform == Platform.SLACK
    else source.thread_id
)
_progress_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None
async def send_progress_messages():
    """Drain progress_queue and render tool-progress lines in the chat.

    Accumulates lines into a single message that is edited in place
    (when the platform supports editing), throttled to avoid Telegram
    flood control. Runs until cancelled; on cancellation it drains the
    queue and performs one final edit.
    """
    if not progress_queue:
        return
    adapter = self.adapters.get(source.platform)
    if not adapter:
        return
    # Skip tool progress for platforms that don't support message
    # editing (e.g. iMessage/BlueBubbles) — each progress update
    # would become a separate message bubble, which is noisy.
    # Detection: the adapter class still uses the base class's
    # edit_message (i.e. never overrode it).
    from gateway.platforms.base import BasePlatformAdapter as _BaseAdapter
    if type(adapter).edit_message is _BaseAdapter.edit_message:
        # Discard anything already queued so the producer doesn't block.
        while not progress_queue.empty():
            try:
                progress_queue.get_nowait()
            except Exception:
                break
        return
    progress_lines = []  # Accumulated tool lines
    progress_msg_id = None  # ID of the progress message to edit
    can_edit = True  # False once an edit fails (platform doesn't support it)
    _last_edit_ts = 0.0  # Throttle edits to avoid Telegram flood control
    _PROGRESS_EDIT_INTERVAL = 1.5  # Minimum seconds between edits
    while True:
        try:
            raw = progress_queue.get_nowait()
            # Handle dedup messages: update last line with repeat counter
            # instead of appending a new line.
            if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
                _, base_msg, count = raw
                if progress_lines:
                    progress_lines[-1] = f"{base_msg} (×{count + 1})"
                msg = progress_lines[-1] if progress_lines else base_msg
            else:
                msg = raw
                progress_lines.append(msg)
            # Throttle edits: batch rapid tool updates into fewer
            # API calls to avoid hitting Telegram flood control.
            # (grammY auto-retry pattern: proactively rate-limit
            # instead of reacting to 429s.)
            _now = time.monotonic()
            _remaining = _PROGRESS_EDIT_INTERVAL - (_now - _last_edit_ts)
            if _remaining > 0:
                # Wait out the throttle interval, then loop back to
                # drain any additional queued messages before sending
                # a single batched edit.
                await asyncio.sleep(_remaining)
                continue
            if can_edit and progress_msg_id is not None:
                # Try to edit the existing progress message in place.
                full_text = "\n".join(progress_lines)
                result = await adapter.edit_message(
                    chat_id=source.chat_id,
                    message_id=progress_msg_id,
                    content=full_text,
                )
                if not result.success:
                    _err = (getattr(result, "error", "") or "").lower()
                    if "flood" in _err or "retry after" in _err:
                        # Flood control hit — disable further edits,
                        # switch to sending new messages only for
                        # important updates. Don't block 23s.
                        logger.info(
                            "[%s] Progress edits disabled due to flood control",
                            adapter.name,
                        )
                        can_edit = False
                        # Deliver the current line as a fresh message so
                        # the update isn't lost.
                        await adapter.send(chat_id=source.chat_id, content=msg, metadata=_progress_metadata)
            else:
                if can_edit:
                    # First tool: send all accumulated text as new message
                    full_text = "\n".join(progress_lines)
                    result = await adapter.send(chat_id=source.chat_id, content=full_text, metadata=_progress_metadata)
                else:
                    # Editing unsupported: send just this line
                    result = await adapter.send(chat_id=source.chat_id, content=msg, metadata=_progress_metadata)
                if result.success and result.message_id:
                    # Remember the message id so subsequent updates edit it.
                    progress_msg_id = result.message_id
            # NOTE(review): timestamp is refreshed after both the edit and
            # send paths so the throttle interval applies to every API
            # attempt — confirm this matches the intended original nesting.
            _last_edit_ts = time.monotonic()
            # Restore typing indicator (sending/editing clears it).
            await asyncio.sleep(0.3)
            await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
        except queue.Empty:
            # Nothing queued yet; poll again shortly.
            await asyncio.sleep(0.3)
        except asyncio.CancelledError:
            # Turn is over: drain remaining queued messages into the
            # accumulated lines (applying any pending dedup counters).
            while not progress_queue.empty():
                try:
                    raw = progress_queue.get_nowait()
                    if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
                        _, base_msg, count = raw
                        if progress_lines:
                            progress_lines[-1] = f"{base_msg} (×{count + 1})"
                    else:
                        progress_lines.append(raw)
                except Exception:
                    break
            # Final edit with all remaining tools (only if editing works)
            if can_edit and progress_lines and progress_msg_id:
                full_text = "\n".join(progress_lines)
                try:
                    await adapter.edit_message(
                        chat_id=source.chat_id,
                        message_id=progress_msg_id,
                        content=full_text,
                    )
                except Exception:
                    pass
            return
        except Exception as e:
            # Progress rendering is best-effort; log and keep looping.
            logger.error("Progress message error: %s", e)
            await asyncio.sleep(1)
# Single-element lists act as mutable cells shared with the worker
# thread: the agent instance (for interrupt support), the run result,
# the tool definitions, and the stream consumer.
agent_holder, result_holder, tools_holder, stream_consumer_holder = (
    [None],
    [None],
    [None],
    [None],
)
# Bridge sync step_callback → async hooks.emit for agent:step events
_loop_for_step = asyncio.get_running_loop()
_hooks_ref = self.hooks
def _step_callback_sync(iteration: int, prev_tools: list) -> None:
    """Forward an agent step event to async hooks from the worker thread."""
    try:
        # prev_tools may be list[str] or list[dict] with "name"/"result"
        # keys. Normalise to keep "tool_names" backward-compatible for
        # user-authored hooks that do ', '.join(tool_names)'.
        _names: list[str] = [
            (entry.get("name") or "") if isinstance(entry, dict) else str(entry)
            for entry in (prev_tools or [])
        ]
        payload = {
            "platform": source.platform.value if source.platform else "",
            "user_id": source.user_id,
            "session_id": session_id,
            "iteration": iteration,
            "tool_names": _names,
            "tools": prev_tools,
        }
        # Schedule on the gateway loop; we are on a worker thread here.
        asyncio.run_coroutine_threadsafe(
            _hooks_ref.emit("agent:step", payload),
            _loop_for_step,
        )
    except Exception as _e:
        logger.debug("agent:step hook error: %s", _e)
# Bridge sync status_callback → async adapter.send for context pressure
# notifications: capture the adapter, chat id, and thread metadata now so
# the worker-thread callback needs no access to gateway state.
_status_adapter = self.adapters.get(source.platform)
_status_chat_id = source.chat_id
_status_thread_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None
def _status_callback_sync(event_type: str, message: str) -> None:
    """Send a mid-turn status message to the originating chat (thread-safe)."""
    if not _status_adapter:
        return
    try:
        coro = _status_adapter.send(
            _status_chat_id,
            message,
            metadata=_status_thread_metadata,
        )
        # Hop from the worker thread onto the gateway event loop.
        asyncio.run_coroutine_threadsafe(coro, _loop_for_step)
    except Exception as _e:
        logger.debug("status_callback error (%s): %s", event_type, _e)
def run_sync():
# The conditional re-assignment of `message` further below
# (prepending model-switch notes) makes Python treat it as a
# local variable in the entire function. `nonlocal` lets us
# read *and* reassign the outer `_run_agent` parameter without
# triggering an UnboundLocalError on the earlier read at
# `_resolve_turn_agent_config(message, …)`.
nonlocal message
# session_key is now set via contextvars in _set_session_env()
# (concurrency-safe). Keep os.environ as fallback for CLI/cron.
os.environ["HERMES_SESSION_KEY"] = session_key or ""
# Read from env var or use default (same as CLI)
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
# Map platform enum to the platform hint key the agent understands.
# Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value
# Combine platform context, per-channel context, and the user-configured
# ephemeral system prompt.
combined_ephemeral = context_prompt or ""
event_channel_prompt = (channel_prompt or "").strip()
if event_channel_prompt:
combined_ephemeral = (combined_ephemeral + "\n\n" + event_channel_prompt).strip()
if self._ephemeral_system_prompt:
combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
# Re-read .env and config for fresh credentials (gateway is long-lived,
# keys may change without restart).
try:
load_dotenv(_env_path, override=True, encoding="utf-8")
except UnicodeDecodeError:
load_dotenv(_env_path, override=True, encoding="latin-1")
except Exception:
pass
try:
model, runtime_kwargs = self._resolve_session_agent_runtime(
source=source,
session_key=session_key,
user_config=user_config,
)
logger.debug(
"run_agent resolved: model=%s provider=%s session=%s",
model, runtime_kwargs.get("provider"), (session_key or "")[:30],
)
except Exception as exc:
return {
"final_response": f"⚠️ Provider authentication failed: {exc}",
"messages": [],
"api_calls": 0,
"tools": [],
}
pr = self._provider_routing
reasoning_config = self._load_reasoning_config()
self._reasoning_config = reasoning_config
self._service_tier = self._load_service_tier()
# Set up stream consumer for token streaming or interim commentary.
_stream_consumer = None
_stream_delta_cb = None
_scfg = getattr(getattr(self, 'config', None), 'streaming', None)
if _scfg is None:
from gateway.config import StreamingConfig
_scfg = StreamingConfig()
# Per-platform streaming gate: display.platforms.<plat>.streaming
# can disable streaming for specific platforms even when the global
# streaming config is enabled.
_plat_streaming = resolve_display_setting(
user_config, platform_key, "streaming"
)
# None = no per-platform override → follow global config
_streaming_enabled = (
_scfg.enabled and _scfg.transport != "off"
if _plat_streaming is None
else bool(_plat_streaming)
)
_want_stream_deltas = _streaming_enabled
_want_interim_messages = interim_assistant_messages_enabled
_want_interim_consumer = _want_interim_messages
if _want_stream_deltas or _want_interim_consumer:
try:
from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
_adapter = self.adapters.get(source.platform)
if _adapter:
# Platforms that don't support editing sent messages
# (e.g. QQ, WeChat) should skip streaming entirely —
# without edit support, the consumer sends a partial
# first message that can never be updated, resulting in
# duplicate messages (partial + final).
_adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
if not _adapter_supports_edit:
raise RuntimeError("skip streaming for non-editable platform")
_effective_cursor = _scfg.cursor
# Some Matrix clients render the streaming cursor
# as a visible tofu/white-box artifact. Keep
# streaming text on Matrix, but suppress the cursor.
_buffer_only = False
if source.platform == Platform.MATRIX:
_effective_cursor = ""
# Buffered-only streaming: accumulate deltas and flush them as whole
# messages instead of live-editing a placeholder (presumably because the
# target platform cannot edit sent messages — TODO confirm against the
# SUPPORTS_MESSAGE_EDITING branch upstream).
_buffer_only = True
_consumer_cfg = StreamConsumerConfig(
    edit_interval=_scfg.edit_interval,
    buffer_threshold=_scfg.buffer_threshold,
    # Cursor glyph shown while streaming; expected to be empty on
    # platforms that cannot edit messages, since it could never be
    # removed afterward.
    cursor=_effective_cursor,
    buffer_only=_buffer_only,
)
_stream_consumer = GatewayStreamConsumer(
    adapter=_adapter,
    chat_id=source.chat_id,
    config=_consumer_cfg,
    # Route streamed output into the originating thread when one is known.
    metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None,
)
if _want_stream_deltas:
    # Wire the agent's delta stream into the consumer.
    _stream_delta_cb = _stream_consumer.on_delta
# Expose the consumer to the enclosing scope (holder list pattern lets
# inner callbacks see the instance created here).
stream_consumer_holder[0] = _stream_consumer
except Exception as _sc_err:
logger.debug("Could not set up stream consumer: %s", _sc_err)
def _interim_assistant_cb(text: str, *, already_streamed: bool = False) -> None:
    """Forward interim assistant commentary to the user.

    When a stream consumer is active, commentary is routed through it
    (``already_streamed`` text only records a segment break, since the
    user has already seen it via streaming). Otherwise the text is sent
    as a plain message through the status adapter, best-effort.
    """
    consumer = _stream_consumer
    if consumer is not None:
        if already_streamed:
            consumer.on_segment_break()
        else:
            consumer.on_commentary(text)
        return
    # Direct-send path: skip text the user already saw, and skip when
    # there is no adapter or the payload is blank.
    if already_streamed:
        return
    if not _status_adapter:
        return
    if not str(text or "").strip():
        return
    try:
        send_coro = _status_adapter.send(
            _status_chat_id,
            text,
            metadata=_status_thread_metadata,
        )
        asyncio.run_coroutine_threadsafe(send_coro, _loop_for_step)
    except Exception as send_err:
        logger.debug("interim_assistant_callback error: %s", send_err)
fix: hermes update causes dual gateways on macOS (launchd) (#1567) * feat: add optional smart model routing Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow. * fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s * fix(gateway): avoid recursive ExecStop in user systemd unit * fix: extend ExecStop removal and TimeoutStopSec=60 to system unit The cherry-picked PR #1448 fix only covered the user systemd unit. The system unit had the same TimeoutStopSec=15 and could benefit from the same 60s timeout for clean shutdown. Also adds a regression test for the system unit. --------- Co-authored-by: Ninja <ninja@local> * feat(skills): add blender-mcp optional skill for 3D modeling Control a running Blender instance from Hermes via socket connection to the blender-mcp addon (port 9876). Supports creating 3D objects, materials, animations, and running arbitrary bpy code. Placed in optional-skills/ since it requires Blender 4.3+ desktop with a third-party addon manually started each session. * feat(acp): support slash commands in ACP adapter (#1532) Adds /help, /model, /tools, /context, /reset, /compact, /version to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled directly in the server without instantiating the TUI — each command queries agent/session state and returns plain text. Unrecognized /commands fall through to the LLM as normal messages. /model uses detect_provider_for_model() for auto-detection when switching models, matching the CLI and gateway behavior. 
Fixes #1402 * fix(logging): improve error logging in session search tool (#1533) * fix(gateway): restart on retryable startup failures (#1517) * feat(email): add skip_attachments option via config.yaml * feat(email): add skip_attachments option via config.yaml Adds a config.yaml-driven option to skip email attachments in the gateway email adapter. Useful for malware protection and bandwidth savings. Configure in config.yaml: platforms: email: skip_attachments: true Based on PR #1521 by @an420eth, changed from env var to config.yaml (via PlatformConfig.extra) to match the project's config-first pattern. * docs: document skip_attachments option for email adapter * fix(telegram): retry on transient TLS failures during connect and send Add exponential-backoff retry (3 attempts) around initialize() to handle transient TLS resets during gateway startup. Also catches TimedOut and OSError in addition to NetworkError. Add exponential-backoff retry (3 attempts) around send_message() for NetworkError during message delivery, wrapping the existing Markdown fallback logic. Both imports are guarded with try/except ImportError for test environments where telegram is mocked. Based on PR #1527 by cmd8. Closes #1526. * feat: permissive block_anchor thresholds and unicode normalization (#1539) Salvaged from PR #1528 by an420eth. Closes #517. Improves _strategy_block_anchor in fuzzy_match.py: - Add unicode normalization (smart quotes, em/en-dashes, ellipsis, non-breaking spaces → ASCII) so LLM-produced unicode artifacts don't break anchor line matching - Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for multiple candidates — if first/last lines match exactly, the block is almost certainly correct - Use original (non-normalized) content for offset calculation to preserve correct character positions Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space anchors, very-low-similarity unique matches), zero regressions on all 9 existing fuzzy match tests. 
Co-authored-by: an420eth <an420eth@users.noreply.github.com> * feat(cli): add file path autocomplete in the input prompt (#1545) When typing a path-like token (./ ../ ~/ / or containing /), the CLI now shows filesystem completions in the dropdown menu. Directories show a trailing slash and 'dir' label; files show their size. Completions are case-insensitive and capped at 30 entries. Triggered by tokens like: edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ... check ~/doc → shows ~/docs/, ~/documents/, ... read /etc/hos → shows /etc/hosts, /etc/hostname, ... open tools/reg → shows tools/registry.py Slash command autocomplete (/help, /model, etc.) is unaffected — it still triggers when the input starts with /. Inspired by OpenCode PR #145 (file path completion menu). Implementation: - hermes_cli/commands.py: _extract_path_word() detects path-like tokens, _path_completions() yields filesystem Completions with size labels, get_completions() routes to paths vs slash commands - tests/hermes_cli/test_path_completion.py: 26 tests covering path extraction, prefix filtering, directory markers, home expansion, case-insensitivity, integration with slash commands * feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled Add privacy.redact_pii config option (boolean, default false). When enabled, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM provider: - Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256> - User IDs → hashed to user_<sha256> - Chat IDs → numeric portion hashed, platform prefix preserved - Home channel IDs → hashed - Names/usernames → NOT affected (user-chosen, publicly visible) Hashes are deterministic (same user → same hash) so the model can still distinguish users in group chats. Routing and delivery use the original values internally — redaction only affects LLM context. Inspired by OpenClaw PR #47959. 
* fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs) Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM needs the real ID to tag users. Redaction now only applies to WhatsApp, Signal, and Telegram where IDs are pure routing metadata. Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack. * feat: smart approvals + /stop command (inspired by OpenAI Codex) * feat: smart approvals — LLM-based risk assessment for dangerous commands Adds a 'smart' approval mode that uses the auxiliary LLM to assess whether a flagged command is genuinely dangerous or a false positive, auto-approving low-risk commands without prompting the user. Inspired by OpenAI Codex's Smart Approvals guardian subagent (openai/codex#13860). Config (config.yaml): approvals: mode: manual # manual (default), smart, off Modes: - manual — current behavior, always prompt the user - smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block), or ESCALATE (fall through to manual prompt) - off — skip all approval prompts (equivalent to --yolo) When smart mode auto-approves, the pattern gets session-level approval so subsequent uses of the same pattern don't trigger another LLM call. When it denies, the command is blocked without user prompt. When uncertain, it escalates to the normal manual approval flow. The LLM prompt is carefully scoped: it sees only the command text and the flagged reason, assesses actual risk vs false positive, and returns a single-word verdict. * feat: make smart approval model configurable via config.yaml Adds auxiliary.approval section to config.yaml with the same provider/model/base_url/api_key pattern as other aux tasks (vision, web_extract, compression, etc.). Config: auxiliary: approval: provider: auto model: '' # fast/cheap model recommended base_url: '' api_key: '' Bridged to env vars in both CLI and gateway paths so the aux client picks them up automatically. 
* feat: add /stop command to kill all background processes Adds a /stop slash command that kills all running background processes at once. Currently users have to process(list) then process(kill) for each one individually. Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current turn) from /stop (cleans up background processes). See openai/codex#14602. Ctrl+C continues to only interrupt the active agent turn — background dev servers, watchers, etc. are preserved. /stop is the explicit way to clean them all up. * feat: first-class plugin architecture + hide status bar cost by default (#1544) The persistent status bar now shows context %, token counts, and duration but NOT $ cost by default. Cost display is opt-in via: display: show_cost: true in config.yaml, or: hermes config set display.show_cost true The /usage command still shows full cost breakdown since the user explicitly asked for it — this only affects the always-visible bar. Status bar without cost: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m Status bar with show_cost: true: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m * feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex) * feat: improve memory prioritization — user preferences over procedural knowledge Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493) which focus memory writes on user preferences and recurring patterns rather than procedural task details. Key insight: 'Optimize for reducing future user steering — the most valuable memory prevents the user from having to repeat themselves.' 
Changes: - MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy and the core principle about reducing user steering - MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put corrections first, added explicit PRIORITY guidance - Memory nudge (run_agent.py): now asks specifically about preferences, corrections, and workflow patterns instead of generic 'anything' - Memory flush (run_agent.py): now instructs to prioritize user preferences and corrections over task-specific details * feat: more aggressive skill creation and update prompting Press harder on skill updates — the agent should proactively patch skills when it encounters issues during use, not wait to be asked. Changes: - SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction to patch skills immediately when found outdated/wrong - Skills header: added instruction to update loaded skills before finishing if they had missing steps or wrong commands - Skill nudge: more assertive ('save the approach' not 'consider saving'), now also prompts for updating existing skills used in the task - Skill nudge interval: lowered default from 15 to 10 iterations - skill_manage schema: added 'patch it immediately' to update triggers * feat: first-class plugin architecture (#1555) Plugin system for extending Hermes with custom tools, hooks, and integrations — no source code changes required. 
Core system (hermes_cli/plugins.py): - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and pip entry_points (hermes_agent.plugins group) - PluginContext with register_tool() and register_hook() - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call, on_session_start/end - Namespace package handling for relative imports in plugins - Graceful error isolation — broken plugins never crash the agent Integration (model_tools.py): - Plugin discovery runs after built-in + MCP tools - Plugin tools bypass toolset filter via get_plugin_tool_names() - Pre/post tool call hooks fire in handle_function_call() CLI: - /plugins command shows loaded plugins, tool counts, status - Added to COMMANDS dict for autocomplete Docs: - Getting started guide (build-a-hermes-plugin.md) — full tutorial building a calculator plugin step by step - Reference page (features/plugins.md) — quick overview + tables - Covers: file structure, schemas, handlers, hooks, data files, bundled skills, env var gating, pip distribution, common mistakes Tests: 16 tests covering discovery, loading, hooks, tool visibility. * fix: hermes update causes dual gateways on macOS (launchd) Three bugs worked together to create the dual-gateway problem: 1. cmd_update only checked systemd for gateway restart, completely ignoring launchd on macOS. After killing the PID it would print 'Restart it with: hermes gateway run' even when launchd was about to auto-respawn the process. 2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway after SIGTERM (non-zero exit), so the user's manual restart created a second instance. 3. The launchd plist lacked --replace (systemd had it), so the respawned gateway didn't kill stale instances on startup. 
Fixes: - Add --replace to launchd ProgramArguments (matches systemd) - Add launchd detection to cmd_update's auto-restart logic - Print 'auto-restart via launchd' instead of manual restart hint * fix: add launchd plist auto-refresh + explicit restart in cmd_update Two integration issues with the initial fix: 1. Existing macOS users with old plist (no --replace) would never get the fix until manual uninstall/reinstall. Added refresh_launchd_plist_if_needed() — mirrors the existing refresh_systemd_unit_if_needed(). Called from launchd_start(), launchd_restart(), and cmd_update. 2. cmd_update relied on KeepAlive respawn after SIGTERM rather than explicit launchctl stop/start. This caused races: launchd would respawn the old process before the PID file was cleaned up. Now does explicit stop+start (matching how systemd gets an explicit systemctl restart), with plist refresh first so the new --replace flag is picked up. --------- Co-authored-by: Ninja <ninja@local> Co-authored-by: alireza78a <alireza78a@users.noreply.github.com> Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com> Co-authored-by: JP Lew <polydegen@protonmail.com> Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
# Resolve the model/runtime configuration for this specific turn
# (may differ from the gateway defaults, e.g. per-message routing).
turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs)
# Check agent cache — reuse the AIAgent from the previous message
# in this session to preserve the frozen system prompt and tool
# schemas for prompt cache hits.
_sig = self._agent_config_signature(
    turn_route["model"],
    turn_route["runtime"],
    enabled_toolsets,
    combined_ephemeral,
)
agent = None
# Cache attributes are read defensively via getattr — older runner
# instances may not define them.
_cache_lock = getattr(self, "_agent_cache_lock", None)
_cache = getattr(self, "_agent_cache", None)
if _cache_lock and _cache is not None:
    with _cache_lock:
        cached = _cache.get(session_key)
        # Cache entries are (agent, signature) pairs; reuse only while
        # the configuration signature still matches this turn.
        if cached and cached[1] == _sig:
            agent = cached[0]
            # Reset activity timestamp so the inactivity timeout
            # handler doesn't see stale idle time from the previous
            # turn and immediately kill this agent. (#9051)
            agent._last_activity_ts = time.time()
            agent._last_activity_desc = "starting new turn (cached)"
            agent._api_call_count = 0
            logger.debug("Reusing cached agent for session %s", session_key)
if agent is None:
    # Config changed or first message — create fresh agent
    agent = AIAgent(
        model=turn_route["model"],
        **turn_route["runtime"],
        max_iterations=max_iterations,
        quiet_mode=True,
        verbose_logging=False,
        enabled_toolsets=enabled_toolsets,
        ephemeral_system_prompt=combined_ephemeral or None,
        prefill_messages=self._prefill_messages or None,
        reasoning_config=reasoning_config,
        service_tier=self._service_tier,
        request_overrides=turn_route.get("request_overrides"),
        # Provider routing preferences (pr is presumably a provider
        # routing dict built upstream — TODO confirm).
        providers_allowed=pr.get("only"),
        providers_ignored=pr.get("ignore"),
        providers_order=pr.get("order"),
        provider_sort=pr.get("sort"),
        provider_require_parameters=pr.get("require_parameters", False),
        provider_data_collection=pr.get("data_collection"),
        session_id=session_id,
        platform=platform_key,
        user_id=source.user_id,
        # Stable per-chat key so memory providers keep one session per
        # chat across turns.
        gateway_session_key=session_key,
        session_db=self._session_db,
        fallback_model=self._fallback_model,
    )
    if _cache_lock and _cache is not None:
        with _cache_lock:
            _cache[session_key] = (agent, _sig)
    logger.debug("Created new agent for session %s (sig=%s)", session_key, _sig)
# Per-message state — callbacks and reasoning config change every
# turn and must not be baked into the cached agent constructor.
agent.tool_progress_callback = progress_callback if tool_progress_enabled else None
agent.step_callback = _step_callback_sync if _hooks_ref.loaded_hooks else None
agent.stream_delta_callback = _stream_delta_cb
agent.interim_assistant_callback = _interim_assistant_cb if _want_interim_messages else None
agent.status_callback = _status_callback_sync
agent.reasoning_config = reasoning_config
agent.service_tier = self._service_tier
agent.request_overrides = turn_route.get("request_overrides")
# Background-review delivery gate: notices produced during the turn are
# queued in _bg_review_pending until _bg_review_release is set, then
# flushed. The lock guards the pending list across threads.
_bg_review_release = threading.Event()
_bg_review_pending: list[str] = []
_bg_review_pending_lock = threading.Lock()
def _deliver_bg_review_message(message: str) -> None:
    """Best-effort delivery of a background-review notice to the chat.

    No-op when there is no status adapter; failures are logged at
    debug level and never propagate.
    """
    adapter = _status_adapter
    if not adapter:
        return
    try:
        send_coro = adapter.send(
            _status_chat_id,
            message,
            metadata=_status_thread_metadata,
        )
        asyncio.run_coroutine_threadsafe(send_coro, _loop_for_step)
    except Exception as deliver_err:
        logger.debug("background_review_callback error: %s", deliver_err)
def _release_bg_review_messages() -> None:
    """Open the delivery gate and flush every queued review notice."""
    _bg_review_release.set()
    # Drain under the lock, but deliver outside it so adapter sends
    # never run while holding the lock.
    with _bg_review_pending_lock:
        drained = _bg_review_pending[:]
        del _bg_review_pending[:]
    for note in drained:
        _deliver_bg_review_message(note)
# Background review delivery — send "💾 Memory updated" etc. to user
def _bg_review_send(message: str) -> None:
    """Deliver a background-review notice, queueing it pre-release.

    Double-checks the release event under the lock so a notice
    arriving while the gate opens is delivered rather than stranded
    in the queue.
    """
    if not _status_adapter:
        return
    queued = False
    if not _bg_review_release.is_set():
        with _bg_review_pending_lock:
            # Re-check under the lock: the release may have fired
            # between the first check and lock acquisition.
            if not _bg_review_release.is_set():
                _bg_review_pending.append(message)
                queued = True
    if not queued:
        _deliver_bg_review_message(message)
agent.background_review_callback = _bg_review_send
# Register the release hook on the adapter so base.py's finally
# block can fire it after delivering the main response.
if _status_adapter and session_key:
    _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None)
    if _pdc is not None:
        _pdc[session_key] = _release_bg_review_messages
# Store agent reference for interrupt support
agent_holder[0] = agent
# Capture the full tool definitions for transcript logging
tools_holder[0] = agent.tools if hasattr(agent, 'tools') else None
# Convert history to agent format.
# Two cases:
#   1. Normal path (from transcript): simple {role, content, timestamp} dicts
#      - Strip timestamps, keep role+content
#   2. Interrupt path (from agent result["messages"]): full agent messages
#      that may include tool_calls, tool_call_id, reasoning, etc.
#      - These must be passed through intact so the API sees valid
#        assistant→tool sequences (dropping tool_calls causes 500 errors)
agent_history = []
for msg in history:
    role = msg.get("role")
    if not role:
        continue
    # Skip metadata entries (tool definitions, session info)
    # -- these are for transcript logging, not for the LLM
    if role in ("session_meta",):
        continue
    # Skip system messages -- the agent rebuilds its own system prompt
    if role == "system":
        continue
    # Rich agent messages (tool_calls, tool results) must be passed
    # through intact so the API sees valid assistant→tool sequences
    has_tool_calls = "tool_calls" in msg
    has_tool_call_id = "tool_call_id" in msg
    is_tool_message = role == "tool"
    if has_tool_calls or has_tool_call_id or is_tool_message:
        # Pass through everything except the transcript timestamp.
        clean_msg = {k: v for k, v in msg.items() if k != "timestamp"}
        agent_history.append(clean_msg)
    else:
        # Simple text message - just need role and content
        content = msg.get("content")
        if content:
            # Tag cross-platform mirror messages so the agent knows their origin
            if msg.get("mirror"):
                mirror_src = msg.get("mirror_source", "another session")
                content = f"[Delivered from {mirror_src}] {content}"
            entry = {"role": role, "content": content}
            # Preserve reasoning fields on assistant messages so
            # multi-turn reasoning context survives session reload.
            # The agent's _build_api_kwargs converts these to the
            # provider-specific format (reasoning_content, etc.).
            if role == "assistant":
                for _rkey in ("reasoning", "reasoning_details",
                              "codex_reasoning_items"):
                    _rval = msg.get(_rkey)
                    if _rval:
                        entry[_rkey] = _rval
            agent_history.append(entry)
# Collect MEDIA paths already in history so we can exclude them
# from the current turn's extraction. This is compression-safe:
# even if the message list shrinks, we know which paths are old.
_history_media_paths: set = set()
for _hm in agent_history:
    if _hm.get("role") in ("tool", "function"):
        _hc = _hm.get("content", "")
        if "MEDIA:" in _hc:
            for _match in re.finditer(r'MEDIA:(\S+)', _hc):
                # Strip quotes/braces that may cling to a path embedded
                # in JSON-ish tool output.
                _p = _match.group(1).strip().rstrip('",}')
                if _p:
                    _history_media_paths.add(_p)
# Register per-session gateway approval callback so dangerous
# command approval blocks the agent thread (mirrors CLI input()).
# The callback bridges sync→async to send the approval request
# to the user immediately.
from tools.approval import (
    register_gateway_notify,
    reset_current_session_key,
    set_current_session_key,
    unregister_gateway_notify,
)
def _approval_notify_sync(approval_data: dict) -> None:
    """Deliver a dangerous-command approval request to the user.

    Runs on the agent thread (sync) and bridges to the adapter's event
    loop via ``run_coroutine_threadsafe``. Prefers the adapter's
    interactive ``send_exec_approval`` (button UI) when the adapter
    class provides it; otherwise sends a plain text prompt with
    ``/approve`` / ``/deny`` instructions.

    Args:
        approval_data: dict with at least ``command`` and ``description``.
    """
    # Pause the typing indicator while waiting on the user. On Slack the
    # assistant "is thinking..." status disables the compose box, so the
    # user could not type /approve at all. Sending the approval message
    # clears the status; pausing stops _keep_typing from re-setting it.
    # Typing is resumed by the approve/deny command handlers.
    _status_adapter.pause_typing_for_chat(_status_chat_id)
    command_text = approval_data.get("command", "")
    reason = approval_data.get("description", "dangerous command")
    # Look up send_exec_approval on the *class*, not the instance — a
    # MagicMock instance in tests would auto-create any attribute.
    if getattr(type(_status_adapter), "send_exec_approval", None) is not None:
        try:
            outcome = asyncio.run_coroutine_threadsafe(
                _status_adapter.send_exec_approval(
                    chat_id=_status_chat_id,
                    command=command_text,
                    session_key=_approval_session_key,
                    description=reason,
                    metadata=_status_thread_metadata,
                ),
                _loop_for_step,
            ).result(timeout=15)
            if outcome.success:
                return
            logger.warning(
                "Button-based approval failed (send returned error), falling back to text: %s",
                outcome.error,
            )
        except Exception as _e:
            logger.warning(
                "Button-based approval failed, falling back to text: %s", _e
            )
    # Fallback: plain text approval prompt with a truncated command preview.
    if len(command_text) > 200:
        preview = command_text[:200] + "..."
    else:
        preview = command_text
    msg = (
        f"⚠️ **Dangerous command requires approval:**\n"
        f"```\n{preview}\n```\n"
        f"Reason: {reason}\n\n"
        f"Reply `/approve` to execute, `/approve session` to approve this pattern "
        f"for the session, `/approve always` to approve permanently, or `/deny` to cancel."
    )
    try:
        asyncio.run_coroutine_threadsafe(
            _status_adapter.send(
                _status_chat_id,
                msg,
                metadata=_status_thread_metadata,
            ),
            _loop_for_step,
        ).result(timeout=15)
    except Exception as _e:
        logger.error("Failed to send approval request: %s", _e)
# Prepend any pending model-switch note so the model knows about the switch.
# (Defect fixed: raw VCS blame artifact lines — a commit message and date —
# were embedded between these two statements and would break the file.)
_pending_notes = getattr(self, '_pending_model_notes', {})
_msn = _pending_notes.pop(session_key, None) if session_key else None
if _msn:
    message = _msn + "\n\n" + message
# Auto-continue: if the loaded history ends with a tool result, the
# previous agent turn was interrupted mid-work (gateway restart, crash,
# SIGTERM). Prepend a system note so the model finishes processing the
# pending tool results before addressing the user's new message. (#4493)
if agent_history and agent_history[-1].get("role") == "tool":
    message = (
        "[System note: Your previous turn was interrupted before you could "
        "process the last tool result(s). The conversation history contains "
        "tool outputs you haven't responded to yet. Please finish processing "
        "those results and summarize what was accomplished, then address the "
        "user's new message below.]\n\n"
        + message
    )
# Bind this thread's approval plumbing to the session, run the agent, and
# always unwind the registration afterwards — even if run_conversation
# raises — so a later turn cannot deliver approvals to a dead callback.
_approval_session_key = session_key or ""
_approval_session_token = set_current_session_key(_approval_session_key)
register_gateway_notify(_approval_session_key, _approval_notify_sync)
try:
    result = agent.run_conversation(message, conversation_history=agent_history, task_id=session_id)
finally:
    unregister_gateway_notify(_approval_session_key)
    reset_current_session_key(_approval_session_token)
# Expose the raw result to the enclosing async scope (thread → loop handoff).
result_holder[0] = result
# Signal the stream consumer that the agent is done
if _stream_consumer is not None:
    _stream_consumer.finish()
# Return final response, or a message if something went wrong
final_response = result.get("final_response")
# Extract actual token counts from the agent instance used for this run.
# Defaults of 0 cover the case where the agent was never constructed.
_last_prompt_toks = 0
_input_toks = 0
_output_toks = 0
_agent = agent_holder[0]
if _agent and hasattr(_agent, "context_compressor"):
    # last_prompt_tokens is the prompt size of the most recent API call;
    # session_* counters accumulate over the whole run.
    _last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0)
    _input_toks = getattr(_agent, "session_prompt_tokens", 0)
    _output_toks = getattr(_agent, "session_completion_tokens", 0)
_resolved_model = getattr(_agent, "model", None) if _agent else None
if not final_response:
    # Agent produced nothing — surface its error (if any) and return the
    # bookkeeping fields the gateway caller expects on the failure path.
    error_msg = f"⚠️ {result['error']}" if result.get("error") else ""
    return {
        "final_response": error_msg,
        "messages": result.get("messages", []),
        "api_calls": result.get("api_calls", 0),
        "failed": result.get("failed", False),
        "compression_exhausted": result.get("compression_exhausted", False),
        "tools": tools_holder[0] or [],
        "history_offset": len(agent_history),
        "last_prompt_tokens": _last_prompt_toks,
        "input_tokens": _input_toks,
        "output_tokens": _output_toks,
        "model": _resolved_model,
    }
# Scan tool results for MEDIA:<path> tags that need to be delivered as
# native audio/file attachments. The TTS tool embeds MEDIA: tags in its
# JSON response, but the model's final text usually omits them; append any
# tags not already present so extract_media() delivers each file once.
#
# Deduplication is path-based against _history_media_paths (collected
# before run_conversation), not index-based — safe even when context
# compression shrinks the message list. (Fixes #160)
if "MEDIA:" not in final_response:
    fresh_tags = []
    voice_directive = False
    for tool_msg in result.get("messages", []):
        if tool_msg.get("role") not in ("tool", "function"):
            continue
        tool_out = tool_msg.get("content", "")
        if "MEDIA:" in tool_out:
            for raw_path in re.findall(r'MEDIA:(\S+)', tool_out):
                cleaned = raw_path.strip().rstrip('",}')
                if cleaned and cleaned not in _history_media_paths:
                    fresh_tags.append(f"MEDIA:{cleaned}")
        if "[[audio_as_voice]]" in tool_out:
            voice_directive = True
    if fresh_tags:
        # Order-preserving dedup of tags collected this turn.
        ordered_tags = list(dict.fromkeys(fresh_tags))
        if voice_directive:
            ordered_tags.insert(0, "[[audio_as_voice]]")
        final_response = final_response + "\n" + "\n".join(ordered_tags)
# Sync session_id: the agent may have created a new session during
# mid-run context compression (_compress_context splits sessions).
# If so, update the session store entry so the NEXT message loads
# the compressed transcript, not the stale pre-compression one.
agent = agent_holder[0]
_session_was_split = False
if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id:
    _session_was_split = True
    logger.info(
        "Session split detected: %s%s (compression)",
        session_id, agent.session_id,
    )
    # NOTE(review): reaches into the store's private _entries/_save —
    # presumably intentional to avoid a reload; confirm against SessionStore.
    entry = self.session_store._entries.get(session_key)
    if entry:
        entry.session_id = agent.session_id
        self.session_store._save()
effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id
# When compression created a new session, the messages list was
# shortened. Using the original history offset would produce an
# empty new_messages slice, causing the gateway to write only a
# user/assistant pair — losing the compressed summary and tail.
# Reset to 0 so the gateway writes ALL compressed messages.
_effective_history_offset = 0 if _session_was_split else len(agent_history)
# Auto-generate session title after first exchange (non-blocking)
if final_response and self._session_db:
    try:
        from agent.title_generator import maybe_auto_title
        all_msgs = result_holder[0].get("messages", []) if result_holder[0] else []
        maybe_auto_title(
            self._session_db,
            effective_session_id,
            message,
            final_response,
            all_msgs,
        )
    except Exception:
        # Title generation is best-effort; never fail the turn over it.
        pass
# Success path: full result payload for the gateway, including token
# accounting, the (possibly compression-split) session id, and the
# history offset from which new messages should be persisted.
return {
    "final_response": final_response,
    "last_reasoning": result.get("last_reasoning"),
    "messages": result_holder[0].get("messages", []) if result_holder[0] else [],
    "api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
    "tools": tools_holder[0] or [],
    "history_offset": _effective_history_offset,
    "last_prompt_tokens": _last_prompt_toks,
    "input_tokens": _input_toks,
    "output_tokens": _output_toks,
    "model": _resolved_model,
    "session_id": effective_session_id,
    "response_previewed": result.get("response_previewed", False),
}
# Kick off the optional tool-progress sender.
progress_task = None
if tool_progress_enabled:
    progress_task = asyncio.create_task(send_progress_messages())
# The stream consumer is created inside run_sync (on the thread pool) after
# the agent is constructed, so poll until it appears, then drive it.
stream_task = None
async def _start_stream_consumer():
    """Poll for the stream consumer (up to ~10s), then run it to completion."""
    polls_remaining = 200
    while polls_remaining > 0:
        consumer = stream_consumer_holder[0]
        if consumer is not None:
            await consumer.run()
            return
        await asyncio.sleep(0.05)
        polls_remaining -= 1
stream_task = asyncio.create_task(_start_stream_consumer())
# Track this agent as running for this session (for interrupt support)
# We do this in a callback after the agent is created
async def track_agent():
    # Wait for agent to be created (run_sync builds it on the thread pool).
    while agent_holder[0] is None:
        await asyncio.sleep(0.05)
    if session_key:
        self._running_agents[session_key] = agent_holder[0]
    if self._draining:
        # Reflect that a new agent started while the gateway is draining.
        self._update_runtime_status("draining")
tracking_task = asyncio.create_task(track_agent())
# Monitor for interrupts from the adapter (new messages arriving). This is
# the PRIMARY interrupt path for regular text messages — Level 1 (base.py)
# catches them before _handle_message() is reached, so the Level 2
# running_agent.interrupt() path never fires. The inactivity poll loop
# below keeps a BACKUP check in case this task dies silently.
_interrupt_detected = asyncio.Event()  # shared with the backup check
async def monitor_for_interrupt():
    """Poll the adapter every 200ms and signal the agent on interrupt."""
    if not session_key:
        return
    while True:
        await asyncio.sleep(0.2)
        try:
            # Re-resolve the adapter each pass so reconnects don't leave
            # us holding a stale reference.
            live_adapter = self.adapters.get(source.platform)
            if not live_adapter:
                continue
            if not hasattr(live_adapter, 'has_pending_interrupt'):
                continue
            # Interrupts are keyed by the full session key (the
            # build_session_key output), never by source.chat_id.
            if not live_adapter.has_pending_interrupt(session_key):
                continue
            running_agent = agent_holder[0]
            if not running_agent:
                continue
            # Peek at the pending message text WITHOUT consuming it: the
            # event must stay in _pending_messages so the post-run
            # _dequeue_pending_event() can retrieve the full MessageEvent
            # (with media metadata). Popping here would race the agent
            # finishing before it checks _interrupt_requested, losing
            # the message on both paths.
            peeked = live_adapter._pending_messages.get(session_key)
            logger.debug("Interrupt detected from adapter, signaling agent...")
            running_agent.interrupt(peeked.text if peeked else None)
            _interrupt_detected.set()
            break
        except asyncio.CancelledError:
            raise
        except Exception as _mon_err:
            logger.debug("monitor_for_interrupt error (will retry): %s", _mon_err)
interrupt_monitor = asyncio.create_task(monitor_for_interrupt())
# Periodic "still working" notifications for long-running tasks.
# Fires every N seconds so the user knows the agent hasn't died.
# Config: agent.gateway_notify_interval in config.yaml, or
# HERMES_AGENT_NOTIFY_INTERVAL env var. Default 600s (10 min).
# 0 = disable notifications.
_NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 600))
_NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None
_notify_start = time.time()
async def _notify_long_running():
if _NOTIFY_INTERVAL is None:
return # Notifications disabled (gateway_notify_interval: 0)
_notify_adapter = self.adapters.get(source.platform)
if not _notify_adapter:
return
while True:
await asyncio.sleep(_NOTIFY_INTERVAL)
_elapsed_mins = int((time.time() - _notify_start) // 60)
fix: improve timeout debug logging and user-facing diagnostics (#5370) Agent activity tracking: - Add _last_activity_ts, _last_activity_desc, _current_tool to AIAgent - Touch activity on: API call start/complete, tool start/complete, first stream chunk, streaming request start - Public get_activity_summary() method for external consumers Gateway timeout diagnostics: - Timeout message now includes what the agent was doing when killed: actively working vs stuck on a tool vs waiting on API response - Includes iteration count, last activity description, seconds since last activity — users can distinguish legitimate long tasks from genuine hangs - 'Still working' notifications now show iteration count and current tool instead of just elapsed time - Stale lock eviction logs include agent activity state for debugging Stream stale timeout: - _emit_status when stale stream is detected (was log-only) — gateway users now see 'No response from provider for Ns' with model and context size - Improved logger.warning with model name and estimated context size Error path notifications (gateway-visible via _emit_status): - Context compression attempts now use _emit_status (was _vprint only) - Non-retryable client errors emit summary before aborting - Max retry exhaustion emits error summary (was _vprint only) - Rate limit exhaustion emits specific rate-limit message These were all CLI-visible but silent to gateway users, which is why people on Telegram/Discord saw generic 'request failed' messages without explanation.
2026-04-05 18:33:33 -07:00
# Include agent activity context if available.
_agent_ref = agent_holder[0]
_status_detail = ""
if _agent_ref and hasattr(_agent_ref, "get_activity_summary"):
try:
_a = _agent_ref.get_activity_summary()
_parts = [f"iteration {_a['api_call_count']}/{_a['max_iterations']}"]
if _a.get("current_tool"):
_parts.append(f"running: {_a['current_tool']}")
else:
_parts.append(_a.get("last_activity_desc", ""))
_status_detail = "" + ", ".join(_parts)
except Exception:
pass
try:
await _notify_adapter.send(
source.chat_id,
fix: improve timeout debug logging and user-facing diagnostics (#5370) Agent activity tracking: - Add _last_activity_ts, _last_activity_desc, _current_tool to AIAgent - Touch activity on: API call start/complete, tool start/complete, first stream chunk, streaming request start - Public get_activity_summary() method for external consumers Gateway timeout diagnostics: - Timeout message now includes what the agent was doing when killed: actively working vs stuck on a tool vs waiting on API response - Includes iteration count, last activity description, seconds since last activity — users can distinguish legitimate long tasks from genuine hangs - 'Still working' notifications now show iteration count and current tool instead of just elapsed time - Stale lock eviction logs include agent activity state for debugging Stream stale timeout: - _emit_status when stale stream is detected (was log-only) — gateway users now see 'No response from provider for Ns' with model and context size - Improved logger.warning with model name and estimated context size Error path notifications (gateway-visible via _emit_status): - Context compression attempts now use _emit_status (was _vprint only) - Non-retryable client errors emit summary before aborting - Max retry exhaustion emits error summary (was _vprint only) - Rate limit exhaustion emits specific rate-limit message These were all CLI-visible but silent to gateway users, which is why people on Telegram/Discord saw generic 'request failed' messages without explanation.
2026-04-05 18:33:33 -07:00
f"⏳ Still working... ({_elapsed_mins} min elapsed{_status_detail})",
metadata=_status_thread_metadata,
)
except Exception as _ne:
logger.debug("Long-running notification error: %s", _ne)
_notify_task = asyncio.create_task(_notify_long_running())
try:
    # Run in thread pool to not block. Use an *inactivity*-based
    # timeout instead of a wall-clock limit: the agent can run for
    # hours if it's actively calling tools / receiving stream tokens,
    # but a hung API call or stuck tool with no activity for the
    # configured duration is caught and killed. (#4815)
    #
    # Config: agent.gateway_timeout in config.yaml, or
    # HERMES_AGENT_TIMEOUT env var (env var takes precedence).
    # Default 1800s (30 min inactivity). 0 = unlimited.
    _agent_timeout_raw = float(os.getenv("HERMES_AGENT_TIMEOUT", 1800))
    _agent_timeout = _agent_timeout_raw if _agent_timeout_raw > 0 else None
    # Staged warning threshold before the hard timeout fires (0 = off).
    _agent_warning_raw = float(os.getenv("HERMES_AGENT_TIMEOUT_WARNING", 900))
    _agent_warning = _agent_warning_raw if _agent_warning_raw > 0 else None
    _warning_fired = False
    # The agent runs on the thread pool; copy_context semantics are
    # handled by the helper so contextvars survive the hop.
    _executor_task = asyncio.ensure_future(
        self._run_in_executor_with_context(run_sync)
    )
    _inactivity_timeout = False
    _POLL_INTERVAL = 5.0
    if _agent_timeout is None:
        # Unlimited — still poll periodically for backup interrupt
        # detection in case monitor_for_interrupt() silently died.
        response = None
        while True:
            done, _ = await asyncio.wait(
                {_executor_task}, timeout=_POLL_INTERVAL
            )
            if done:
                response = _executor_task.result()
                break
            # Backup interrupt check: if the monitor task died or
            # missed the interrupt, catch it here.
            if not _interrupt_detected.is_set() and session_key:
                _backup_adapter = self.adapters.get(source.platform)
                _backup_agent = agent_holder[0]
                if (_backup_adapter and _backup_agent
                        and hasattr(_backup_adapter, 'has_pending_interrupt')
                        and _backup_adapter.has_pending_interrupt(session_key)):
                    # Peek (don't pop) so the post-run dequeue still sees
                    # the full pending MessageEvent.
                    _bp_event = _backup_adapter._pending_messages.get(session_key)
                    _bp_text = _bp_event.text if _bp_event else None
                    logger.info(
                        "Backup interrupt detected for session %s "
                        "(monitor task state: %s)",
                        session_key[:20],
                        "done" if interrupt_monitor.done() else "running",
                    )
                    _backup_agent.interrupt(_bp_text)
                    _interrupt_detected.set()
    else:
        # Poll loop: check the agent's built-in activity tracker
        # (updated by _touch_activity() on every tool call, API
        # call, and stream delta) every few seconds.
        response = None
        while True:
            done, _ = await asyncio.wait(
                {_executor_task}, timeout=_POLL_INTERVAL
            )
            if done:
                response = _executor_task.result()
                break
            # Agent still running — check inactivity.
            _agent_ref = agent_holder[0]
            _idle_secs = 0.0
            if _agent_ref and hasattr(_agent_ref, "get_activity_summary"):
                try:
                    _act = _agent_ref.get_activity_summary()
                    _idle_secs = _act.get("seconds_since_activity", 0.0)
                except Exception:
                    pass
            # Staged warning: fire once before escalating to full timeout.
            if (not _warning_fired and _agent_warning is not None
                    and _idle_secs >= _agent_warning):
                _warning_fired = True
                _warn_adapter = self.adapters.get(source.platform)
                if _warn_adapter:
                    # `or 1` floors both displays at one minute.
                    _elapsed_warn = int(_agent_warning // 60) or 1
                    _remaining_mins = int((_agent_timeout - _agent_warning) // 60) or 1
                    try:
                        await _warn_adapter.send(
                            source.chat_id,
                            f"⚠️ No activity for {_elapsed_warn} min. "
                            f"If the agent does not respond soon, it will "
                            f"be timed out in {_remaining_mins} min. "
                            f"You can continue waiting or use /reset.",
                            metadata=_status_thread_metadata,
                        )
                    except Exception as _warn_err:
                        logger.debug("Inactivity warning send error: %s", _warn_err)
            if _idle_secs >= _agent_timeout:
                # Escalate: handled by the timeout branch after the loop.
                _inactivity_timeout = True
                break
            # Backup interrupt check (same as unlimited path).
            if not _interrupt_detected.is_set() and session_key:
                _backup_adapter = self.adapters.get(source.platform)
                _backup_agent = agent_holder[0]
                if (_backup_adapter and _backup_agent
                        and hasattr(_backup_adapter, 'has_pending_interrupt')
                        and _backup_adapter.has_pending_interrupt(session_key)):
                    _bp_event = _backup_adapter._pending_messages.get(session_key)
                    _bp_text = _bp_event.text if _bp_event else None
                    logger.info(
                        "Backup interrupt detected for session %s "
                        "(monitor task state: %s)",
                        session_key[:20],
                        "done" if interrupt_monitor.done() else "running",
                    )
                    _backup_agent.interrupt(_bp_text)
                    _interrupt_detected.set()
if _inactivity_timeout:
# (VCS blame artifact removed: commit "fix: improve timeout debug logging and user-facing diagnostics (#5370)", 2026-04-05)
# Build a diagnostic summary from the agent's activity tracker.
_timed_out_agent = agent_holder[0]
_activity = {}
if _timed_out_agent and hasattr(_timed_out_agent, "get_activity_summary"):
try:
_activity = _timed_out_agent.get_activity_summary()
except Exception:
pass
_last_desc = _activity.get("last_activity_desc", "unknown")
_secs_ago = _activity.get("seconds_since_activity", 0)
_cur_tool = _activity.get("current_tool")
_iter_n = _activity.get("api_call_count", 0)
_iter_max = _activity.get("max_iterations", 0)
logger.error(
"Agent idle for %.0fs (timeout %.0fs) in session %s "
"| last_activity=%s | iteration=%s/%s | tool=%s",
_secs_ago, _agent_timeout, session_key,
_last_desc, _iter_n, _iter_max,
# (VCS blame artifact removed: commit "fix: improve timeout debug logging and user-facing diagnostics (#5370)", 2026-04-05)
_cur_tool or "none",
)
# (VCS blame artifact removed: commit "fix: improve timeout debug logging and user-facing diagnostics (#5370)", 2026-04-05)
# Interrupt the agent if it's still running so the thread
# pool worker is freed.
if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"):
_timed_out_agent.interrupt("Execution timed out (inactivity)")
# (VCS blame artifact removed: commit "fix: improve timeout debug logging and user-facing diagnostics (#5370)", 2026-04-05)
_timeout_mins = int(_agent_timeout // 60) or 1
# (VCS blame artifact removed: commit "fix: improve timeout debug logging and user-facing diagnostics (#5370)", 2026-04-05)
# Construct a user-facing message with diagnostic context.
_diag_lines = [
f"⏱️ Agent inactive for {_timeout_mins} min — no tool calls "
f"or API responses."
]
if _cur_tool:
# (VCS blame artifact removed: commit "fix: improve timeout debug logging and user-facing diagnostics (#5370)", 2026-04-05)
_diag_lines.append(
f"The agent appears stuck on tool `{_cur_tool}` "
f"({_secs_ago:.0f}s since last activity, "
f"iteration {_iter_n}/{_iter_max})."
)
else:
_diag_lines.append(
f"Last activity: {_last_desc} ({_secs_ago:.0f}s ago, "
f"iteration {_iter_n}/{_iter_max}). "
"The agent may have been waiting on an API response."
)
_diag_lines.append(
"To increase the limit, set agent.gateway_timeout in config.yaml "
fix: improve timeout debug logging and user-facing diagnostics (#5370) Agent activity tracking: - Add _last_activity_ts, _last_activity_desc, _current_tool to AIAgent - Touch activity on: API call start/complete, tool start/complete, first stream chunk, streaming request start - Public get_activity_summary() method for external consumers Gateway timeout diagnostics: - Timeout message now includes what the agent was doing when killed: actively working vs stuck on a tool vs waiting on API response - Includes iteration count, last activity description, seconds since last activity — users can distinguish legitimate long tasks from genuine hangs - 'Still working' notifications now show iteration count and current tool instead of just elapsed time - Stale lock eviction logs include agent activity state for debugging Stream stale timeout: - _emit_status when stale stream is detected (was log-only) — gateway users now see 'No response from provider for Ns' with model and context size - Improved logger.warning with model name and estimated context size Error path notifications (gateway-visible via _emit_status): - Context compression attempts now use _emit_status (was _vprint only) - Non-retryable client errors emit summary before aborting - Max retry exhaustion emits error summary (was _vprint only) - Rate limit exhaustion emits specific rate-limit message These were all CLI-visible but silent to gateway users, which is why people on Telegram/Discord saw generic 'request failed' messages without explanation.
2026-04-05 18:33:33 -07:00
"(value in seconds, 0 = no limit) and restart the gateway.\n"
"Try again, or use /reset to start fresh."
)
response = {
fix: improve timeout debug logging and user-facing diagnostics (#5370) Agent activity tracking: - Add _last_activity_ts, _last_activity_desc, _current_tool to AIAgent - Touch activity on: API call start/complete, tool start/complete, first stream chunk, streaming request start - Public get_activity_summary() method for external consumers Gateway timeout diagnostics: - Timeout message now includes what the agent was doing when killed: actively working vs stuck on a tool vs waiting on API response - Includes iteration count, last activity description, seconds since last activity — users can distinguish legitimate long tasks from genuine hangs - 'Still working' notifications now show iteration count and current tool instead of just elapsed time - Stale lock eviction logs include agent activity state for debugging Stream stale timeout: - _emit_status when stale stream is detected (was log-only) — gateway users now see 'No response from provider for Ns' with model and context size - Improved logger.warning with model name and estimated context size Error path notifications (gateway-visible via _emit_status): - Context compression attempts now use _emit_status (was _vprint only) - Non-retryable client errors emit summary before aborting - Max retry exhaustion emits error summary (was _vprint only) - Rate limit exhaustion emits specific rate-limit message These were all CLI-visible but silent to gateway users, which is why people on Telegram/Discord saw generic 'request failed' messages without explanation.
2026-04-05 18:33:33 -07:00
"final_response": "\n".join(_diag_lines),
"messages": result_holder[0].get("messages", []) if result_holder[0] else [],
fix: improve timeout debug logging and user-facing diagnostics (#5370) Agent activity tracking: - Add _last_activity_ts, _last_activity_desc, _current_tool to AIAgent - Touch activity on: API call start/complete, tool start/complete, first stream chunk, streaming request start - Public get_activity_summary() method for external consumers Gateway timeout diagnostics: - Timeout message now includes what the agent was doing when killed: actively working vs stuck on a tool vs waiting on API response - Includes iteration count, last activity description, seconds since last activity — users can distinguish legitimate long tasks from genuine hangs - 'Still working' notifications now show iteration count and current tool instead of just elapsed time - Stale lock eviction logs include agent activity state for debugging Stream stale timeout: - _emit_status when stale stream is detected (was log-only) — gateway users now see 'No response from provider for Ns' with model and context size - Improved logger.warning with model name and estimated context size Error path notifications (gateway-visible via _emit_status): - Context compression attempts now use _emit_status (was _vprint only) - Non-retryable client errors emit summary before aborting - Max retry exhaustion emits error summary (was _vprint only) - Rate limit exhaustion emits specific rate-limit message These were all CLI-visible but silent to gateway users, which is why people on Telegram/Discord saw generic 'request failed' messages without explanation.
2026-04-05 18:33:33 -07:00
"api_calls": _iter_n,
"tools": tools_holder[0] or [],
"history_offset": 0,
"failed": True,
}
fix(gateway): /model shows active fallback model instead of config default (#1615) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552) Long messages sent via send_message tool or cron delivery silently failed when exceeding platform limits. Gateway adapters handle this via truncate_message(), but the standalone senders in send_message_tool bypassed that entirely. - Apply truncate_message() chunking in _send_to_platform() before dispatching to individual platform senders - Remove naive message[i:i+2000] character split in _send_discord() in favor of centralized smart splitting - Attach media files to last chunk only for Telegram - Add regression tests for chunking and media placement Cherry-picked from PR #1557 by llbn. * fix(approval): show full command in dangerous command approval (#1553) Previously the command was truncated to 80 chars in CLI (with a [v]iew full option), 500 chars in Discord embeds, and missing entirely in Telegram/Slack approval messages. Now the full command is always displayed everywhere: - CLI: removed 80-char truncation and [v]iew full menu option - Gateway (TG/Slack): approval_required message includes full command in a code block - Discord: embed shows full command up to 4096-char limit - Windows: skip SIGALRM-based test timeout (Unix-only) - Updated tests: replaced view-flow tests with direct approval tests Cherry-picked from PR #1566 by crazywriter1. * fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624) The interrupt polling loop in chat() waited on the queue without invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy buffer only flushed on input events, causing the CLI to appear frozen during tool execution until the user typed a key. Fix: call _invalidate() on each queue timeout (every ~100ms, throttled to 150ms) to force the renderer to flush buffered agent output. 
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580) When --migrate-secrets is not passed (the default), API keys like OPENROUTER_API_KEY are silently skipped with no warning. Users don't realize their keys weren't migrated until the agent fails to connect. Add a post-migration warning with actionable instructions: either re-run with --migrate-secrets or add the key manually via hermes config set. Cherry-picked from PR #1593 by ygd58. * fix(security): block sandbox backend creds from subprocess env (#1264) Add Modal and Daytona sandbox credentials to the subprocess env blocklist so they're not leaked to agent terminal sessions via printenv/env. Cherry-picked from PR #1571 by ygd58. * fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816) When a user sends multiple messages while the agent keeps failing, _run_agent() calls itself recursively with no depth limit. This can exhaust stack/memory if the agent is in a failure loop. Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is logged and the current result is returned instead of recursing deeper. The log handler duplication bug described in #816 was already fixed separately (AIAgent.__init__ deduplicates handlers). * fix(gateway): /model shows active fallback model instead of config default (#1615) When the agent falls back to a different model (e.g. due to rate limiting), /model still showed the config default. Now tracks the effective model/provider after each agent run and displays it. Cleared when the primary model succeeds again or the user explicitly switches via /model. Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for test compatibility. --------- Co-authored-by: buray <ygd58@users.noreply.github.com> Co-authored-by: lbn <llbn@users.noreply.github.com> Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com> Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
2026-03-17 02:26:51 -07:00
# Track fallback model state: if the agent switched to a
# fallback model during this run, persist it so /model shows
# the actually-active model instead of the config default.
fix: don't evict cached agent on failed runs — prevents MCP restart loop (#7539) * fix: circuit breaker stops CPU-burning restart loops on persistent errors When a gateway session hits a non-retryable error (e.g. invalid model ID → HTTP 400), the agent fails and returns. But if the session keeps receiving messages (or something periodically recreates agents), each attempt spawns a new AIAgent — reinitializing MCP server connections, burning CPU — only to hit the same 400 error again. On a 4-core server, this pegs an entire core per stuck session and accumulates 300+ minutes of CPU time over hours. Fix: add a per-session consecutive failure counter in the gateway runner. - Track consecutive non-retryable failures per session key - After 3 consecutive failures (_MAX_CONSECUTIVE_FAILURES), block further agent creation for that session and notify the user: '⚠️ This session has failed N times in a row with a non-retryable error. Use /reset to start a new session.' - Evict the cached agent when the circuit breaker engages to prevent stale state from accumulating - Reset the counter on successful agent runs - Clear the counter on /reset and /new so users can recover - Uses getattr() pattern so bare GatewayRunner instances (common in tests using object.__new__) don't crash Tests: - 8 new tests in test_circuit_breaker.py covering counter behavior, threshold, reset, session isolation, and bare-runner safety Addresses #7130. * Revert "fix: circuit breaker stops CPU-burning restart loops on persistent errors" This reverts commit d848ea7109d62a2fc4ba6da36fc4f0366b5ded94. * fix: don't evict cached agent on failed runs — prevents MCP restart loop When a run fails (e.g. invalid model ID → 400) and fallback activated, the gateway was evicting the cached agent to 'retry primary next time.' But evicting a failed agent forces a full AIAgent recreation on the next message — reinitializing MCP server connections, spawning stdio processes — only to hit the same 400 again. 
This created a CPU-burning loop (91%+ for hours, #7130). The fix: add `and not _run_failed` to the fallback-eviction check. Failed runs keep the cached agent. The next message reuses it (no MCP reinit), hits the same error, returns it to the user quickly. The user can /reset or /model to fix their config. Successful fallback runs still evict as before so the next message retries the primary model. Addresses #7130.
2026-04-10 21:16:56 -07:00
# Skip eviction when the run failed — evicting a failed agent
# forces MCP reinit on the next message for no benefit (the
# same error will recur). This was the root cause of #7130:
# a bad model ID triggered fallback → eviction → recreation →
# MCP reinit → same 400 → loop, burning 91% CPU for hours.
fix(gateway): /model shows active fallback model instead of config default (#1615) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552) Long messages sent via send_message tool or cron delivery silently failed when exceeding platform limits. Gateway adapters handle this via truncate_message(), but the standalone senders in send_message_tool bypassed that entirely. - Apply truncate_message() chunking in _send_to_platform() before dispatching to individual platform senders - Remove naive message[i:i+2000] character split in _send_discord() in favor of centralized smart splitting - Attach media files to last chunk only for Telegram - Add regression tests for chunking and media placement Cherry-picked from PR #1557 by llbn. * fix(approval): show full command in dangerous command approval (#1553) Previously the command was truncated to 80 chars in CLI (with a [v]iew full option), 500 chars in Discord embeds, and missing entirely in Telegram/Slack approval messages. Now the full command is always displayed everywhere: - CLI: removed 80-char truncation and [v]iew full menu option - Gateway (TG/Slack): approval_required message includes full command in a code block - Discord: embed shows full command up to 4096-char limit - Windows: skip SIGALRM-based test timeout (Unix-only) - Updated tests: replaced view-flow tests with direct approval tests Cherry-picked from PR #1566 by crazywriter1. * fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624) The interrupt polling loop in chat() waited on the queue without invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy buffer only flushed on input events, causing the CLI to appear frozen during tool execution until the user typed a key. Fix: call _invalidate() on each queue timeout (every ~100ms, throttled to 150ms) to force the renderer to flush buffered agent output. 
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580) When --migrate-secrets is not passed (the default), API keys like OPENROUTER_API_KEY are silently skipped with no warning. Users don't realize their keys weren't migrated until the agent fails to connect. Add a post-migration warning with actionable instructions: either re-run with --migrate-secrets or add the key manually via hermes config set. Cherry-picked from PR #1593 by ygd58. * fix(security): block sandbox backend creds from subprocess env (#1264) Add Modal and Daytona sandbox credentials to the subprocess env blocklist so they're not leaked to agent terminal sessions via printenv/env. Cherry-picked from PR #1571 by ygd58. * fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816) When a user sends multiple messages while the agent keeps failing, _run_agent() calls itself recursively with no depth limit. This can exhaust stack/memory if the agent is in a failure loop. Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is logged and the current result is returned instead of recursing deeper. The log handler duplication bug described in #816 was already fixed separately (AIAgent.__init__ deduplicates handlers). * fix(gateway): /model shows active fallback model instead of config default (#1615) When the agent falls back to a different model (e.g. due to rate limiting), /model still showed the config default. Now tracks the effective model/provider after each agent run and displays it. Cleared when the primary model succeeds again or the user explicitly switches via /model. Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for test compatibility. --------- Co-authored-by: buray <ygd58@users.noreply.github.com> Co-authored-by: lbn <llbn@users.noreply.github.com> Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com> Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
2026-03-17 02:26:51 -07:00
_agent = agent_holder[0]
fix: don't evict cached agent on failed runs — prevents MCP restart loop (#7539) * fix: circuit breaker stops CPU-burning restart loops on persistent errors When a gateway session hits a non-retryable error (e.g. invalid model ID → HTTP 400), the agent fails and returns. But if the session keeps receiving messages (or something periodically recreates agents), each attempt spawns a new AIAgent — reinitializing MCP server connections, burning CPU — only to hit the same 400 error again. On a 4-core server, this pegs an entire core per stuck session and accumulates 300+ minutes of CPU time over hours. Fix: add a per-session consecutive failure counter in the gateway runner. - Track consecutive non-retryable failures per session key - After 3 consecutive failures (_MAX_CONSECUTIVE_FAILURES), block further agent creation for that session and notify the user: '⚠️ This session has failed N times in a row with a non-retryable error. Use /reset to start a new session.' - Evict the cached agent when the circuit breaker engages to prevent stale state from accumulating - Reset the counter on successful agent runs - Clear the counter on /reset and /new so users can recover - Uses getattr() pattern so bare GatewayRunner instances (common in tests using object.__new__) don't crash Tests: - 8 new tests in test_circuit_breaker.py covering counter behavior, threshold, reset, session isolation, and bare-runner safety Addresses #7130. * Revert "fix: circuit breaker stops CPU-burning restart loops on persistent errors" This reverts commit d848ea7109d62a2fc4ba6da36fc4f0366b5ded94. * fix: don't evict cached agent on failed runs — prevents MCP restart loop When a run fails (e.g. invalid model ID → 400) and fallback activated, the gateway was evicting the cached agent to 'retry primary next time.' But evicting a failed agent forces a full AIAgent recreation on the next message — reinitializing MCP server connections, spawning stdio processes — only to hit the same 400 again. 
This created a CPU-burning loop (91%+ for hours, #7130). The fix: add `and not _run_failed` to the fallback-eviction check. Failed runs keep the cached agent. The next message reuses it (no MCP reinit), hits the same error, returns it to the user quickly. The user can /reset or /model to fix their config. Successful fallback runs still evict as before so the next message retries the primary model. Addresses #7130.
2026-04-10 21:16:56 -07:00
_result_for_fb = result_holder[0]
_run_failed = _result_for_fb.get("failed") if _result_for_fb else False
if _agent is not None and hasattr(_agent, 'model') and not _run_failed:
fix(gateway): /model shows active fallback model instead of config default (#1615) * fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. 
* fix(tools): chunk long messages in send_message_tool before dispatch (#1552) Long messages sent via send_message tool or cron delivery silently failed when exceeding platform limits. Gateway adapters handle this via truncate_message(), but the standalone senders in send_message_tool bypassed that entirely. - Apply truncate_message() chunking in _send_to_platform() before dispatching to individual platform senders - Remove naive message[i:i+2000] character split in _send_discord() in favor of centralized smart splitting - Attach media files to last chunk only for Telegram - Add regression tests for chunking and media placement Cherry-picked from PR #1557 by llbn. * fix(approval): show full command in dangerous command approval (#1553) Previously the command was truncated to 80 chars in CLI (with a [v]iew full option), 500 chars in Discord embeds, and missing entirely in Telegram/Slack approval messages. Now the full command is always displayed everywhere: - CLI: removed 80-char truncation and [v]iew full menu option - Gateway (TG/Slack): approval_required message includes full command in a code block - Discord: embed shows full command up to 4096-char limit - Windows: skip SIGALRM-based test timeout (Unix-only) - Updated tests: replaced view-flow tests with direct approval tests Cherry-picked from PR #1566 by crazywriter1. * fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624) The interrupt polling loop in chat() waited on the queue without invalidating the prompt_toolkit renderer. On macOS, the StdoutProxy buffer only flushed on input events, causing the CLI to appear frozen during tool execution until the user typed a key. Fix: call _invalidate() on each queue timeout (every ~100ms, throttled to 150ms) to force the renderer to flush buffered agent output. 
* fix(claw): warn when API keys are skipped during OpenClaw migration (#1580) When --migrate-secrets is not passed (the default), API keys like OPENROUTER_API_KEY are silently skipped with no warning. Users don't realize their keys weren't migrated until the agent fails to connect. Add a post-migration warning with actionable instructions: either re-run with --migrate-secrets or add the key manually via hermes config set. Cherry-picked from PR #1593 by ygd58. * fix(security): block sandbox backend creds from subprocess env (#1264) Add Modal and Daytona sandbox credentials to the subprocess env blocklist so they're not leaked to agent terminal sessions via printenv/env. Cherry-picked from PR #1571 by ygd58. * fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816) When a user sends multiple messages while the agent keeps failing, _run_agent() calls itself recursively with no depth limit. This can exhaust stack/memory if the agent is in a failure loop. Add _MAX_INTERRUPT_DEPTH = 3. When exceeded, the pending message is logged and the current result is returned instead of recursing deeper. The log handler duplication bug described in #816 was already fixed separately (AIAgent.__init__ deduplicates handlers). * fix(gateway): /model shows active fallback model instead of config default (#1615) When the agent falls back to a different model (e.g. due to rate limiting), /model still showed the config default. Now tracks the effective model/provider after each agent run and displays it. Cleared when the primary model succeeds again or the user explicitly switches via /model. Cherry-picked from PR #1616 by MaxKerkula. Added hasattr guard for test compatibility. --------- Co-authored-by: buray <ygd58@users.noreply.github.com> Co-authored-by: lbn <llbn@users.noreply.github.com> Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com> Co-authored-by: Max K <MaxKerkula@users.noreply.github.com>
2026-03-17 02:26:51 -07:00
_cfg_model = _resolve_gateway_model()
if _agent.model != _cfg_model and not self._is_intentional_model_switch(session_key, _agent.model):
fix: don't evict cached agent on failed runs — prevents MCP restart loop (#7539) * fix: circuit breaker stops CPU-burning restart loops on persistent errors When a gateway session hits a non-retryable error (e.g. invalid model ID → HTTP 400), the agent fails and returns. But if the session keeps receiving messages (or something periodically recreates agents), each attempt spawns a new AIAgent — reinitializing MCP server connections, burning CPU — only to hit the same 400 error again. On a 4-core server, this pegs an entire core per stuck session and accumulates 300+ minutes of CPU time over hours. Fix: add a per-session consecutive failure counter in the gateway runner. - Track consecutive non-retryable failures per session key - After 3 consecutive failures (_MAX_CONSECUTIVE_FAILURES), block further agent creation for that session and notify the user: '⚠️ This session has failed N times in a row with a non-retryable error. Use /reset to start a new session.' - Evict the cached agent when the circuit breaker engages to prevent stale state from accumulating - Reset the counter on successful agent runs - Clear the counter on /reset and /new so users can recover - Uses getattr() pattern so bare GatewayRunner instances (common in tests using object.__new__) don't crash Tests: - 8 new tests in test_circuit_breaker.py covering counter behavior, threshold, reset, session isolation, and bare-runner safety Addresses #7130. * Revert "fix: circuit breaker stops CPU-burning restart loops on persistent errors" This reverts commit d848ea7109d62a2fc4ba6da36fc4f0366b5ded94. * fix: don't evict cached agent on failed runs — prevents MCP restart loop When a run fails (e.g. invalid model ID → 400) and fallback activated, the gateway was evicting the cached agent to 'retry primary next time.' But evicting a failed agent forces a full AIAgent recreation on the next message — reinitializing MCP server connections, spawning stdio processes — only to hit the same 400 again. 
This created a CPU-burning loop (91%+ for hours, #7130). The fix: add `and not _run_failed` to the fallback-eviction check. Failed runs keep the cached agent. The next message reuses it (no MCP reinit), hits the same error, returns it to the user quickly. The user can /reset or /model to fix their config. Successful fallback runs still evict as before so the next message retries the primary model. Addresses #7130.
2026-04-10 21:16:56 -07:00
# Fallback activated on a successful run — evict cached
# agent so the next message retries the primary model.
self._evict_cached_agent(session_key)
# NOTE(review): git-history annotation accidentally embedded in the source
# (blame-view scrape). Condensed to a comment so the file stays parseable:
#   fix(gateway): /model shows active fallback model instead of config default (#1615)
#   fix: prevent infinite 400 failure loop on context overflow (#1630)
#   fix(skills): detect prompt injection patterns and block cache file reads (#1558)
#   fix(tools): chunk long messages in send_message_tool before dispatch (#1552)
#   fix(approval): show full command in dangerous command approval (#1553)
#   fix(cli): flush stdout during agent loop to prevent macOS display freeze (#1624)
#   fix(claw): warn when API keys are skipped during OpenClaw migration (#1580)
#   fix(security): block sandbox backend creds from subprocess env (#1264)
#   fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816)
#   (committed 2026-03-17)
# NOTE(review): git-history annotation accidentally embedded in the source
# (blame-view scrape). Condensed to a comment so the file stays parseable:
#   fix(gateway): process /queue'd messages after agent completion (#2469)
#   - /queue stored messages in adapter._pending_messages but the consumption
#     path only ran when result.get('interrupted') was True, so queued
#     messages were silently dropped; now checked after normal completion too.
#   (committed 2026-03-22)
# Check if we were interrupted OR have a queued message (/queue).
# NOTE(review): result_holder appears to be a one-slot container filled by
# the agent run earlier in this method — confirm upstream.
result = result_holder[0]
# Adapter for the originating platform; .get() may return None if the
# platform is no longer configured.
adapter = self.adapters.get(source.platform)
# (duplicate embedded git-blame annotation for commit #2469 reduced to this marker)
# Get pending message from adapter.
# Use session_key (not source.chat_id) to match adapter's storage keys.
pending_event = None  # queued message event (text and/or media), if any
pending = None        # resolved text to feed into the follow-up agent run
# (duplicate embedded git-blame annotation for commit #2469 reduced to this marker)
if result and adapter and session_key:
    # Pull a queued event for this session from the adapter, if any.
    pending_event = _dequeue_pending_event(adapter, session_key)
    if result.get("interrupted") and not pending_event and result.get("interrupt_message"):
        # Interrupt path: the interrupting message text rides along on the
        # agent result itself when nothing was explicitly queued.
        pending = result.get("interrupt_message")
    elif pending_event:
        # /queue path: use the event's text, or a placeholder describing the
        # attached media when the event has no text.
        pending = pending_event.text or _build_media_placeholder(pending_event)
        logger.debug("Processing queued message after agent completion: '%s...'", pending[:40])
# NOTE(review): git-history annotation accidentally embedded in the source
# (blame-view scrape). Condensed to a comment so the file stays parseable:
#   fix(gateway): /stop and /new bypass Level 1 active-session guard (#5765)
#   - commands in the bypass set are dispatched inline to the gateway runner;
#     a safety net in _run_agent discards known slash commands found in the
#     pending queue so they are never passed to the agent as user input.
#   (committed 2026-04-07, refs #5244)
# Safety net: if the pending text is a slash command (e.g. "/stop",
# "/new"), discard it — commands should never be passed to the agent
# as user input. The primary fix is in base.py (commands bypass the
# active-session guard), but this catches edge cases where command
# text leaks through the interrupt_message fallback.
if pending and pending.strip().startswith("/"):
    # First whitespace-separated token, minus the leading "/", lowercased.
    _pending_parts = pending.strip().split(None, 1)
    _pending_cmd_word = _pending_parts[0][1:].lower() if _pending_parts else ""
    if _pending_cmd_word:
        try:
            from hermes_cli.commands import resolve_command as _rc_pending
            # Only discard text that resolves to a KNOWN command, so inputs
            # like file paths ("/path/to/file") still reach the agent.
            if _rc_pending(_pending_cmd_word):
                logger.info(
                    "Discarding command '/%s' from pending queue — "
                    "commands must not be passed as agent input",
                    _pending_cmd_word,
                )
                pending_event = None
                            # fix(gateway): /stop and /new bypass Level 1 active-session guard (#5765, 2026-04-07)
pending = None
except Exception:
pass
if self._draining and (pending_event or pending):
logger.info(
"Discarding pending follow-up for session %s during gateway %s",
session_key[:20] if session_key else "?",
self._status_action_label(),
)
pending_event = None
pending = None
if pending_event or pending:
                # fix(gateway): process /queue'd messages after agent completion (#2469, 2026-03-22)
logger.debug("Processing pending message: '%s...'", pending[:40])
# Clear the adapter's interrupt event so the next _run_agent call
# doesn't immediately re-trigger the interrupt before the new agent
# even makes its first API call (this was causing an infinite loop).
if adapter and hasattr(adapter, '_active_sessions') and session_key and session_key in adapter._active_sessions:
adapter._active_sessions[session_key].clear()
            # fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816, 2026-03-17)
# Cap recursion depth to prevent resource exhaustion when the
# user sends multiple messages while the agent keeps failing. (#816)
if _interrupt_depth >= self._MAX_INTERRUPT_DEPTH:
logger.warning(
"Interrupt recursion depth %d reached for session %s"
"queueing message instead of recursing.",
_interrupt_depth, session_key,
)
adapter = self.adapters.get(source.platform)
if adapter and pending_event:
merge_pending_message_event(adapter._pending_messages, session_key, pending_event)
elif adapter and hasattr(adapter, 'queue_message'):
                    # fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816, 2026-03-17)
adapter.queue_message(session_key, pending)
return result_holder[0] or {"final_response": response, "messages": history}
            # fix(gateway): process /queue'd messages after agent completion (#2469, 2026-03-22)
was_interrupted = result.get("interrupted")
if not was_interrupted:
# Queued message after normal completion — deliver the first
# response before processing the queued follow-up.
# Skip if streaming already delivered it.
_sc = stream_consumer_holder[0]
if _sc and stream_task:
try:
await asyncio.wait_for(stream_task, timeout=5.0)
except (asyncio.TimeoutError, asyncio.CancelledError):
stream_task.cancel()
try:
await stream_task
except asyncio.CancelledError:
pass
except Exception as e:
logger.debug("Stream consumer wait before queued message failed: %s", e)
_previewed = bool(result.get("response_previewed"))
_already_streamed = bool(
(_sc and getattr(_sc, "final_response_sent", False))
or _previewed
)
                # fix(gateway): process /queue'd messages after agent completion (#2469, 2026-03-22)
first_response = result.get("final_response", "")
if first_response and not _already_streamed:
try:
logger.info(
"Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.",
session_key[:20] if session_key else "?",
)
await adapter.send(
source.chat_id,
first_response,
metadata=_status_thread_metadata,
)
                        # fix(gateway): process /queue'd messages after agent completion (#2469, 2026-03-22)
except Exception as e:
logger.warning("Failed to send first response before queued message: %s", e)
elif first_response:
logger.info(
"Queued follow-up for session %s: skipping resend because final streamed delivery was confirmed.",
session_key[:20] if session_key else "?",
)
# Release deferred bg-review notifications now that the
# first response has been delivered. Pop from the
# adapter's callback dict (prevents double-fire in
# base.py's finally block) and call it.
if adapter and hasattr(adapter, "_post_delivery_callbacks"):
_bg_cb = adapter._post_delivery_callbacks.pop(session_key, None)
if callable(_bg_cb):
try:
_bg_cb()
except Exception:
pass
            # fix(gateway): process /queue'd messages after agent completion (#2469, 2026-03-22)
# else: interrupted — discard the interrupted response ("Operation
# interrupted." is just noise; the user already knows they sent a
# new message).
updated_history = result.get("messages", history)
next_source = source
next_message = pending
next_message_id = None
next_channel_prompt = None
if pending_event is not None:
next_source = getattr(pending_event, "source", None) or source
next_message = await self._prepare_inbound_message_text(
event=pending_event,
source=next_source,
history=updated_history,
)
if next_message is None:
return result
next_message_id = getattr(pending_event, "message_id", None)
next_channel_prompt = getattr(pending_event, "channel_prompt", None)
# Restart typing indicator so the user sees activity while
# the follow-up turn runs. The outer _process_message_background
# typing task is still alive but may be stale.
_followup_adapter = self.adapters.get(source.platform)
if _followup_adapter:
try:
await _followup_adapter.send_typing(
source.chat_id,
metadata=_status_thread_metadata,
)
except Exception:
pass
return await self._run_agent(
message=next_message,
context_prompt=context_prompt,
history=updated_history,
source=next_source,
session_id=session_id,
                # fix(gateway): cap interrupt recursion depth to prevent resource exhaustion (#816, 2026-03-17)
session_key=session_key,
_interrupt_depth=_interrupt_depth + 1,
event_message_id=next_message_id,
channel_prompt=next_channel_prompt,
)
finally:
# Stop progress sender, interrupt monitor, and notification task
if progress_task:
progress_task.cancel()
interrupt_monitor.cancel()
_notify_task.cancel()
# Wait for stream consumer to finish its final edit
if stream_task:
try:
await asyncio.wait_for(stream_task, timeout=5.0)
except (asyncio.TimeoutError, asyncio.CancelledError):
stream_task.cancel()
try:
await stream_task
except asyncio.CancelledError:
pass
# Clean up tracking
tracking_task.cancel()
if session_key and session_key in self._running_agents:
del self._running_agents[session_key]
if session_key:
self._running_agents_ts.pop(session_key, None)
if self._draining:
self._update_runtime_status("draining")
# Wait for cancelled tasks
for task in [progress_task, interrupt_monitor, tracking_task, _notify_task]:
if task:
try:
await task
except asyncio.CancelledError:
pass
# If streaming already delivered the response, mark it so the
# caller's send() is skipped (avoiding duplicate messages).
# BUT: never suppress delivery when the agent failed — the error
# message is new content the user hasn't seen, and it must reach
# them even if streaming had sent earlier partial output.
#
# Also never suppress when the final response is "(empty)" — this
# means the model failed to produce content after tool calls (common
# with mimo-v2-pro, GLM-5, etc.). The stream consumer may have
# sent intermediate text ("Let me search for that…") alongside the
# tool call, setting already_sent=True, but that text is NOT the
# final answer. Suppressing delivery here leaves the user staring
# at silence. (#10xxx — "agent stops after web search")
_sc = stream_consumer_holder[0]
if isinstance(response, dict) and not response.get("failed"):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
_streamed = bool(
_sc and getattr(_sc, "final_response_sent", False)
)
# response_previewed means the interim_assistant_callback already
# sent the final text via the adapter (non-streaming path).
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
logger.info(
"Suppressing normal final send for session %s: final delivery already confirmed (streamed=%s previewed=%s).",
session_key[:20] if session_key else "?",
_streamed,
_previewed,
)
response["already_sent"] = True
return response
def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, interval: int = 60):
    """
    Background thread that ticks the cron scheduler at a regular interval.

    Runs inside the gateway process so cronjobs fire automatically without
    needing a separate `hermes cron daemon` or system cron entry.

    When ``adapters`` and ``loop`` are provided, passes them through to the
    cron delivery path so live adapters can be used for E2EE rooms.

    Also refreshes the channel directory every 5 minutes and prunes the
    image/audio/document cache once per hour.  Exits when ``stop_event``
    is set; every maintenance step is best-effort so a failure never
    kills the thread.
    """
    from cron.scheduler import tick as cron_tick
    from gateway.platforms.base import cleanup_image_cache, cleanup_document_cache

    IMAGE_CACHE_EVERY = 60  # ticks — once per hour at default 60s interval
    CHANNEL_DIR_EVERY = 5  # ticks — every 5 minutes

    def _refresh_channel_directory():
        # Rebuild the channel directory from the live adapters; failures
        # are logged at DEBUG and otherwise ignored.
        try:
            from gateway.channel_directory import build_channel_directory
            build_channel_directory(adapters)
        except Exception as e:
            logger.debug("Channel directory refresh error: %s", e)

    def _prune_caches():
        # Image and document caches are pruned independently so one
        # failure doesn't prevent the other cleanup from running.
        try:
            removed = cleanup_image_cache(max_age_hours=24)
            if removed:
                logger.info("Image cache cleanup: removed %d stale file(s)", removed)
        except Exception as e:
            logger.debug("Image cache cleanup error: %s", e)
        try:
            removed = cleanup_document_cache(max_age_hours=24)
            if removed:
                logger.info("Document cache cleanup: removed %d stale file(s)", removed)
        except Exception as e:
            logger.debug("Document cache cleanup error: %s", e)

    logger.info("Cron ticker started (interval=%ds)", interval)
    ticks = 0
    while not stop_event.is_set():
        try:
            cron_tick(verbose=False, adapters=adapters, loop=loop)
        except Exception as e:
            logger.debug("Cron tick error: %s", e)
        ticks += 1
        if ticks % CHANNEL_DIR_EVERY == 0 and adapters:
            _refresh_channel_directory()
        if ticks % IMAGE_CACHE_EVERY == 0:
            _prune_caches()
        stop_event.wait(timeout=interval)
    logger.info("Cron ticker stopped")
async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False, verbosity: Optional[int] = 0) -> bool:
    """
    Start the gateway and run until interrupted.

    This is the main entry point for running the gateway.
    Returns True if the gateway ran successfully, False if it failed to start.
    A False return causes a non-zero exit code so systemd can auto-restart.

    Args:
        config: Optional gateway configuration override.
        replace: If True, kill any existing gateway instance before starting.
                 Useful for systemd services to avoid restart-loop deadlocks
                 when the previous process hasn't fully exited yet.
        verbosity: Stderr log verbosity. None (-q/--quiet) = no stderr
                 handler; 0 (default) = WARNING+; 1 (-v) = INFO+;
                 2+ (-vv/-vvv) = DEBUG.
    """
    # ── Duplicate-instance guard ──────────────────────────────────────
    # Prevent two gateways from running under the same HERMES_HOME.
    # The PID file is scoped to HERMES_HOME, so future multi-profile
    # setups (each profile using a distinct HERMES_HOME) will naturally
    # allow concurrent instances without tripping this guard.
    import time as _time
    from gateway.status import get_running_pid, remove_pid_file, terminate_pid
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
        if replace:
            logger.info(
                "Replacing existing gateway instance (PID %d) with --replace.",
                existing_pid,
            )
            try:
                terminate_pid(existing_pid, force=False)
            except ProcessLookupError:
                pass  # Already gone
            except (PermissionError, OSError):
                logger.error(
                    "Permission denied killing PID %d. Cannot replace.",
                    existing_pid,
                )
                return False
            # Wait up to 10 seconds for the old process to exit
            for _ in range(20):
                try:
                    os.kill(existing_pid, 0)  # signal 0 = existence probe only
                    _time.sleep(0.5)
                except (ProcessLookupError, PermissionError):
                    break  # Process is gone
            else:
                # Still alive after 10s — force kill
                logger.warning(
                    "Old gateway (PID %d) did not exit after SIGTERM, sending SIGKILL.",
                    existing_pid,
                )
                try:
                    terminate_pid(existing_pid, force=True)
                    _time.sleep(0.5)
                except (ProcessLookupError, PermissionError, OSError):
                    pass
            remove_pid_file()
            # Also release all scoped locks left by the old process.
            # Stopped (Ctrl+Z) processes don't release locks on exit,
            # leaving stale lock files that block the new gateway from starting.
            try:
                from gateway.status import release_all_scoped_locks
                _released = release_all_scoped_locks()
                if _released:
                    logger.info("Released %d stale scoped lock(s) from old gateway.", _released)
            except Exception:
                pass
        else:
            hermes_home = str(get_hermes_home())
            logger.error(
                "Another gateway instance is already running (PID %d, HERMES_HOME=%s). "
                "Use 'hermes gateway restart' to replace it, or 'hermes gateway stop' first.",
                existing_pid, hermes_home,
            )
            print(
                f"\n❌ Gateway already running (PID {existing_pid}).\n"
                f" Use 'hermes gateway restart' to replace it,\n"
                f" or 'hermes gateway stop' to kill it first.\n"
                f" Or use 'hermes gateway run --replace' to auto-replace.\n"
            )
            return False
    # Sync bundled skills on gateway start (fast -- skips unchanged)
    try:
        from tools.skills_sync import sync_skills
        sync_skills(quiet=True)
    except Exception:
        pass
    # Centralized logging — agent.log (INFO+), errors.log (WARNING+),
    # and gateway.log (INFO+, gateway-component records only).
    # Idempotent, so repeated calls from AIAgent.__init__ won't duplicate.
    from hermes_logging import setup_logging
    # NOTE(review): _hermes_home had no visible definition (clobbered by a
    # merge/blame artifact in this region) — restored here; confirm upstream.
    _hermes_home = get_hermes_home()
    setup_logging(hermes_home=_hermes_home, mode="gateway")
    # Optional stderr handler — level driven by -v/-q flags on the CLI.
    #   verbosity=None (-q/--quiet): no stderr output
    #   verbosity=0 (default): WARNING and above
    #   verbosity=1 (-v): INFO and above
    #   verbosity=2+ (-vv/-vvv): DEBUG
    if verbosity is not None:
        from agent.redact import RedactingFormatter
        _stderr_level = {0: logging.WARNING, 1: logging.INFO}.get(verbosity, logging.DEBUG)
        _stderr_handler = logging.StreamHandler()
        _stderr_handler.setLevel(_stderr_level)
        _stderr_handler.setFormatter(RedactingFormatter('%(levelname)s %(name)s: %(message)s'))
        logging.getLogger().addHandler(_stderr_handler)
        # Lower root logger level if needed so DEBUG records can reach the handler
        if _stderr_level < logging.getLogger().level:
            logging.getLogger().setLevel(_stderr_level)
    runner = GatewayRunner(config)
    # Track whether a signal initiated the shutdown (vs. internal request).
    # When an unexpected SIGTERM kills the gateway, we exit non-zero so
    # systemd's Restart=on-failure revives the process. systemctl stop
    # is safe: systemd tracks stop-requested state independently of exit
    # code, so Restart= never fires for a deliberate stop.
    _signal_initiated_shutdown = False

    # Set up signal handlers
    def shutdown_signal_handler():
        nonlocal _signal_initiated_shutdown
        _signal_initiated_shutdown = True
        logger.info("Received SIGTERM/SIGINT — initiating shutdown")
        # Diagnostic: log all hermes-related processes so we can identify
        # what triggered the signal (hermes update, hermes gateway restart,
        # a stale detached subprocess, etc.).
        try:
            import subprocess as _sp
            _ps = _sp.run(
                ["ps", "aux"],
                capture_output=True, text=True, timeout=3,
            )
            _hermes_procs = [
                line for line in _ps.stdout.splitlines()
                if ("hermes" in line.lower() or "gateway" in line.lower())
                and str(os.getpid()) not in line.split()[1:2]  # exclude self
            ]
            if _hermes_procs:
                logger.warning(
                    "Shutdown diagnostic — other hermes processes running:\n %s",
                    "\n ".join(_hermes_procs),
                )
            else:
                logger.info("Shutdown diagnostic — no other hermes processes found")
        except Exception:
            pass
        asyncio.create_task(runner.stop())

    def restart_signal_handler():
        runner.request_restart(detached=False, via_service=True)

    loop = asyncio.get_running_loop()
    if threading.current_thread() is threading.main_thread():
        for sig in (signal.SIGINT, signal.SIGTERM):
            try:
                loop.add_signal_handler(sig, shutdown_signal_handler)
            except NotImplementedError:
                pass  # e.g. Windows event loops
        if hasattr(signal, "SIGUSR1"):
            try:
                loop.add_signal_handler(signal.SIGUSR1, restart_signal_handler)
            except NotImplementedError:
                pass
    else:
        logger.info("Skipping signal handlers (not running in main thread).")
    # Start the gateway
    success = await runner.start()
    if not success:
        return False
    if runner.should_exit_cleanly:
        if runner.exit_reason:
            logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
        return True
    # Write PID file so CLI can detect gateway is running
    import atexit
    from gateway.status import write_pid_file, remove_pid_file
    write_pid_file()
    atexit.register(remove_pid_file)
    # Start background cron ticker so scheduled jobs fire automatically.
    # Pass the event loop so cron delivery can use live adapters (E2EE support).
    cron_stop = threading.Event()
    cron_thread = threading.Thread(
        target=_start_cron_ticker,
        args=(cron_stop,),
        kwargs={"adapters": runner.adapters, "loop": asyncio.get_running_loop()},
        daemon=True,
        name="cron-ticker",
    )
    cron_thread.start()
    # Wait for shutdown
    await runner.wait_for_shutdown()
    if runner.should_exit_with_failure:
        if runner.exit_reason:
            logger.error("Gateway exiting with failure: %s", runner.exit_reason)
        return False
    # Stop cron ticker cleanly
    cron_stop.set()
    cron_thread.join(timeout=5)
    # Close MCP server connections
    try:
        from tools.mcp_tool import shutdown_mcp_servers
        shutdown_mcp_servers()
    except Exception:
        pass
    if runner.exit_code is not None:
        raise SystemExit(runner.exit_code)
    # When a signal (SIGTERM/SIGINT) caused the shutdown and it wasn't a
    # planned restart (/restart, /update, SIGUSR1), exit non-zero so
    # systemd's Restart=on-failure revives the process. This covers:
    #   - hermes update killing the gateway mid-work
    #   - External kill commands
    #   - WSL2/container runtime sending unexpected signals
    # systemctl stop is safe: systemd tracks "stop requested" state
    # independently of exit code, so Restart= never fires for it.
    if _signal_initiated_shutdown and not runner._restart_requested:
        logger.info(
            "Exiting with code 1 (signal-initiated shutdown without restart "
            "request) so systemd Restart=on-failure can revive the gateway."
        )
        return False  # → sys.exit(1) in the caller
    return True
def main():
    """CLI entry point for the gateway.

    Parses command-line arguments, optionally loads a YAML gateway config,
    and runs the gateway event loop. Exits with code 1 on startup failure
    so systemd's Restart=on-failure will retry on transient errors
    (e.g. DNS).
    """
    import argparse
    parser = argparse.ArgumentParser(description="Hermes Gateway - Multi-platform messaging")
    parser.add_argument("--config", "-c", help="Path to gateway config file")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    args = parser.parse_args()
    config = None
    if args.config:
        import yaml
        with open(args.config, encoding="utf-8") as f:
            data = yaml.safe_load(f)
        config = GatewayConfig.from_dict(data)
    # Fix: --verbose was parsed but never honored. Map it onto
    # start_gateway's verbosity knob (0 = WARNING+, 1 = INFO+).
    success = asyncio.run(start_gateway(config, verbosity=1 if args.verbose else 0))
    if not success:
        sys.exit(1)
# Allow direct execution: `python -m gateway.run` (see module docstring).
if __name__ == "__main__":
    main()