# gateway/run.py

"""
Gateway runner - entry point for messaging platform integrations.

This module provides:
- start_gateway(): Start all configured platform adapters
- GatewayRunner: Main class managing the gateway lifecycle

Usage:
    # Start the gateway
    python -m gateway.run
    
    # Or from CLI
    python cli.py --gateway
"""

import asyncio
import logging
import os
import re
import sys
import signal
import threading
from logging.handlers import RotatingFileHandler
from pathlib import Path
from datetime import datetime
from typing import Dict, Optional, Any, List

# Add parent directory to path
# (two levels up from this file, inserted at the front of sys.path so the
# absolute imports further down can be resolved when this file is run directly).
sys.path.insert(0, str(Path(__file__).parent.parent))

# Resolve Hermes home directory (respects HERMES_HOME override)
# Defaults to ~/.hermes when HERMES_HOME is not set.
_hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))

# Load environment variables from ~/.hermes/.env first
# (more precisely: from <_hermes_home>/.env, which follows HERMES_HOME).
from dotenv import load_dotenv
_env_path = _hermes_home / '.env'
if _env_path.exists():
    try:
        load_dotenv(_env_path, encoding="utf-8")
    except UnicodeDecodeError:
        # The file is not valid UTF-8; retry with latin-1 instead of failing.
        load_dotenv(_env_path, encoding="latin-1")
# Also try project .env as fallback
# NOTE(review): presumably load_dotenv()'s default of not overriding variables
# that are already set is what makes this a fallback — confirm against python-dotenv.
load_dotenv()

# Bridge config.yaml values into the environment so os.getenv() picks them up.
# config.yaml is authoritative for terminal settings — overrides .env.
#
# Precedence implemented below:
#   * top-level scalars            -> only set when the variable is not already present
#   * terminal / compression / auxiliary / agent / security -> always overwrite
#   * timezone                     -> only set when HERMES_TIMEZONE is not already present
_config_path = _hermes_home / 'config.yaml'
if _config_path.exists():
    try:
        import yaml as _yaml
        # json is not imported at the top of this module, yet list-valued
        # terminal settings are serialised with it below.  Without this import
        # the resulting NameError was swallowed by the broad except at the
        # bottom and silently skipped every bridge that follows the terminal
        # section.
        import json as _json
        with open(_config_path, encoding="utf-8") as _f:
            _cfg = _yaml.safe_load(_f) or {}
        # Top-level simple values (fallback only — don't override .env)
        for _key, _val in _cfg.items():
            if isinstance(_val, (str, int, float, bool)) and _key not in os.environ:
                os.environ[_key] = str(_val)
        # Terminal config is nested — bridge to TERMINAL_* env vars.
        # config.yaml overrides .env for these since it's the documented config path.
        _terminal_cfg = _cfg.get("terminal", {})
        if _terminal_cfg and isinstance(_terminal_cfg, dict):
            _terminal_env_map = {
                "backend": "TERMINAL_ENV",
                "cwd": "TERMINAL_CWD",
                "timeout": "TERMINAL_TIMEOUT",
                "lifetime_seconds": "TERMINAL_LIFETIME_SECONDS",
                "docker_image": "TERMINAL_DOCKER_IMAGE",
                "singularity_image": "TERMINAL_SINGULARITY_IMAGE",
                "modal_image": "TERMINAL_MODAL_IMAGE",
                "daytona_image": "TERMINAL_DAYTONA_IMAGE",
                "ssh_host": "TERMINAL_SSH_HOST",
                "ssh_user": "TERMINAL_SSH_USER",
                "ssh_port": "TERMINAL_SSH_PORT",
                "ssh_key": "TERMINAL_SSH_KEY",
                "container_cpu": "TERMINAL_CONTAINER_CPU",
                "container_memory": "TERMINAL_CONTAINER_MEMORY",
                "container_disk": "TERMINAL_CONTAINER_DISK",
                "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
                "docker_volumes": "TERMINAL_DOCKER_VOLUMES",
                "sandbox_dir": "TERMINAL_SANDBOX_DIR",
            }
            for _cfg_key, _env_var in _terminal_env_map.items():
                if _cfg_key in _terminal_cfg:
                    _val = _terminal_cfg[_cfg_key]
                    if isinstance(_val, list):
                        # Lists cannot be stored in an env var directly; encode as JSON.
                        os.environ[_env_var] = _json.dumps(_val)
                    else:
                        os.environ[_env_var] = str(_val)
        # Context-compression settings -> CONTEXT_COMPRESSION_* env vars.
        _compression_cfg = _cfg.get("compression", {})
        if _compression_cfg and isinstance(_compression_cfg, dict):
            _compression_env_map = {
                "enabled": "CONTEXT_COMPRESSION_ENABLED",
                "threshold": "CONTEXT_COMPRESSION_THRESHOLD",
                "summary_model": "CONTEXT_COMPRESSION_MODEL",
                "summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
            }
            for _cfg_key, _env_var in _compression_env_map.items():
                if _cfg_key in _compression_cfg:
                    os.environ[_env_var] = str(_compression_cfg[_cfg_key])
        # Auxiliary model overrides (vision, web_extract).
        # Each task has provider + model; bridge non-default values to env vars.
        _auxiliary_cfg = _cfg.get("auxiliary", {})
        if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
            _aux_task_env = {
                "vision":      ("AUXILIARY_VISION_PROVIDER",      "AUXILIARY_VISION_MODEL"),
                "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER",  "AUXILIARY_WEB_EXTRACT_MODEL"),
            }
            for _task_key, (_prov_env, _model_env) in _aux_task_env.items():
                _task_cfg = _auxiliary_cfg.get(_task_key, {})
                if not isinstance(_task_cfg, dict):
                    continue
                _prov = str(_task_cfg.get("provider", "")).strip()
                _model = str(_task_cfg.get("model", "")).strip()
                # "auto" and empty values are treated as "no override".
                if _prov and _prov != "auto":
                    os.environ[_prov_env] = _prov
                if _model:
                    os.environ[_model_env] = _model
        # agent.max_turns -> HERMES_MAX_ITERATIONS
        _agent_cfg = _cfg.get("agent", {})
        if _agent_cfg and isinstance(_agent_cfg, dict):
            if "max_turns" in _agent_cfg:
                os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
        # Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
        # HERMES_TIMEZONE from .env takes precedence (already in os.environ).
        _tz_cfg = _cfg.get("timezone", "")
        if _tz_cfg and isinstance(_tz_cfg, str) and "HERMES_TIMEZONE" not in os.environ:
            os.environ["HERMES_TIMEZONE"] = _tz_cfg.strip()
        # Security settings
        _security_cfg = _cfg.get("security", {})
        if isinstance(_security_cfg, dict):
            _redact = _security_cfg.get("redact_secrets")
            if _redact is not None:
                # Normalised to lowercase text, e.g. True -> "true".
                os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
    except Exception:
        pass  # Non-fatal; gateway can still run with .env values

# Flags the gateway always forces, regardless of .env / config.yaml:
#   HERMES_QUIET    — quiet mode: suppress debug output and use cwd directly (no temp dirs)
#   HERMES_EXEC_ASK — interactive exec approval for dangerous commands on messaging platforms
os.environ.update({
    "HERMES_QUIET": "1",
    "HERMES_EXEC_ASK": "1",
})

# Terminal working directory for messaging platforms.
# An explicit path (anything other than empty, ".", "auto" or "cwd") is kept
# as-is; otherwise MESSAGING_CWD is used, falling back to the home directory.
_configured_cwd = os.environ.get("TERMINAL_CWD", "")
if _configured_cwd in ("", ".", "auto", "cwd"):
    messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home())
    os.environ["TERMINAL_CWD"] = messaging_cwd

from gateway.config import (
    Platform,
    GatewayConfig,
    load_gateway_config,
)
from gateway.session import (
    SessionStore,
    SessionSource,
    SessionContext,
    build_session_context,
    build_session_context_prompt,
    build_session_key,
)
from gateway.delivery import DeliveryRouter, DeliveryTarget
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType

logger = logging.getLogger(__name__)


def _resolve_runtime_agent_kwargs() -> dict:
    """Build the provider-related keyword arguments for a gateway AIAgent.

    The provider named by ``HERMES_INFERENCE_PROVIDER`` (``None`` when the
    variable is unset) is passed to ``resolve_runtime_provider``.

    Returns:
        A dict with the keys ``api_key``, ``base_url``, ``provider`` and
        ``api_mode``; a key missing from the resolved runtime maps to ``None``.

    Raises:
        RuntimeError: when provider resolution fails.  The message comes from
            ``format_runtime_provider_error`` and the original exception is
            chained as the cause.
    """
    from hermes_cli.runtime_provider import (
        format_runtime_provider_error,
        resolve_runtime_provider,
    )

    requested_provider = os.getenv("HERMES_INFERENCE_PROVIDER")
    try:
        resolved = resolve_runtime_provider(requested=requested_provider)
    except Exception as exc:
        raise RuntimeError(format_runtime_provider_error(exc)) from exc

    wanted_fields = ("api_key", "base_url", "provider", "api_mode")
    return {field: resolved.get(field) for field in wanted_fields}


def _resolve_gateway_model() -> str:
    """Pick the model name used for temporary gateway AIAgent instances.

    Starting point is ``HERMES_MODEL``, then ``LLM_MODEL``, then the hardcoded
    "anthropic/claude-opus-4.6".  If config.yaml exists, its ``model`` entry
    wins over that starting point: a plain string is used as-is, and a mapping
    contributes its ``default`` key (keeping the starting point when the key
    is absent).  Any error while reading the config leaves the starting point
    in effect.

    The docstring this replaces noted that temporary agents (memory flush,
    /compress) would otherwise fall back to the hardcoded default, which fails
    when the active provider is openai-codex.
    """
    resolved = (
        os.getenv("HERMES_MODEL")
        or os.getenv("LLM_MODEL")
        or "anthropic/claude-opus-4.6"
    )
    try:
        import yaml as _y
        config_file = _hermes_home / "config.yaml"
        if not config_file.exists():
            return resolved
        with open(config_file, encoding="utf-8") as handle:
            parsed = _y.safe_load(handle) or {}
        model_entry = parsed.get("model", {})
        if isinstance(model_entry, str):
            return model_entry
        if isinstance(model_entry, dict):
            return model_entry.get("default", resolved)
    except Exception:
        pass
    return resolved


class GatewayRunner:
    """
    Main gateway controller.
    
    Manages the lifecycle of all platform adapters and routes
    messages to/from the agent.
    """
    
    def __init__(self, config: Optional[GatewayConfig] = None):
        """Set up gateway state; no platform adapter is created or connected here.

        Args:
            config: Gateway configuration.  When omitted (or falsy),
                ``load_gateway_config()`` is called to obtain one.
        """
        self.config = config or load_gateway_config()
        # Filled in by start() with the adapters that connected successfully.
        self.adapters: Dict[Platform, BasePlatformAdapter] = {}

        # Load ephemeral config from config.yaml / env vars.
        # Both are injected at API-call time only and never persisted.
        # NOTE(review): "Both" predates the additional loaders below — confirm
        # whether the statement applies to all of them.
        self._prefill_messages = self._load_prefill_messages()
        self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
        self._reasoning_config = self._load_reasoning_config()
        self._show_reasoning = self._load_show_reasoning()
        self._provider_routing = self._load_provider_routing()
        self._fallback_model = self._load_fallback_model()

        # Wire process registry into session store for reset protection
        from tools.process_registry import process_registry
        self.session_store = SessionStore(
            self.config.sessions_dir, self.config,
            has_active_processes_fn=lambda key: process_registry.has_active_for_session(key),
        )
        self.delivery_router = DeliveryRouter(self.config)
        # _running is set by start() and cleared by stop(); _shutdown_event is
        # set by stop() and awaited by wait_for_shutdown().
        self._running = False
        self._shutdown_event = asyncio.Event()
        
        # Track running agents per session for interrupt support
        # Key: session_key, Value: AIAgent instance
        self._running_agents: Dict[str, Any] = {}
        self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
        
        # Track pending exec approvals per session
        # Key: session_key, Value: {"command": str, "pattern_key": str, ...}
        self._pending_approvals: Dict[str, Dict[str, Any]] = {}

        # Persistent Honcho managers keyed by gateway session key.
        # This preserves write_frequency="session" semantics across short-lived
        # per-message AIAgent instances.
        self._honcho_managers: Dict[str, Any] = {}
        self._honcho_configs: Dict[str, Any] = {}

        # Ensure tirith security scanner is available (downloads if needed)
        try:
            from tools.tirith_security import ensure_installed
            ensure_installed()
        except Exception:
            pass  # Non-fatal — fail-open at scan time if unavailable
        
        # Initialize session database for session_search tool support
        # Stays None when hermes_state / SessionDB cannot be imported or constructed.
        self._session_db = None
        try:
            from hermes_state import SessionDB
            self._session_db = SessionDB()
        except Exception as e:
            logger.debug("SQLite session store not available: %s", e)
        
        # DM pairing store for code-based user authorization
        from gateway.pairing import PairingStore
        self.pairing_store = PairingStore()
        
        # Event hook system
        # Hooks are only instantiated here; discovery/loading happens in start().
        from gateway.hooks import HookRegistry
        self.hooks = HookRegistry()

    def _get_or_create_gateway_honcho(self, session_key: str):
        """Return a persistent Honcho manager/config pair for this gateway session."""
        if not hasattr(self, "_honcho_managers"):
            self._honcho_managers = {}
        if not hasattr(self, "_honcho_configs"):
            self._honcho_configs = {}

        if session_key in self._honcho_managers:
            return self._honcho_managers[session_key], self._honcho_configs.get(session_key)

        try:
            from honcho_integration.client import HonchoClientConfig, get_honcho_client
            from honcho_integration.session import HonchoSessionManager

            hcfg = HonchoClientConfig.from_global_config()
            if not hcfg.enabled or not hcfg.api_key:
                return None, hcfg

            client = get_honcho_client(hcfg)
            manager = HonchoSessionManager(
                honcho=client,
                config=hcfg,
                context_tokens=hcfg.context_tokens,
            )
            self._honcho_managers[session_key] = manager
            self._honcho_configs[session_key] = hcfg
            return manager, hcfg
        except Exception as e:
            logger.debug("Gateway Honcho init failed for %s: %s", session_key, e)
            return None, None

    def _shutdown_gateway_honcho(self, session_key: str) -> None:
        """Flush and close the persistent Honcho manager for a gateway session."""
        managers = getattr(self, "_honcho_managers", None)
        configs = getattr(self, "_honcho_configs", None)
        if managers is None or configs is None:
            return

        manager = managers.pop(session_key, None)
        configs.pop(session_key, None)
        if not manager:
            return
        try:
            manager.shutdown()
        except Exception as e:
            logger.debug("Gateway Honcho shutdown failed for %s: %s", session_key, e)

    def _shutdown_all_gateway_honcho(self) -> None:
        """Flush and close all persistent Honcho managers."""
        managers = getattr(self, "_honcho_managers", None)
        if not managers:
            return
        for session_key in list(managers.keys()):
            self._shutdown_gateway_honcho(session_key)
    
    def _flush_memories_for_session(self, old_session_id: str):
        """Prompt the agent to save memories/skills before context is lost.

        Synchronous worker — meant to be called via run_in_executor from
        an async context so it doesn't block the event loop.
        """
        try:
            history = self.session_store.load_transcript(old_session_id)
            if not history or len(history) < 4:
                return

            from run_agent import AIAgent
            runtime_kwargs = _resolve_runtime_agent_kwargs()
            if not runtime_kwargs.get("api_key"):
                return

            # Resolve model from config — AIAgent's default is OpenRouter-
            # formatted ("anthropic/claude-opus-4.6") which fails when the
            # active provider is openai-codex.
            model = _resolve_gateway_model()

            tmp_agent = AIAgent(
                **runtime_kwargs,
                model=model,
                max_iterations=8,
                quiet_mode=True,
                enabled_toolsets=["memory", "skills"],
                session_id=old_session_id,
            )

            # Build conversation history from transcript
            msgs = [
                {"role": m.get("role"), "content": m.get("content")}
                for m in history
                if m.get("role") in ("user", "assistant") and m.get("content")
            ]

            # Give the agent a real turn to think about what to save
            flush_prompt = (
                "[System: This session is about to be automatically reset due to "
                "inactivity or a scheduled daily reset. The conversation context "
                "will be cleared after this turn.\n\n"
                "Review the conversation above and:\n"
                "1. Save any important facts, preferences, or decisions to memory "
                "(user profile or your notes) that would be useful in future sessions.\n"
                "2. If you discovered a reusable workflow or solved a non-trivial "
                "problem, consider saving it as a skill.\n"
                "3. If nothing is worth saving, that's fine — just skip.\n\n"
                "Do NOT respond to the user. Just use the memory and skill_manage "
                "tools if needed, then stop.]"
            )

            tmp_agent.run_conversation(
                user_message=flush_prompt,
                conversation_history=msgs,
            )
            logger.info("Pre-reset memory flush completed for session %s", old_session_id)
            # Flush any queued Honcho writes before the session is dropped
            if getattr(tmp_agent, '_honcho', None):
                try:
                    tmp_agent._honcho.shutdown()
                except Exception:
                    pass
        except Exception as e:
            logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)

    async def _async_flush_memories(self, old_session_id: str):
        """Run the sync memory flush in a thread pool so it won't block the event loop."""
        loop = asyncio.get_event_loop()
        await loop.run_in_executor(None, self._flush_memories_for_session, old_session_id)
    
    @staticmethod
    def _load_prefill_messages() -> List[Dict[str, Any]]:
        """Load ephemeral prefill messages from config or env var.
        
        Checks HERMES_PREFILL_MESSAGES_FILE env var first, then falls back to
        the prefill_messages_file key in ~/.hermes/config.yaml.
        Relative paths are resolved from ~/.hermes/.
        """
        import json as _json
        file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
        if not file_path:
            try:
                import yaml as _y
                cfg_path = _hermes_home / "config.yaml"
                if cfg_path.exists():
                    with open(cfg_path, encoding="utf-8") as _f:
                        cfg = _y.safe_load(_f) or {}
                    file_path = cfg.get("prefill_messages_file", "")
            except Exception:
                pass
        if not file_path:
            return []
        path = Path(file_path).expanduser()
        if not path.is_absolute():
            path = _hermes_home / path
        if not path.exists():
            logger.warning("Prefill messages file not found: %s", path)
            return []
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = _json.load(f)
            if not isinstance(data, list):
                logger.warning("Prefill messages file must contain a JSON array: %s", path)
                return []
            return data
        except Exception as e:
            logger.warning("Failed to load prefill messages from %s: %s", path, e)
            return []

    @staticmethod
    def _load_ephemeral_system_prompt() -> str:
        """Load ephemeral system prompt from config or env var.
        
        Checks HERMES_EPHEMERAL_SYSTEM_PROMPT env var first, then falls back to
        agent.system_prompt in ~/.hermes/config.yaml.
        """
        prompt = os.getenv("HERMES_EPHEMERAL_SYSTEM_PROMPT", "")
        if prompt:
            return prompt
        try:
            import yaml as _y
            cfg_path = _hermes_home / "config.yaml"
            if cfg_path.exists():
                with open(cfg_path, encoding="utf-8") as _f:
                    cfg = _y.safe_load(_f) or {}
                return (cfg.get("agent", {}).get("system_prompt", "") or "").strip()
        except Exception:
            pass
        return ""

    @staticmethod
    def _load_reasoning_config() -> dict | None:
        """Load reasoning effort from config or env var.
        
        Checks HERMES_REASONING_EFFORT env var first, then agent.reasoning_effort
        in config.yaml. Valid: "xhigh", "high", "medium", "low", "minimal", "none".
        Returns None to use default (medium).
        """
        effort = os.getenv("HERMES_REASONING_EFFORT", "")
        if not effort:
            try:
                import yaml as _y
                cfg_path = _hermes_home / "config.yaml"
                if cfg_path.exists():
                    with open(cfg_path, encoding="utf-8") as _f:
                        cfg = _y.safe_load(_f) or {}
                    effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
            except Exception:
                pass
        if not effort:
            return None
        effort = effort.lower().strip()
        if effort == "none":
            return {"enabled": False}
        valid = ("xhigh", "high", "medium", "low", "minimal")
        if effort in valid:
            return {"enabled": True, "effort": effort}
        logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
        return None

    @staticmethod
    def _load_show_reasoning() -> bool:
        """Return the truthiness of ``display.show_reasoning`` in config.yaml.

        ``False`` when the file or key is missing, or the config cannot be read.
        """
        try:
            import yaml as _y
            config_file = _hermes_home / "config.yaml"
            if not config_file.exists():
                return False
            with open(config_file, encoding="utf-8") as handle:
                parsed = _y.safe_load(handle) or {}
            display_section = parsed.get("display", {})
            return bool(display_section.get("show_reasoning", False))
        except Exception:
            return False

    @staticmethod
    def _load_background_notifications_mode() -> str:
        """Load background process notification mode from config or env var.

        Modes:
          - ``all``    — push running-output updates *and* the final message (default)
          - ``result`` — only the final completion message (regardless of exit code)
          - ``error``  — only the final message when exit code is non-zero
          - ``off``    — no watcher messages at all
        """
        mode = os.getenv("HERMES_BACKGROUND_NOTIFICATIONS", "")
        if not mode:
            try:
                import yaml as _y
                cfg_path = _hermes_home / "config.yaml"
                if cfg_path.exists():
                    with open(cfg_path, encoding="utf-8") as _f:
                        cfg = _y.safe_load(_f) or {}
                    raw = cfg.get("display", {}).get("background_process_notifications")
                    if raw is False:
                        mode = "off"
                    elif raw not in (None, ""):
                        mode = str(raw)
            except Exception:
                pass
        mode = (mode or "all").strip().lower()
        valid = {"all", "result", "error", "off"}
        if mode not in valid:
            logger.warning(
                "Unknown background_process_notifications '%s', defaulting to 'all'",
                mode,
            )
            return "all"
        return mode

    @staticmethod
    def _load_provider_routing() -> dict:
        """Return the ``provider_routing`` section of config.yaml (OpenRouter
        provider routing preferences).

        ``{}`` when the file or section is missing, the section is empty, or
        the config cannot be read.
        """
        try:
            import yaml as _y
            config_file = _hermes_home / "config.yaml"
            if not config_file.exists():
                return {}
            with open(config_file, encoding="utf-8") as handle:
                parsed = _y.safe_load(handle) or {}
            routing = parsed.get("provider_routing", {})
            return routing or {}
        except Exception:
            return {}

    @staticmethod
    def _load_fallback_model() -> dict | None:
        """Return the ``fallback_model`` section of config.yaml when usable.

        Returns:
            The section itself when both its ``provider`` and ``model`` entries
            are truthy; ``None`` when the file or section is missing, either
            entry is empty, or the config cannot be read.
        """
        try:
            import yaml as _y
            config_file = _hermes_home / "config.yaml"
            if not config_file.exists():
                return None
            with open(config_file, encoding="utf-8") as handle:
                parsed = _y.safe_load(handle) or {}
            fallback = parsed.get("fallback_model", {}) or {}
            is_complete = fallback.get("provider") and fallback.get("model")
            return fallback if is_complete else None
        except Exception:
            return None

    async def start(self) -> bool:
        """
        Start the gateway and all configured platform adapters.

        Steps, in order: warn about missing allowlists, load event hooks,
        recover background processes from their checkpoint, connect every
        enabled platform, emit the ``gateway:startup`` hook, build the channel
        directory, send any pending post-update notification, and launch the
        session expiry watcher.

        Returns:
            Always ``True``.  When no platform connects, a warning is logged
            and the gateway keeps running (the log message says this is for
            cron job execution).
        """
        logger.info("Starting Hermes Gateway...")
        logger.info("Session storage: %s", self.config.sessions_dir)
        
        # Warn if no user allowlists are configured and open access is not opted in
        _any_allowlist = any(
            os.getenv(v)
            for v in ("TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS",
                       "WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS",
                       "GATEWAY_ALLOWED_USERS")
        )
        _allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")
        if not _any_allowlist and not _allow_all:
            logger.warning(
                "No user allowlists configured. All unauthorized users will be denied. "
                "Set GATEWAY_ALLOW_ALL_USERS=true in ~/.hermes/.env to allow open access, "
                "or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)."
            )
        
        # Discover and load event hooks
        self.hooks.discover_and_load()
        
        # Recover background processes from checkpoint (crash recovery)
        try:
            from tools.process_registry import process_registry
            recovered = process_registry.recover_from_checkpoint()
            if recovered:
                logger.info("Recovered %s background process(es) from previous run", recovered)
        except Exception as e:
            logger.warning("Process checkpoint recovery: %s", e)
        
        connected_count = 0
        
        # Initialize and connect each configured platform
        for platform, platform_config in self.config.platforms.items():
            if not platform_config.enabled:
                continue
            
            adapter = self._create_adapter(platform, platform_config)
            if not adapter:
                logger.warning("No adapter available for %s", platform.value)
                continue
            
            # Set up message handler
            adapter.set_message_handler(self._handle_message)
            
            # Try to connect; a failure on one platform must not stop the others.
            logger.info("Connecting to %s...", platform.value)
            try:
                success = await adapter.connect()
                if success:
                    self.adapters[platform] = adapter
                    connected_count += 1
                    logger.info("✓ %s connected", platform.value)
                else:
                    logger.warning("✗ %s failed to connect", platform.value)
            except Exception as e:
                logger.error("✗ %s error: %s", platform.value, e)
        
        if connected_count == 0:
            logger.warning("No messaging platforms connected.")
            logger.info("Gateway will continue running for cron job execution.")
        
        # Update delivery router with adapters
        self.delivery_router.adapters = self.adapters
        
        self._running = True
        
        # Emit gateway:startup hook
        hook_count = len(self.hooks.loaded_hooks)
        if hook_count:
            logger.info("%s hook(s) loaded", hook_count)
        await self.hooks.emit("gateway:startup", {
            "platforms": [p.value for p in self.adapters.keys()],
        })
        
        if connected_count > 0:
            logger.info("Gateway running with %s platform(s)", connected_count)
        
        # Build initial channel directory for send_message name resolution
        try:
            from gateway.channel_directory import build_channel_directory
            directory = build_channel_directory(self.adapters)
            ch_count = sum(len(chs) for chs in directory.get("platforms", {}).values())
            logger.info("Channel directory built: %d target(s)", ch_count)
        except Exception as e:
            logger.warning("Channel directory build failed: %s", e)
        
        # Check if we're restarting after a /update command
        await self._send_update_notification()

        # Start background session expiry watcher for proactive memory flushing.
        # The task is stored on the instance: the event loop only keeps weak
        # references to tasks, so a task nobody else references may be
        # garbage-collected before it finishes.
        self._session_expiry_task = asyncio.create_task(self._session_expiry_watcher())

        logger.info("Press Ctrl+C to stop")
        
        return True
    
    async def _session_expiry_watcher(self, interval: int = 300):
        """Periodically flush memories for sessions whose reset policy has expired.

        After an initial 60-second delay the session store is scanned every
        ``interval`` seconds (default 5 min) for as long as ``self._running``
        is true.  Each expired session that has not been flushed yet gets its
        memories flushed in a thread pool, its Honcho manager shut down, and
        its ID recorded in the store's ``_pre_flushed_sessions`` so it is not
        flushed again.

        Flushing ahead of time means the memories are already saved when the
        user sends their next message, so there's no blocking delay.

        Args:
            interval: Seconds between scans.
        """
        await asyncio.sleep(60)  # initial delay — let the gateway fully start

        while self._running:
            try:
                self.session_store._ensure_loaded()
                # Iterate over a snapshot; entries may change while we await.
                for key, entry in list(self.session_store._entries.items()):
                    already_flushed = (
                        entry.session_id in self.session_store._pre_flushed_sessions
                    )
                    if already_flushed or not self.session_store._is_session_expired(entry):
                        continue  # nothing to do: flushed before, or still active

                    logger.info(
                        "Session %s expired (key=%s), flushing memories proactively",
                        entry.session_id, key,
                    )
                    try:
                        await self._async_flush_memories(entry.session_id)
                        self._shutdown_gateway_honcho(key)
                        self.session_store._pre_flushed_sessions.add(entry.session_id)
                    except Exception as exc:
                        logger.debug("Proactive memory flush failed for %s: %s", entry.session_id, exc)
            except Exception as exc:
                logger.debug("Session expiry watcher error: %s", exc)

            # Wait one second at a time so a stop request is noticed quickly.
            for _ in range(interval):
                if self._running:
                    await asyncio.sleep(1)
                else:
                    break

    async def stop(self) -> None:
        """Stop the gateway and disconnect all adapters.

        Teardown order: clear the running flag, disconnect each adapter
        (a failure is logged and does not abort the loop), empty the adapter
        map, shut down all Honcho managers, set the shutdown event, and
        finally remove the PID file.
        """
        logger.info("Stopping gateway...")
        # Cleared first: _session_expiry_watcher's loops check this flag to exit.
        self._running = False
        
        for platform, adapter in self.adapters.items():
            try:
                await adapter.disconnect()
                logger.info("✓ %s disconnected", platform.value)
            except Exception as e:
                logger.error("✗ %s disconnect error: %s", platform.value, e)

        self.adapters.clear()
        self._shutdown_all_gateway_honcho()
        # Wakes up anything awaiting wait_for_shutdown().
        self._shutdown_event.set()
        
        from gateway.status import remove_pid_file
        remove_pid_file()
        
        logger.info("Gateway stopped")
    
    async def wait_for_shutdown(self) -> None:
        """Wait until the gateway's shutdown event is set.

        ``stop()`` sets this event after disconnecting the adapters.
        """
        await self._shutdown_event.wait()
    
    def _create_adapter(
        self, 
        platform: Platform, 
        config: Any
    ) -> Optional[BasePlatformAdapter]:
        """Instantiate the adapter that matches ``platform``.

        Each platform module is imported only when that platform is requested.
        Its requirements check runs first; on failure a warning is logged and
        no adapter is built.

        Args:
            platform: The platform to build an adapter for.
            config: Platform configuration passed to the adapter constructor.

        Returns:
            The adapter instance, or ``None`` when the requirements check
            fails or the platform is not one handled here.
        """
        if platform == Platform.TELEGRAM:
            from gateway.platforms.telegram import TelegramAdapter, check_telegram_requirements
            if check_telegram_requirements():
                return TelegramAdapter(config)
            logger.warning("Telegram: python-telegram-bot not installed")
            return None

        if platform == Platform.DISCORD:
            from gateway.platforms.discord import DiscordAdapter, check_discord_requirements
            if check_discord_requirements():
                return DiscordAdapter(config)
            logger.warning("Discord: discord.py not installed")
            return None

        if platform == Platform.WHATSAPP:
            from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
            if check_whatsapp_requirements():
                return WhatsAppAdapter(config)
            logger.warning("WhatsApp: Node.js not installed or bridge not configured")
            return None

        if platform == Platform.SLACK:
            from gateway.platforms.slack import SlackAdapter, check_slack_requirements
            if check_slack_requirements():
                return SlackAdapter(config)
            logger.warning("Slack: slack-bolt not installed. Run: pip install 'hermes-agent[slack]'")
            return None

        if platform == Platform.SIGNAL:
            from gateway.platforms.signal import SignalAdapter, check_signal_requirements
            if check_signal_requirements():
                return SignalAdapter(config)
            logger.warning("Signal: SIGNAL_HTTP_URL or SIGNAL_ACCOUNT not configured")
            return None

        if platform == Platform.HOMEASSISTANT:
            from gateway.platforms.homeassistant import HomeAssistantAdapter, check_ha_requirements
            if check_ha_requirements():
                return HomeAssistantAdapter(config)
            logger.warning("HomeAssistant: aiohttp not installed or HASS_TOKEN not set")
            return None

        if platform == Platform.EMAIL:
            from gateway.platforms.email import EmailAdapter, check_email_requirements
            if check_email_requirements():
                return EmailAdapter(config)
            logger.warning("Email: EMAIL_ADDRESS, EMAIL_PASSWORD, EMAIL_IMAP_HOST, or EMAIL_SMTP_HOST not set")
            return None

        # Platform not handled by this factory.
        return None
    
    def _is_user_authorized(self, source: SessionSource) -> bool:
        """
        Decide whether the sender described by *source* may use the bot.

        Checks, in the order they are evaluated (first match wins):
        1. Home Assistant sources are always authorized.
        2. A source without a user ID is denied.
        3. Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
        4. DM pairing approved list
        5. If neither the platform allowlist nor GATEWAY_ALLOWED_USERS is
           set, the global allow-all flag (GATEWAY_ALLOW_ALL_USERS=true)
           decides.
        6. Otherwise the user ID (or the part before its first "@") must
           appear in the platform allowlist (TELEGRAM_ALLOWED_USERS, etc.)
           or in GATEWAY_ALLOWED_USERS; anything else is denied.
        """
        truthy = ("true", "1", "yes")
        platform = source.platform

        # Home Assistant events are system-generated (state changes), not
        # user-initiated messages.  The HASS_TOKEN already authenticates the
        # connection, so HA events are always authorized.
        if platform == Platform.HOMEASSISTANT:
            return True

        sender_id = source.user_id
        if not sender_id:
            return False

        # platform -> (allowlist env var, allow-all env var)
        env_names = {
            Platform.TELEGRAM: ("TELEGRAM_ALLOWED_USERS", "TELEGRAM_ALLOW_ALL_USERS"),
            Platform.DISCORD: ("DISCORD_ALLOWED_USERS", "DISCORD_ALLOW_ALL_USERS"),
            Platform.WHATSAPP: ("WHATSAPP_ALLOWED_USERS", "WHATSAPP_ALLOW_ALL_USERS"),
            Platform.SLACK: ("SLACK_ALLOWED_USERS", "SLACK_ALLOW_ALL_USERS"),
            Platform.SIGNAL: ("SIGNAL_ALLOWED_USERS", "SIGNAL_ALLOW_ALL_USERS"),
            Platform.EMAIL: ("EMAIL_ALLOWED_USERS", "EMAIL_ALLOW_ALL_USERS"),
        }
        # Platforms missing from the table get empty variable names.
        allowlist_var, allow_all_var = env_names.get(platform, ("", ""))

        # Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
        if allow_all_var and os.getenv(allow_all_var, "").lower() in truthy:
            return True

        # Pairing approvals are honoured whether or not allowlists exist.
        platform_name = platform.value if platform else ""
        if self.pairing_store.is_approved(platform_name, sender_id):
            return True

        # Raw comma-separated allowlists: platform-specific, then global.
        raw_allowlists = (
            os.getenv(allowlist_var, "").strip(),
            os.getenv("GATEWAY_ALLOWED_USERS", "").strip(),
        )

        if not any(raw_allowlists):
            # No allowlists configured -- the global allow-all flag decides.
            return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in truthy

        # Union of both allowlists, ignoring blank entries.
        allowed_ids = {
            entry.strip()
            for raw in raw_allowlists
            for entry in raw.split(",")
            if entry.strip()
        }

        # WhatsApp JIDs have @s.whatsapp.net suffix — also try the bare ID.
        # NOTE(review): this runs for every platform, not just WhatsApp.  If
        # another platform's user IDs contain "@" (presumably e-mail
        # addresses), their local part is matched against the allowlists
        # too — confirm that is intended.
        candidates = {sender_id}
        if "@" in sender_id:
            candidates.add(sender_id.split("@", 1)[0])
        return not candidates.isdisjoint(allowed_ids)
    
    async def _handle_message(self, event: MessageEvent) -> Optional[str]:
        """
        Handle an incoming message from any platform.
        
        This is the core message processing pipeline:
        1. Check user authorization
        2. Check for commands (/new, /reset, etc.)
        3. Check for running agent and interrupt if needed
        4. Get or create session
        5. Build context for agent
        6. Run agent conversation
        7. Return response
        """
        source = event.source
        
        # Check if user is authorized
        if not self._is_user_authorized(source):
            logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value)
            # In DMs: offer pairing code. In groups: silently ignore.
            if source.chat_type == "dm":
                platform_name = source.platform.value if source.platform else "unknown"
                code = self.pairing_store.generate_code(
                    platform_name, source.user_id, source.user_name or ""
                )
                if code:
                    adapter = self.adapters.get(source.platform)
                    if adapter:
                        await adapter.send(
                            source.chat_id,
                            f"Hi~ I don't recognize you yet!\n\n"
                            f"Here's your pairing code: `{code}`\n\n"
                            f"Ask the bot owner to run:\n"
                            f"`hermes pairing approve {platform_name} {code}`"
                        )
                else:
                    adapter = self.adapters.get(source.platform)
                    if adapter:
                        await adapter.send(
                            source.chat_id,
                            "Too many pairing requests right now~ "
                            "Please try again later!"
                        )
            return None
        
        # PRIORITY: If an agent is already running for this session, interrupt it
        # immediately. This is before command parsing to minimize latency -- the
        # user's "stop" message reaches the agent as fast as possible.
        _quick_key = build_session_key(source)
        if _quick_key in self._running_agents:
            running_agent = self._running_agents[_quick_key]
            logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
            running_agent.interrupt(event.text)
            if _quick_key in self._pending_messages:
                self._pending_messages[_quick_key] += "\n" + event.text
            else:
                self._pending_messages[_quick_key] = event.text
            return None
        
        # Check for commands
        command = event.get_command()
        
        # Emit command:* hook for any recognized slash command
        _known_commands = {"new", "reset", "help", "status", "stop", "model",
                          "personality", "retry", "undo", "sethome", "set-home",
                          "compress", "usage", "insights", "reload-mcp", "reload_mcp",
                          "update", "title", "resume", "provider", "rollback",
                          "background", "reasoning"}
        if command and command in _known_commands:
            await self.hooks.emit(f"command:{command}", {
                "platform": source.platform.value if source.platform else "",
                "user_id": source.user_id,
                "command": command,
                "args": event.get_command_args().strip(),
            })
        
        if command in ["new", "reset"]:
            return await self._handle_reset_command(event)
        
        if command == "help":
            return await self._handle_help_command(event)
        
        if command == "status":
            return await self._handle_status_command(event)
        
        if command == "stop":
            return await self._handle_stop_command(event)
        
        if command == "model":
            return await self._handle_model_command(event)
        
        if command == "provider":
            return await self._handle_provider_command(event)
        
        if command == "personality":
            return await self._handle_personality_command(event)
        
        if command == "retry":
            return await self._handle_retry_command(event)
        
        if command == "undo":
            return await self._handle_undo_command(event)
        
        if command in ["sethome", "set-home"]:
            return await self._handle_set_home_command(event)

        if command == "compress":
            return await self._handle_compress_command(event)

        if command == "usage":
            return await self._handle_usage_command(event)

        if command == "insights":
            return await self._handle_insights_command(event)

        if command in ("reload-mcp", "reload_mcp"):
            return await self._handle_reload_mcp_command(event)

        if command == "update":
            return await self._handle_update_command(event)

        if command == "title":
            return await self._handle_title_command(event)

        if command == "resume":
            return await self._handle_resume_command(event)

        if command == "rollback":
            return await self._handle_rollback_command(event)

        if command == "background":
            return await self._handle_background_command(event)

        if command == "reasoning":
            return await self._handle_reasoning_command(event)
        
        # User-defined quick commands (bypass agent loop, no LLM call)
        if command:
            quick_commands = self.config.get("quick_commands", {})
            if command in quick_commands:
                qcmd = quick_commands[command]
                if qcmd.get("type") == "exec":
                    exec_cmd = qcmd.get("command", "")
                    if exec_cmd:
                        try:
                            proc = await asyncio.create_subprocess_shell(
                                exec_cmd,
                                stdout=asyncio.subprocess.PIPE,
                                stderr=asyncio.subprocess.PIPE,
                            )
                            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
                            output = (stdout or stderr).decode().strip()
                            return output if output else "Command returned no output."
                        except asyncio.TimeoutError:
                            return "Quick command timed out (30s)."
                        except Exception as e:
                            return f"Quick command error: {e}"
                    else:
                        return f"Quick command '/{command}' has no command defined."
                else:
                    return f"Quick command '/{command}' has unsupported type (only 'exec' is supported)."

        # Skill slash commands: /skill-name loads the skill and sends to agent
        if command:
            try:
                from agent.skill_commands import get_skill_commands, build_skill_invocation_message
                skill_cmds = get_skill_commands()
                cmd_key = f"/{command}"
                if cmd_key in skill_cmds:
                    user_instruction = event.get_command_args().strip()
                    msg = build_skill_invocation_message(
                        cmd_key, user_instruction, task_id=session_key
                    )
                    if msg:
                        event.text = msg
                        # Fall through to normal message processing with skill content
            except Exception as e:
                logger.debug("Skill command check failed (non-fatal): %s", e)
        
        # Check for pending exec approval responses
        session_key_preview = build_session_key(source)
        if session_key_preview in self._pending_approvals:
            user_text = event.text.strip().lower()
            if user_text in ("yes", "y", "approve", "ok", "go", "do it"):
                approval = self._pending_approvals.pop(session_key_preview)
                cmd = approval["command"]
                pattern_keys = approval.get("pattern_keys", [])
                if not pattern_keys:
                    pk = approval.get("pattern_key", "")
                    pattern_keys = [pk] if pk else []
                logger.info("User approved dangerous command: %s...", cmd[:60])
                from tools.terminal_tool import terminal_tool
                from tools.approval import approve_session
                for pk in pattern_keys:
                    approve_session(session_key_preview, pk)
                result = terminal_tool(command=cmd, force=True)
                return f"✅ Command approved and executed.\n\n```\n{result[:3500]}\n```"
            elif user_text in ("no", "n", "deny", "cancel", "nope"):
                self._pending_approvals.pop(session_key_preview)
                return "❌ Command denied."
            elif user_text in ("full", "show", "view", "show full", "view full"):
                # Show full command without consuming the approval
                cmd = self._pending_approvals[session_key_preview]["command"]
                return f"Full command:\n\n```\n{cmd}\n```\n\nReply yes/no to approve or deny."
            # If it's not clearly an approval/denial, fall through to normal processing
        
        # Get or create session
        session_entry = self.session_store.get_or_create_session(source)
        session_key = session_entry.session_key
        
        # Emit session:start for new or auto-reset sessions
        _is_new_session = (
            session_entry.created_at == session_entry.updated_at
            or getattr(session_entry, "was_auto_reset", False)
        )
        if _is_new_session:
            await self.hooks.emit("session:start", {
                "platform": source.platform.value if source.platform else "",
                "user_id": source.user_id,
                "session_id": session_entry.session_id,
                "session_key": session_key,
            })
        
        # Build session context
        context = build_session_context(source, self.config, session_entry)
        
        # Set environment variables for tools
        self._set_session_env(context)
        
        # Build the context prompt to inject
        context_prompt = build_session_context_prompt(context)
        
        # If the previous session expired and was auto-reset, prepend a notice
        # so the agent knows this is a fresh conversation (not an intentional /reset).
        if getattr(session_entry, 'was_auto_reset', False):
            context_prompt = (
                "[System note: The user's previous session expired due to inactivity. "
                "This is a fresh conversation with no prior context.]\n\n"
                + context_prompt
            )
            session_entry.was_auto_reset = False
        
        # Load conversation history from transcript
        history = self.session_store.load_transcript(session_entry.session_id)
        
        # -----------------------------------------------------------------
        # Session hygiene: auto-compress pathologically large transcripts
        #
        # Long-lived gateway sessions can accumulate enough history that
        # every new message rehydrates an oversized transcript, causing
        # repeated truncation/context failures.  Detect this early and
        # compress proactively — before the agent even starts.  (#628)
        #
        # Token source priority:
        # 1. Actual API-reported prompt_tokens from the last turn
        #    (stored in session_entry.last_prompt_tokens)
        # 2. Rough char-based estimate (str(msg)//4) with a 1.4x
        #    safety factor to account for overestimation on tool-heavy
        #    conversations (code/JSON tokenizes at 5-7+ chars/token).
        # -----------------------------------------------------------------
        if history and len(history) >= 4:
            from agent.model_metadata import (
                estimate_messages_tokens_rough,
                get_model_context_length,
            )

            # Read model + compression config from config.yaml.
            # NOTE: hygiene threshold is intentionally HIGHER than the agent's
            # own compressor (0.85 vs 0.50).  Hygiene is a safety net for
            # sessions that grew too large between turns — it fires pre-agent
            # to prevent API failures.  The agent's own compressor handles
            # normal context management during its tool loop with accurate
            # real token counts.  Having hygiene at 0.50 caused premature
            # compression on every turn in long gateway sessions.
            _hyg_model = "anthropic/claude-sonnet-4.6"
            _hyg_threshold_pct = 0.85
            _hyg_compression_enabled = True
            try:
                _hyg_cfg_path = _hermes_home / "config.yaml"
                if _hyg_cfg_path.exists():
                    import yaml as _hyg_yaml
                    with open(_hyg_cfg_path, encoding="utf-8") as _hyg_f:
                        _hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}

                    # Resolve model name (same logic as run_sync)
                    _model_cfg = _hyg_data.get("model", {})
                    if isinstance(_model_cfg, str):
                        _hyg_model = _model_cfg
                    elif isinstance(_model_cfg, dict):
                        _hyg_model = _model_cfg.get("default", _hyg_model)

                    # Read compression settings — only use enabled flag.
                    # The threshold is intentionally separate from the agent's
                    # compression.threshold (hygiene runs higher).
                    _comp_cfg = _hyg_data.get("compression", {})
                    if isinstance(_comp_cfg, dict):
                        _hyg_compression_enabled = str(
                            _comp_cfg.get("enabled", True)
                        ).lower() in ("true", "1", "yes")
            except Exception:
                pass

            # Check env override for disabling compression entirely
            if os.getenv("CONTEXT_COMPRESSION_ENABLED", "").lower() in ("false", "0", "no"):
                _hyg_compression_enabled = False

            if _hyg_compression_enabled:
                _hyg_context_length = get_model_context_length(_hyg_model)
                _compress_token_threshold = int(
                    _hyg_context_length * _hyg_threshold_pct
                )
                _warn_token_threshold = int(_hyg_context_length * 0.95)

                _msg_count = len(history)

                # Prefer actual API-reported tokens from the last turn
                # (stored in session entry) over the rough char-based estimate.
                # The rough estimate (str(msg)//4) overestimates by 30-50% on
                # tool-heavy/code-heavy conversations, causing premature compression.
                _stored_tokens = session_entry.last_prompt_tokens
                if _stored_tokens > 0:
                    _approx_tokens = _stored_tokens
                    _token_source = "actual"
                else:
                    _approx_tokens = estimate_messages_tokens_rough(history)
                    # Apply safety factor only for rough estimates
                    _compress_token_threshold = int(
                        _compress_token_threshold * 1.4
                    )
                    _warn_token_threshold = int(_warn_token_threshold * 1.4)
                    _token_source = "estimated"

                _needs_compress = _approx_tokens >= _compress_token_threshold

                if _needs_compress:
                    logger.info(
                        "Session hygiene: %s messages, ~%s tokens (%s) — auto-compressing "
                        "(threshold: %s%% of %s = %s tokens)",
                        _msg_count, f"{_approx_tokens:,}", _token_source,
                        int(_hyg_threshold_pct * 100),
                        f"{_hyg_context_length:,}",
                        f"{_compress_token_threshold:,}",
                    )

                    _hyg_adapter = self.adapters.get(source.platform)
                    _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
                    if _hyg_adapter:
                        try:
                            await _hyg_adapter.send(
                                source.chat_id,
                                f"🗜️ Session is large ({_msg_count} messages, "
                                f"~{_approx_tokens:,} tokens). Auto-compressing...",
                                metadata=_hyg_meta,
                            )
                        except Exception:
                            pass

                    try:
                        from run_agent import AIAgent

                        _hyg_runtime = _resolve_runtime_agent_kwargs()
                        if _hyg_runtime.get("api_key"):
                            _hyg_msgs = [
                                {"role": m.get("role"), "content": m.get("content")}
                                for m in history
                                if m.get("role") in ("user", "assistant")
                                and m.get("content")
                            ]

                            if len(_hyg_msgs) >= 4:
                                _hyg_agent = AIAgent(
                                    **_hyg_runtime,
                                    model=_hyg_model,
                                    max_iterations=4,
                                    quiet_mode=True,
                                    enabled_toolsets=["memory"],
                                    session_id=session_entry.session_id,
                                )

                                loop = asyncio.get_event_loop()
                                _compressed, _ = await loop.run_in_executor(
                                    None,
                                    lambda: _hyg_agent._compress_context(
                                        _hyg_msgs, "",
                                        approx_tokens=_approx_tokens,
                                    ),
                                )

                                self.session_store.rewrite_transcript(
                                    session_entry.session_id, _compressed
                                )
                                # Reset stored token count — transcript was rewritten
                                session_entry.last_prompt_tokens = 0
                                history = _compressed
                                _new_count = len(_compressed)
                                _new_tokens = estimate_messages_tokens_rough(
                                    _compressed
                                )

                                logger.info(
                                    "Session hygiene: compressed %s → %s msgs, "
                                    "~%s → ~%s tokens",
                                    _msg_count, _new_count,
                                    f"{_approx_tokens:,}", f"{_new_tokens:,}",
                                )

                                if _hyg_adapter:
                                    try:
                                        await _hyg_adapter.send(
                                            source.chat_id,
                                            f"🗜️ Compressed: {_msg_count} → "
                                            f"{_new_count} messages, "
                                            f"~{_approx_tokens:,} → "
                                            f"~{_new_tokens:,} tokens",
                                            metadata=_hyg_meta,
                                        )
                                    except Exception:
                                        pass

                                # Still too large after compression — warn user
                                if _new_tokens >= _warn_token_threshold:
                                    logger.warning(
                                        "Session hygiene: still ~%s tokens after "
                                        "compression — suggesting /reset",
                                        f"{_new_tokens:,}",
                                    )
                                    if _hyg_adapter:
                                        try:
                                            await _hyg_adapter.send(
                                                source.chat_id,
                                                "⚠️ Session is still very large "
                                                "after compression "
                                                f"(~{_new_tokens:,} tokens). "
                                                "Consider using /reset to start "
                                                "fresh if you experience issues.",
                                                metadata=_hyg_meta,
                                            )
                                        except Exception:
                                            pass

                    except Exception as e:
                        logger.warning(
                            "Session hygiene auto-compress failed: %s", e
                        )
                        # Compression failed and session is dangerously large
                        if _approx_tokens >= _warn_token_threshold:
                            _hyg_adapter = self.adapters.get(source.platform)
                            _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
                            if _hyg_adapter:
                                try:
                                    await _hyg_adapter.send(
                                        source.chat_id,
                                        f"⚠️ Session is very large "
                                        f"({_msg_count} messages, "
                                        f"~{_approx_tokens:,} tokens) and "
                                        "auto-compression failed. Consider "
                                        "using /compress or /reset to avoid "
                                        "issues.",
                                        metadata=_hyg_meta,
                                    )
                                except Exception:
                                    pass

        # First-message onboarding -- only on the very first interaction ever
        if not history and not self.session_store.has_any_sessions():
            context_prompt += (
                "\n\n[System note: This is the user's very first message ever. "
                "Briefly introduce yourself and mention that /help shows available commands. "
                "Keep the introduction concise -- one or two sentences max.]"
            )
        
        # One-time prompt if no home channel is set for this platform
        if not history and source.platform and source.platform != Platform.LOCAL:
            platform_name = source.platform.value
            env_key = f"{platform_name.upper()}_HOME_CHANNEL"
            if not os.getenv(env_key):
                adapter = self.adapters.get(source.platform)
                if adapter:
                    await adapter.send(
                        source.chat_id,
                        f"📬 No home channel is set for {platform_name.title()}. "
                        f"A home channel is where Hermes delivers cron job results "
                        f"and cross-platform messages.\n\n"
                        f"Type /sethome to make this chat your home channel, "
                        f"or ignore to skip."
                    )
        
        # -----------------------------------------------------------------
        # Auto-analyze images sent by the user
        #
        # If the user attached image(s), we run the vision tool eagerly so
        # the conversation model always receives a text description.  The
        # local file path is also included so the model can re-examine the
        # image later with a more targeted question via vision_analyze.
        #
        # We filter to image paths only (by media_type) so that non-image
        # attachments (documents, audio, etc.) are not sent to the vision
        # tool even when they appear in the same message.
        # -----------------------------------------------------------------
        message_text = event.text or ""
        if event.media_urls:
            image_paths = []
            for i, path in enumerate(event.media_urls):
                # Check media_types if available; otherwise infer from message type
                mtype = event.media_types[i] if i < len(event.media_types) else ""
                is_image = (
                    mtype.startswith("image/")
                    or event.message_type == MessageType.PHOTO
                )
                if is_image:
                    image_paths.append(path)
            if image_paths:
                message_text = await self._enrich_message_with_vision(
                    message_text, image_paths
                )
        
        # -----------------------------------------------------------------
        # Auto-transcribe voice/audio messages sent by the user
        # -----------------------------------------------------------------
        if event.media_urls:
            audio_paths = []
            for i, path in enumerate(event.media_urls):
                mtype = event.media_types[i] if i < len(event.media_types) else ""
                is_audio = (
                    mtype.startswith("audio/")
                    or event.message_type in (MessageType.VOICE, MessageType.AUDIO)
                )
                if is_audio:
                    audio_paths.append(path)
            if audio_paths:
                message_text = await self._enrich_message_with_transcription(
                    message_text, audio_paths
                )

        # -----------------------------------------------------------------
        # Enrich document messages with context notes for the agent
        # -----------------------------------------------------------------
        if event.media_urls and event.message_type == MessageType.DOCUMENT:
            for i, path in enumerate(event.media_urls):
                mtype = event.media_types[i] if i < len(event.media_types) else ""
                if not (mtype.startswith("application/") or mtype.startswith("text/")):
                    continue
                # Extract display filename by stripping the doc_{uuid12}_ prefix
                import os as _os
                basename = _os.path.basename(path)
                # Format: doc_<12hex>_<original_filename>
                parts = basename.split("_", 2)
                display_name = parts[2] if len(parts) >= 3 else basename
                # Sanitize to prevent prompt injection via filenames
                import re as _re
                display_name = _re.sub(r'[^\w.\- ]', '_', display_name)

                if mtype.startswith("text/"):
                    context_note = (
                        f"[The user sent a text document: '{display_name}'. "
                        f"Its content has been included below. "
                        f"The file is also saved at: {path}]"
                    )
                else:
                    context_note = (
                        f"[The user sent a document: '{display_name}'. "
                        f"The file is saved at: {path}. "
                        f"Ask the user what they'd like you to do with it.]"
                    )
                message_text = f"{context_note}\n\n{message_text}"

        try:
            # Emit agent:start hook
            hook_ctx = {
                "platform": source.platform.value if source.platform else "",
                "user_id": source.user_id,
                "session_id": session_entry.session_id,
                "message": message_text[:500],
            }
            await self.hooks.emit("agent:start", hook_ctx)
            
            # Run the agent
            agent_result = await self._run_agent(
                message=message_text,
                context_prompt=context_prompt,
                history=history,
                source=source,
                session_id=session_entry.session_id,
                session_key=session_key
            )
            
            response = agent_result.get("final_response", "")
            agent_messages = agent_result.get("messages", [])

            # If the agent's session_id changed during compression, update
            # session_entry so transcript writes below go to the right session.
            if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
                session_entry.session_id = agent_result["session_id"]

            # Prepend reasoning/thinking if display is enabled
            if getattr(self, "_show_reasoning", False) and response:
                last_reasoning = agent_result.get("last_reasoning")
                if last_reasoning:
                    # Collapse long reasoning to keep messages readable
                    lines = last_reasoning.strip().splitlines()
                    if len(lines) > 15:
                        display_reasoning = "\n".join(lines[:15])
                        display_reasoning += f"\n_... ({len(lines) - 15} more lines)_"
                    else:
                        display_reasoning = last_reasoning.strip()
                    response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}"

            # Emit agent:end hook
            await self.hooks.emit("agent:end", {
                **hook_ctx,
                "response": (response or "")[:500],
            })
            
            # Check for pending process watchers (check_interval on background processes)
            try:
                from tools.process_registry import process_registry
                while process_registry.pending_watchers:
                    watcher = process_registry.pending_watchers.pop(0)
                    asyncio.create_task(self._run_process_watcher(watcher))
            except Exception as e:
                logger.error("Process watcher setup error: %s", e)

            # Check if the agent encountered a dangerous command needing approval
            try:
                from tools.approval import pop_pending
                pending = pop_pending(session_key)
                if pending:
                    self._pending_approvals[session_key] = pending
            except Exception as e:
                logger.debug("Failed to check pending approvals: %s", e)
            
            # Save the full conversation to the transcript, including tool calls.
            # This preserves the complete agent loop (tool_calls, tool results,
            # intermediate reasoning) so sessions can be resumed with full context
            # and transcripts are useful for debugging and training data.
            ts = datetime.now().isoformat()
            
            # If this is a fresh session (no history), write the full tool
            # definitions as the first entry so the transcript is self-describing
            # -- the same list of dicts sent as tools=[...] in the API request.
            if not history:
                tool_defs = agent_result.get("tools", [])
                self.session_store.append_to_transcript(
                    session_entry.session_id,
                    {
                        "role": "session_meta",
                        "tools": tool_defs or [],
                        "model": os.getenv("HERMES_MODEL", ""),
                        "platform": source.platform.value if source.platform else "",
                        "timestamp": ts,
                    }
                )
            
            # Find only the NEW messages from this turn (skip history we loaded).
            # Use the filtered history length (history_offset) that was actually
            # passed to the agent, not len(history) which includes session_meta
            # entries that were stripped before the agent saw them.
            history_len = agent_result.get("history_offset", len(history))
            new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
            
            # If no new messages found (edge case), fall back to simple user/assistant
            if not new_messages:
                self.session_store.append_to_transcript(
                    session_entry.session_id,
                    {"role": "user", "content": message_text, "timestamp": ts}
                )
                if response:
                    self.session_store.append_to_transcript(
                        session_entry.session_id,
                        {"role": "assistant", "content": response, "timestamp": ts}
                    )
            else:
                # The agent already persisted these messages to SQLite via
                # _flush_messages_to_session_db(), so skip the DB write here
                # to prevent the duplicate-write bug (#860).  We still write
                # to JSONL for backward compatibility and as a backup.
                agent_persisted = self._session_db is not None
                for msg in new_messages:
                    # Skip system messages (they're rebuilt each run)
                    if msg.get("role") == "system":
                        continue
                    # Add timestamp to each message for debugging
                    entry = {**msg, "timestamp": ts}
                    self.session_store.append_to_transcript(
                        session_entry.session_id, entry,
                        skip_db=agent_persisted,
                    )
            
            # Update session with actual prompt token count from the agent
            self.session_store.update_session(
                session_entry.session_key,
                last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
            )
            
            return response
            
        except Exception as e:
            logger.exception("Agent error in session %s", session_key)
            return (
                "Sorry, I encountered an unexpected error. "
                "The details have been logged for debugging. "
                "Try again or use /reset to start a fresh session."
            )
        finally:
            # Clear session env
            self._clear_session_env()
    
    async def _handle_reset_command(self, event: MessageEvent) -> str:
        """Handle /new or /reset command.

        Schedules a background memory flush for the session being replaced,
        calls ``_shutdown_gateway_honcho`` for the session key, resets the
        session in the store and emits the ``session:reset`` hook.

        Args:
            event: The incoming command event; only ``event.source`` is read.

        Returns:
            A confirmation message for the user.
        """
        source = event.source

        # Get existing session key
        session_key = self.session_store._generate_session_key(source)

        # Flush memories in the background (fire-and-forget) so the user
        # gets the "Session reset!" response immediately.
        try:
            old_entry = self.session_store._entries.get(session_key)
            if old_entry:
                flush_task = asyncio.create_task(
                    self._async_flush_memories(old_entry.session_id)
                )
                # The event loop only keeps weak references to tasks, so an
                # unreferenced task may be garbage-collected before it
                # finishes. Hold a strong reference until it is done.
                pending = getattr(self, "_fire_and_forget_tasks", None)
                if pending is None:
                    pending = self._fire_and_forget_tasks = set()
                pending.add(flush_task)
                flush_task.add_done_callback(pending.discard)
        except Exception as e:
            logger.debug("Gateway memory flush on reset failed: %s", e)

        self._shutdown_gateway_honcho(session_key)

        # Reset the session
        new_entry = self.session_store.reset_session(session_key)

        # Emit session:reset hook
        await self.hooks.emit("session:reset", {
            "platform": source.platform.value if source.platform else "",
            "user_id": source.user_id,
            "session_key": session_key,
        })

        if new_entry:
            return "✨ Session reset! I've started fresh with no memory of our previous conversation."

        # No existing session, just create one
        self.session_store.get_or_create_session(source, force_new=True)
        return "✨ New session started!"
    
    async def _handle_status_command(self, event: MessageEvent) -> str:
        """Build the reply for /status.

        Reports the current session's id (first 12 characters), creation and
        last-activity timestamps, token total, whether an agent is registered
        as running for the session, and the connected platforms.

        Args:
            event: The incoming command event; only ``event.source`` is read.

        Returns:
            A multi-line, markdown-formatted status message.
        """
        entry = self.session_store.get_or_create_session(event.source)
        platform_names = [platform.value for platform in self.adapters]

        # An agent counts as running when its session key is registered.
        agent_state = "Yes ⚡" if entry.session_key in self._running_agents else "No"

        created = entry.created_at.strftime('%Y-%m-%d %H:%M')
        updated = entry.updated_at.strftime('%Y-%m-%d %H:%M')

        report = (
            "📊 **Hermes Gateway Status**",
            "",
            f"**Session ID:** `{entry.session_id[:12]}...`",
            f"**Created:** {created}",
            f"**Last Activity:** {updated}",
            f"**Tokens:** {entry.total_tokens:,}",
            f"**Agent Running:** {agent_state}",
            "",
            f"**Connected Platforms:** {', '.join(platform_names)}",
        )
        return "\n".join(report)
    
    async def _handle_stop_command(self, event: MessageEvent) -> str:
        """Handle /stop: interrupt the agent registered for this session.

        Args:
            event: The incoming command event; only ``event.source`` is read.

        Returns:
            A notice that the task is being stopped, or that nothing is running.
        """
        entry = self.session_store.get_or_create_session(event.source)
        key = entry.session_key
        running = self._running_agents

        # Guard clause: nothing registered for this session.
        if key not in running:
            return "No active task to stop."

        running[key].interrupt()
        return "⚡ Stopping the current task... The agent will finish its current step and respond."
    
    async def _handle_help_command(self, event: MessageEvent) -> str:
        """Handle /help command - list available commands.

        The built-in commands are always listed. Installed skill commands are
        appended on a best-effort basis: if they cannot be loaded, the failure
        is logged at debug level and the built-in list is still returned.

        Args:
            event: The incoming command event (not read).

        Returns:
            The markdown-formatted help text.
        """
        lines = [
            "📖 **Hermes Commands**\n",
            "`/new` — Start a new conversation",
            "`/reset` — Reset conversation history",
            "`/status` — Show session info",
            "`/stop` — Interrupt the running agent",
            "`/model [provider:model]` — Show/change model (or switch provider)",
            "`/provider` — Show available providers and auth status",
            "`/personality [name]` — Set a personality",
            "`/retry` — Retry your last message",
            "`/undo` — Remove the last exchange",
            "`/sethome` — Set this chat as the home channel",
            "`/compress` — Compress conversation context",
            "`/title [name]` — Set or show the session title",
            "`/resume [name]` — Resume a previously-named session",
            "`/usage` — Show token usage for this session",
            "`/insights [days]` — Show usage insights and analytics",
            "`/reasoning [level|show|hide]` — Set reasoning effort or toggle display",
            "`/rollback [number]` — List or restore filesystem checkpoints",
            "`/background <prompt>` — Run a prompt in a separate background session",
            "`/reload-mcp` — Reload MCP servers from config",
            "`/update` — Update Hermes Agent to the latest version",
            "`/help` — Show this message",
        ]
        try:
            from agent.skill_commands import get_skill_commands
            skill_cmds = get_skill_commands()
            if skill_cmds:
                lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):")
                for cmd in sorted(skill_cmds):
                    info = skill_cmds[cmd]
                    # A skill without a description must not abort the listing
                    # of the remaining skills.
                    description = info.get("description", "") if isinstance(info, dict) else ""
                    lines.append(f"`{cmd}` — {description}")
        except Exception as e:
            # Best-effort section: keep /help working, but leave a trace.
            logger.debug("Could not list skill commands for /help: %s", e)
        return "\n".join(lines)
    
    async def _handle_model_command(self, event: MessageEvent) -> str:
        """Handle /model command - show or change the current model.

        Without arguments, replies with the current model, its provider and
        the curated model list for that provider. With an argument
        (``model-name`` or ``provider:model-name``), validates the requested
        model, optionally persists it to ``config.yaml`` and exports it via
        ``HERMES_MODEL`` / ``HERMES_INFERENCE_PROVIDER`` for the next agent run.

        Args:
            event: The incoming command event; its command arguments are read.

        Returns:
            The reply text: current-model info, a confirmation, or a warning.
        """
        import yaml
        from hermes_cli.models import (
            parse_model_input,
            validate_requested_model,
            curated_models_for_provider,
            normalize_provider,
            _PROVIDER_LABELS,
        )

        args = event.get_command_args().strip()
        config_path = _hermes_home / 'config.yaml'

        # Resolve current model and provider from config
        current = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
        current_provider = "openrouter"
        try:
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    cfg = yaml.safe_load(f) or {}
                model_cfg = cfg.get("model", {})
                if isinstance(model_cfg, str):
                    current = model_cfg
                elif isinstance(model_cfg, dict):
                    current = model_cfg.get("default", current)
                    current_provider = model_cfg.get("provider", current_provider)
        except Exception as e:
            # Unreadable config: fall back to the env/default values above.
            logger.debug("Could not read model settings from %s: %s", config_path, e)

        # Resolve "auto" to the actual provider using credential detection
        current_provider = normalize_provider(current_provider)
        if current_provider == "auto":
            try:
                from hermes_cli.auth import resolve_provider as _resolve_provider
                current_provider = _resolve_provider(current_provider)
            except Exception:
                current_provider = "openrouter"

        # Detect custom endpoint: provider resolved to openrouter but a custom
        # base URL is configured — the user set up a custom endpoint.
        if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
            current_provider = "custom"

        if not args:
            provider_label = _PROVIDER_LABELS.get(current_provider, current_provider)
            lines = [
                f"🤖 **Current model:** `{current}`",
                f"**Provider:** {provider_label}",
                "",
            ]
            curated = curated_models_for_provider(current_provider)
            if curated:
                lines.append(f"**Available models ({provider_label}):**")
                for mid, desc in curated:
                    marker = " ←" if mid == current else ""
                    label = f"  _{desc}_" if desc else ""
                    lines.append(f"• `{mid}`{label}{marker}")
                lines.append("")
            lines.append("To change: `/model model-name`")
            lines.append("Switch provider: `/model provider:model-name`")
            return "\n".join(lines)

        # Parse provider:model syntax
        target_provider, new_model = parse_model_input(args, current_provider)
        provider_changed = target_provider != current_provider

        # Resolve credentials for the target provider (for API probe).
        # When the provider is unchanged, target_provider equals
        # current_provider, so a single lookup covers both cases.
        api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") or ""
        base_url = "https://openrouter.ai/api/v1"
        try:
            from hermes_cli.runtime_provider import resolve_runtime_provider
            runtime = resolve_runtime_provider(requested=target_provider)
            api_key = runtime.get("api_key", "")
            base_url = runtime.get("base_url", "")
        except Exception as e:
            if provider_changed:
                # Switching providers without usable credentials is an error.
                provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
                return f"⚠️ Could not resolve credentials for provider '{provider_label}': {e}"
            # Same provider: keep the fallback credentials resolved so far.
            logger.debug("Could not resolve runtime provider '%s': %s", target_provider, e)

        # Validate the model against the live API
        try:
            validation = validate_requested_model(
                new_model,
                target_provider,
                api_key=api_key,
                base_url=base_url,
            )
        except Exception as e:
            # Validation itself failed: accept the model rather than block the user.
            logger.debug("Model validation failed for '%s': %s", new_model, e)
            validation = {"accepted": True, "persist": True, "recognized": False, "message": None}

        if not validation.get("accepted"):
            # "message" may be present but None, so use `or` instead of a
            # .get() default to guarantee a string for the membership test.
            msg = validation.get("message") or "Invalid model"
            tip = "\n\nUse `/model` to see available models, `/provider` to see providers" if "Did you mean" not in msg else ""
            return f"⚠️ {msg}{tip}"

        # Persist to config only if validation approves
        if validation.get("persist"):
            try:
                user_config = {}
                if config_path.exists():
                    with open(config_path, encoding="utf-8") as f:
                        user_config = yaml.safe_load(f) or {}
                if "model" not in user_config or not isinstance(user_config["model"], dict):
                    user_config["model"] = {}
                user_config["model"]["default"] = new_model
                if provider_changed:
                    user_config["model"]["provider"] = target_provider
                with open(config_path, 'w', encoding="utf-8") as f:
                    yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
            except Exception as e:
                return f"⚠️ Failed to save model change: {e}"

        # Set env vars so the next agent run picks up the change
        os.environ["HERMES_MODEL"] = new_model
        if provider_changed:
            os.environ["HERMES_INFERENCE_PROVIDER"] = target_provider

        provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
        provider_note = f"\n**Provider:** {provider_label}" if provider_changed else ""

        warning = ""
        if validation.get("message"):
            warning = f"\n⚠️ {validation['message']}"

        if validation.get("persist"):
            persist_note = "saved to config"
        else:
            persist_note = "this session only — will revert on restart"
        return f"🤖 Model changed to `{new_model}` ({persist_note}){provider_note}{warning}\n_(takes effect on next message)_"

    async def _handle_provider_command(self, event: MessageEvent) -> str:
        """Reply to /provider with the active provider and all known providers.

        The active provider is read from ``model.provider`` in config.yaml
        (default ``openrouter``), normalized, resolved when it is ``auto``,
        and reported as ``custom`` when it is ``openrouter`` and
        ``OPENAI_BASE_URL`` is set.

        Args:
            event: The incoming command event (not read).

        Returns:
            A markdown-formatted provider overview.
        """
        import yaml
        from hermes_cli.models import (
            list_available_providers,
            normalize_provider,
            _PROVIDER_LABELS,
        )

        # Start from the default and let config.yaml override it.
        active = "openrouter"
        cfg_file = _hermes_home / 'config.yaml'
        try:
            if cfg_file.exists():
                with open(cfg_file, encoding="utf-8") as fh:
                    loaded = yaml.safe_load(fh) or {}
                model_section = loaded.get("model", {})
                if isinstance(model_section, dict):
                    active = model_section.get("provider", active)
        except Exception:
            pass

        active = normalize_provider(active)
        if active == "auto":
            try:
                from hermes_cli.auth import resolve_provider as _resolve_provider
                active = _resolve_provider(active)
            except Exception:
                active = "openrouter"

        # A custom base URL on top of openrouter means a custom endpoint.
        if active == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
            active = "custom"

        def _row(info):
            """Format one provider entry as a single output line."""
            active_marker = " ← active" if info["id"] == active else ""
            auth_icon = "✅" if info["authenticated"] else "❌"
            if info["aliases"]:
                alias_note = f"  _(also: {', '.join(info['aliases'])})_"
            else:
                alias_note = ""
            return f"{auth_icon} `{info['id']}` — {info['label']}{alias_note}{active_marker}"

        active_label = _PROVIDER_LABELS.get(active, active)
        header = [
            f"🔌 **Current provider:** {active_label} (`{active}`)",
            "",
            "**Available providers:**",
        ]
        rows = [_row(info) for info in list_available_providers()]
        footer = [
            "",
            "Switch: `/model provider:model-name`",
            "Setup: `hermes setup`",
        ]
        return "\n".join(header + rows + footer)
    
    async def _handle_personality_command(self, event: MessageEvent) -> str:
        """Handle /personality command - list or set a personality.

        Personalities are read from ``agent.personalities`` in config.yaml.
        Without arguments the configured personalities are listed. With
        ``none``/``default``/``neutral`` the system prompt is cleared. With a
        personality name (matched case-insensitively) its prompt is written to
        ``agent.system_prompt`` in config.yaml and applied in memory.

        Args:
            event: The incoming command event; its command arguments are read.

        Returns:
            The reply text: a listing, a confirmation, or an error message.
        """
        import yaml

        args = event.get_command_args().strip().lower()
        config_path = _hermes_home / 'config.yaml'

        config = {}
        personalities = {}
        try:
            if config_path.exists():
                with open(config_path, 'r', encoding="utf-8") as f:
                    config = yaml.safe_load(f) or {}
                personalities = config.get("agent", {}).get("personalities", {})
        except Exception as e:
            logger.debug("Could not read personalities from %s: %s", config_path, e)
            config = {}
            personalities = {}

        # A malformed (non-mapping) value is treated as "nothing configured".
        if not isinstance(personalities, dict):
            personalities = {}

        if not personalities:
            return "No personalities configured in `~/.hermes/config.yaml`"

        if not args:
            lines = ["🎭 **Available Personalities**\n"]
            lines.append("• `none` — (no personality overlay)")
            for name, prompt in personalities.items():
                if isinstance(prompt, dict):
                    preview = prompt.get("description") or str(prompt.get("system_prompt") or "")[:50]
                else:
                    # str() keeps non-string YAML scalars from breaking slicing.
                    text = str(prompt)
                    preview = text[:50] + "..." if len(text) > 50 else text
                lines.append(f"• `{name}` — {preview}")
            lines.append("\nUsage: `/personality <name>`")
            return "\n".join(lines)

        def _resolve_prompt(value):
            """Flatten a personality entry (string or dict) into prompt text."""
            if isinstance(value, dict):
                parts = [value.get("system_prompt", "")]
                if value.get("tone"):
                    parts.append(f'Tone: {value["tone"]}')
                if value.get("style"):
                    parts.append(f'Style: {value["style"]}')
                return "\n".join(p for p in parts if p)
            return str(value)

        def _save_system_prompt(prompt_text):
            """Write agent.system_prompt to config.yaml; return an error reply or None."""
            try:
                if not isinstance(config.get("agent"), dict):
                    config["agent"] = {}
                config["agent"]["system_prompt"] = prompt_text
                with open(config_path, 'w', encoding="utf-8") as f:
                    yaml.dump(config, f, default_flow_style=False, sort_keys=False)
            except Exception as e:
                return f"⚠️ Failed to save personality change: {e}"
            return None

        if args in ("none", "default", "neutral"):
            error = _save_system_prompt("")
            if error:
                return error
            self._ephemeral_system_prompt = ""
            return "🎭 Personality cleared — using base agent behavior.\n_(takes effect on next message)_"

        # args was lower-cased above, so match configured names
        # case-insensitively; an exact match wins over a folded one.
        if args in personalities:
            matched_name = args
        else:
            folded = {str(name).lower(): name for name in personalities}
            matched_name = folded.get(args)

        if matched_name is not None:
            new_prompt = _resolve_prompt(personalities[matched_name])

            # Write to config.yaml, same pattern as CLI save_config_value.
            error = _save_system_prompt(new_prompt)
            if error:
                return error

            # Update in-memory so it takes effect on the very next message.
            self._ephemeral_system_prompt = new_prompt

            return f"🎭 Personality set to **{matched_name}**\n_(takes effect on next message)_"

        available = "`none`, " + ", ".join(f"`{n}`" for n in personalities.keys())
        return f"Unknown personality: `{args}`\n\nAvailable: {available}"
    
    async def _handle_retry_command(self, event: MessageEvent) -> str:
        """Handle /retry: drop the last user turn and submit its text again.

        The transcript is cut just before the most recent ``user`` entry and
        rewritten, then that entry's content is fed back through
        ``_handle_message`` as a new text event.

        Args:
            event: The incoming command event; ``source`` and ``raw_message``
                are reused for the replayed event.

        Returns:
            The reply from ``_handle_message``, or a notice when there is no
            previous user message.
        """
        source = event.source
        entry = self.session_store.get_or_create_session(source)
        transcript = self.session_store.load_transcript(entry.session_id)

        # Walk backwards to the most recent user entry, if any.
        user_idx = next(
            (
                pos
                for pos in reversed(range(len(transcript)))
                if transcript[pos].get("role") == "user"
            ),
            None,
        )
        user_text = None if user_idx is None else transcript[user_idx].get("content", "")

        if not user_text:
            return "No previous message to retry."

        # Persist the transcript without the last user turn and what followed.
        self.session_store.rewrite_transcript(entry.session_id, transcript[:user_idx])
        # The stored token count no longer matches the shortened transcript.
        entry.last_prompt_tokens = 0

        # Replay the old text through the regular message path.
        replay = MessageEvent(
            text=user_text,
            message_type=MessageType.TEXT,
            source=source,
            raw_message=event.raw_message,
        )
        return await self._handle_message(replay)
    
    async def _handle_undo_command(self, event: MessageEvent) -> str:
        """Handle /undo: remove the last user message and everything after it.

        Args:
            event: The incoming command event; only ``event.source`` is read.

        Returns:
            A summary with the number of removed transcript entries and a
            preview of the removed user message, or a notice when the
            transcript contains no user message.
        """
        entry = self.session_store.get_or_create_session(event.source)
        transcript = self.session_store.load_transcript(entry.session_id)

        # Position of the most recent user entry, scanning from the end.
        cut_at = next(
            (
                pos
                for pos in reversed(range(len(transcript)))
                if transcript[pos].get("role") == "user"
            ),
            None,
        )
        if cut_at is None:
            return "Nothing to undo."

        removed_msg = transcript[cut_at].get("content", "")
        removed_count = len(transcript) - cut_at
        self.session_store.rewrite_transcript(entry.session_id, transcript[:cut_at])
        # The stored token count no longer matches the shortened transcript.
        entry.last_prompt_tokens = 0

        if len(removed_msg) > 40:
            preview = removed_msg[:40] + "..."
        else:
            preview = removed_msg
        return f"↩️ Undid {removed_count} message(s).\nRemoved: \"{preview}\""
    
    async def _handle_set_home_command(self, event: MessageEvent) -> str:
        """Handle /sethome command -- set the current chat as the platform's home channel.

        Stores the chat id under the top-level ``<PLATFORM>_HOME_CHANNEL`` key
        in config.yaml and exports it as an environment variable of the same
        name so it applies to the running process as well.

        Args:
            event: The incoming command event; only ``event.source`` is read.

        Returns:
            A confirmation message, or an error message if saving failed.
        """
        source = event.source
        platform_name = source.platform.value if source.platform else "unknown"
        chat_id = source.chat_id
        chat_name = source.chat_name or chat_id

        env_key = f"{platform_name.upper()}_HOME_CHANNEL"

        # Save to config.yaml
        try:
            import yaml
            config_path = _hermes_home / 'config.yaml'
            user_config = {}
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    user_config = yaml.safe_load(f) or {}
            user_config[env_key] = chat_id
            with open(config_path, 'w', encoding="utf-8") as f:
                # sort_keys=False keeps the user's existing key order, matching
                # the other config writers in this file; the PyYAML default
                # would re-sort the whole file alphabetically.
                yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
            # Also set in the current environment so it takes effect immediately
            os.environ[env_key] = str(chat_id)
        except Exception as e:
            return f"Failed to save home channel: {e}"

        return (
            f"✅ Home channel set to **{chat_name}** (ID: {chat_id}).\n"
            f"Cron jobs and cross-platform messages will be delivered here."
        )
    
    async def _handle_rollback_command(self, event: MessageEvent) -> str:
        """Handle /rollback command — list or restore filesystem checkpoints.

        Without an argument, lists the checkpoints for the working directory
        (``MESSAGING_CWD``, defaulting to the home directory). With an
        argument, restores the checkpoint selected either by its 1-based
        position in that list or, if the argument is not an integer, by hash.

        Args:
            event: The incoming command event; its command arguments are read.

        Returns:
            The checkpoint listing, a restore confirmation, or an error message.
        """
        from tools.checkpoint_manager import CheckpointManager, format_checkpoint_list

        # Read checkpoint config from config.yaml
        cp_cfg = {}
        try:
            import yaml as _y
            _cfg_path = _hermes_home / "config.yaml"
            if _cfg_path.exists():
                with open(_cfg_path, encoding="utf-8") as _f:
                    _data = _y.safe_load(_f) or {}
                cp_cfg = _data.get("checkpoints", {})
        except Exception as e:
            logger.debug("Could not read checkpoint config: %s", e)

        # Accept the `checkpoints: true/false` shorthand; anything else that is
        # not a mapping (e.g. an empty `checkpoints:` key, which loads as None)
        # is treated as "not configured" instead of crashing on .get().
        if isinstance(cp_cfg, bool):
            cp_cfg = {"enabled": cp_cfg}
        elif not isinstance(cp_cfg, dict):
            cp_cfg = {}

        if not cp_cfg.get("enabled", False):
            return (
                "Checkpoints are not enabled.\n"
                "Enable in config.yaml:\n```\ncheckpoints:\n  enabled: true\n```"
            )

        mgr = CheckpointManager(
            enabled=True,
            max_snapshots=cp_cfg.get("max_snapshots", 50),
        )

        cwd = os.getenv("MESSAGING_CWD", str(Path.home()))
        arg = event.get_command_args().strip()

        # Both the listing and the restore path need the checkpoint list.
        checkpoints = mgr.list_checkpoints(cwd)
        if not arg:
            return format_checkpoint_list(checkpoints, cwd)

        # Restore by number or hash
        if not checkpoints:
            return f"No checkpoints found for {cwd}"

        try:
            idx = int(arg) - 1
        except ValueError:
            # Not a number: treat the argument as a checkpoint hash.
            target_hash = arg
        else:
            if not 0 <= idx < len(checkpoints):
                return f"Invalid checkpoint number. Use 1-{len(checkpoints)}."
            target_hash = checkpoints[idx]["hash"]

        result = mgr.restore(cwd, target_hash)
        if result["success"]:
            return (
                f"✅ Restored to checkpoint {result['restored_to']}: {result['reason']}\n"
                f"A pre-rollback snapshot was saved automatically."
            )
        return f"❌ {result['error']}"

    async def _handle_background_command(self, event: MessageEvent) -> str:
        """Handle /background <prompt> — run a prompt in a separate background session.

        Schedules ``_run_background_task`` as an asyncio task and replies
        immediately with a task id. The result is sent back to the same chat
        when the task completes, without modifying the active session's
        conversation history.

        Args:
            event: The incoming command event; its command arguments are the prompt.

        Returns:
            Usage help when no prompt was given, otherwise a "task started" notice.
        """
        prompt = event.get_command_args().strip()
        if not prompt:
            return (
                "Usage: /background <prompt>\n"
                "Example: /background Summarize the top HN stories today\n\n"
                "Runs the prompt in a separate session. "
                "You can keep chatting — the result will appear here when done."
            )

        source = event.source
        task_id = f"bg_{datetime.now().strftime('%H%M%S')}_{os.urandom(3).hex()}"

        # Fire-and-forget the background task
        bg_task = asyncio.create_task(
            self._run_background_task(prompt, source, task_id)
        )
        # The event loop only keeps weak references to tasks, so an
        # unreferenced task may be garbage-collected before it finishes.
        # Hold a strong reference until it is done.
        pending = getattr(self, "_fire_and_forget_tasks", None)
        if pending is None:
            pending = self._fire_and_forget_tasks = set()
        pending.add(bg_task)
        bg_task.add_done_callback(pending.discard)

        preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
        return f'🔄 Background task started: "{preview}"\nTask ID: {task_id}\nYou can keep chatting — results will appear when done.'

    async def _run_background_task(
        self, prompt: str, source: "SessionSource", task_id: str
    ) -> None:
        """Execute a background agent task and deliver the result to the chat.

        Builds a fresh ``AIAgent`` whose session ID is ``task_id``, runs the
        conversation in the default executor so the event loop stays
        responsive, then sends the final text plus any extracted images and
        media files through the platform adapter.  Failures are logged and
        reported to the chat; nothing is raised to the caller.

        Args:
            prompt: User prompt to run.
            source: Origin of the request; supplies platform, chat and thread.
            task_id: Identifier used as both the session ID and the agent task ID.
        """
        from run_agent import AIAgent

        adapter = self.adapters.get(source.platform)
        if not adapter:
            logger.warning("No adapter for platform %s in background task %s", source.platform, task_id)
            return

        _thread_metadata = {"thread_id": source.thread_id} if source.thread_id else None
        preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
        header = f'✅ Background task complete\nPrompt: "{preview}"\n\n'

        try:
            runtime_kwargs = _resolve_runtime_agent_kwargs()
            if not runtime_kwargs.get("api_key"):
                await adapter.send(
                    source.chat_id,
                    f"❌ Background task {task_id} failed: no provider credentials configured.",
                    metadata=_thread_metadata,
                )
                return

            # Read model from config via shared helper
            model = _resolve_gateway_model()

            # Determine toolset (same logic as _run_agent)
            default_toolset_map = {
                Platform.LOCAL: "hermes-cli",
                Platform.TELEGRAM: "hermes-telegram",
                Platform.DISCORD: "hermes-discord",
                Platform.WHATSAPP: "hermes-whatsapp",
                Platform.SLACK: "hermes-slack",
                Platform.SIGNAL: "hermes-signal",
                Platform.HOMEASSISTANT: "hermes-homeassistant",
                Platform.EMAIL: "hermes-email",
            }
            platform_toolsets_config = {}
            try:
                config_path = _hermes_home / 'config.yaml'
                if config_path.exists():
                    import yaml
                    with open(config_path, 'r', encoding="utf-8") as f:
                        user_config = yaml.safe_load(f) or {}
                    configured = user_config.get("platform_toolsets")
                    # A null or non-mapping value must not break the lookup
                    # below; fall back to the per-platform defaults instead.
                    if isinstance(configured, dict):
                        platform_toolsets_config = configured
            except Exception as exc:
                # Best-effort: an unreadable config just means defaults apply.
                logger.debug("Could not read platform_toolsets from config: %s", exc)

            platform_config_key = {
                Platform.LOCAL: "cli",
                Platform.TELEGRAM: "telegram",
                Platform.DISCORD: "discord",
                Platform.WHATSAPP: "whatsapp",
                Platform.SLACK: "slack",
                Platform.SIGNAL: "signal",
                Platform.HOMEASSISTANT: "homeassistant",
                Platform.EMAIL: "email",
            }.get(source.platform, "telegram")

            config_toolsets = platform_toolsets_config.get(platform_config_key)
            if config_toolsets and isinstance(config_toolsets, list):
                enabled_toolsets = config_toolsets
            else:
                default_toolset = default_toolset_map.get(source.platform, "hermes-telegram")
                enabled_toolsets = [default_toolset]

            platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value

            pr = self._provider_routing
            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))

            def run_sync():
                """Build the agent and run the conversation (executed off-loop)."""
                agent = AIAgent(
                    model=model,
                    **runtime_kwargs,
                    max_iterations=max_iterations,
                    quiet_mode=True,
                    verbose_logging=False,
                    enabled_toolsets=enabled_toolsets,
                    reasoning_config=self._reasoning_config,
                    providers_allowed=pr.get("only"),
                    providers_ignored=pr.get("ignore"),
                    providers_order=pr.get("order"),
                    provider_sort=pr.get("sort"),
                    provider_require_parameters=pr.get("require_parameters", False),
                    provider_data_collection=pr.get("data_collection"),
                    session_id=task_id,
                    platform=platform_key,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
                )

                return agent.run_conversation(
                    user_message=prompt,
                    task_id=task_id,
                )

            # We are inside a coroutine, so a running loop is guaranteed.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(None, run_sync)

            response = result.get("final_response", "") if result else ""
            if not response and result and result.get("error"):
                response = f"Error: {result['error']}"

            if not response:
                await adapter.send(
                    chat_id=source.chat_id,
                    content=header + "(No response generated)",
                    metadata=_thread_metadata,
                )
                return

            # Extract media files and images from the response text
            media_files, response = adapter.extract_media(response)
            images, text_content = adapter.extract_images(response)

            if text_content:
                await adapter.send(
                    chat_id=source.chat_id,
                    content=header + text_content,
                    metadata=_thread_metadata,
                )
            elif not images and not media_files:
                await adapter.send(
                    chat_id=source.chat_id,
                    content=header + "(No response generated)",
                    metadata=_thread_metadata,
                )

            # Attachments are best-effort: one failed upload must not stop
            # the others, but it should leave a trace in the log.
            for image_url, alt_text in (images or []):
                try:
                    await adapter.send_image(
                        chat_id=source.chat_id,
                        image_url=image_url,
                        caption=alt_text,
                    )
                except Exception as exc:
                    logger.warning(
                        "Background task %s: failed to send image %s: %s",
                        task_id, image_url, exc,
                    )

            for media_path in (media_files or []):
                try:
                    await adapter.send_file(
                        chat_id=source.chat_id,
                        file_path=media_path,
                    )
                except Exception as exc:
                    logger.warning(
                        "Background task %s: failed to send file %s: %s",
                        task_id, media_path, exc,
                    )

        except Exception as e:
            logger.exception("Background task %s failed", task_id)
            try:
                await adapter.send(
                    chat_id=source.chat_id,
                    content=f"❌ Background task {task_id} failed: {e}",
                    metadata=_thread_metadata,
                )
            except Exception:
                # Nothing more we can do if even the failure notice fails.
                logger.debug(
                    "Could not report failure of background task %s",
                    task_id, exc_info=True,
                )

    async def _handle_reasoning_command(self, event: MessageEvent) -> str:
        """Handle /reasoning — inspect or change reasoning effort and display.

        Usage:
            /reasoning              Show current effort level and display state
            /reasoning <level>      Set effort (none, minimal, low, medium, high, xhigh)
            /reasoning show|on      Show model reasoning in responses
            /reasoning hide|off     Hide model reasoning from responses

        Changes are applied to this runner and written to config.yaml; a
        failed write is logged and does not undo the in-memory change.
        """
        import yaml

        args = event.get_command_args().strip().lower()
        config_path = _hermes_home / "config.yaml"

        def _persist(dotted_key: str, value):
            """Write *value* under a dot-separated key in config.yaml; return success."""
            try:
                data = {}
                if config_path.exists():
                    with open(config_path, encoding="utf-8") as fh:
                        data = yaml.safe_load(fh) or {}
                *parents, leaf = dotted_key.split(".")
                node = data
                for part in parents:
                    # Create (or replace a non-mapping with) an empty mapping
                    # so the path can always be descended.
                    if part not in node or not isinstance(node[part], dict):
                        node[part] = {}
                    node = node[part]
                node[leaf] = value
                with open(config_path, "w", encoding="utf-8") as fh:
                    yaml.dump(data, fh, default_flow_style=False, sort_keys=False)
            except Exception as e:
                logger.error("Failed to save config key %s: %s", dotted_key, e)
                return False
            return True

        # No argument: report the current settings.
        if not args:
            cfg = self._reasoning_config
            if cfg is None:
                level = "medium (default)"
            elif cfg.get("enabled") is False:
                level = "none (disabled)"
            else:
                level = cfg.get("effort", "medium")
            display_state = "on ✓" if self._show_reasoning else "off"
            return (
                "🧠 **Reasoning Settings**\n\n"
                f"**Effort:** `{level}`\n"
                f"**Display:** {display_state}\n\n"
                "_Usage:_ `/reasoning <none|low|medium|high|xhigh|show|hide>`"
            )

        # Display toggle keywords map straight onto the boolean flag.
        display_flags = {"show": True, "on": True, "hide": False, "off": False}
        if args in display_flags:
            visible = display_flags[args]
            self._show_reasoning = visible
            _persist("display.show_reasoning", visible)
            if visible:
                return "🧠 ✓ Reasoning display: **ON**\nModel thinking will be shown before each response."
            return "🧠 ✓ Reasoning display: **OFF**"

        # Anything else must be an effort level.
        effort = args
        if effort == "none":
            parsed = {"enabled": False}
        elif effort in ("xhigh", "high", "medium", "low", "minimal"):
            parsed = {"enabled": True, "effort": effort}
        else:
            return (
                f"⚠️ Unknown argument: `{effort}`\n\n"
                "**Valid levels:** none, low, minimal, medium, high, xhigh\n"
                "**Display:** show, hide"
            )

        self._reasoning_config = parsed
        saved = _persist("agent.reasoning_effort", effort)
        if not saved:
            return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"
        return f"🧠 ✓ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_"

    async def _handle_compress_command(self, event: MessageEvent) -> str:
        """Handle /compress -- manually compress the session's conversation context.

        Loads the transcript, keeps only non-empty user/assistant messages,
        has a temporary ``AIAgent`` compress them in the executor, rewrites
        the stored transcript and resets the stored prompt-token count.

        Returns:
            A before/after summary, or a short message explaining why
            nothing was compressed or why compression failed.
        """
        entry = self.session_store.get_or_create_session(event.source)
        transcript = self.session_store.load_transcript(entry.session_id)

        if not transcript or len(transcript) < 4:
            return "Not enough conversation to compress (need at least 4 messages)."

        try:
            from run_agent import AIAgent
            from agent.model_metadata import estimate_messages_tokens_rough

            agent_kwargs = _resolve_runtime_agent_kwargs()
            if not agent_kwargs.get("api_key"):
                return "No provider configured -- cannot compress."

            # Model comes from config via the shared gateway helper.
            model = _resolve_gateway_model()

            # Only plain user/assistant turns with content are compressed.
            chat_msgs = []
            for item in transcript:
                role = item.get("role")
                if role in ("user", "assistant") and item.get("content"):
                    chat_msgs.append({"role": role, "content": item.get("content")})
            count_before = len(chat_msgs)
            tokens_before = estimate_messages_tokens_rough(chat_msgs)

            compressor_agent = AIAgent(
                **agent_kwargs,
                model=model,
                max_iterations=4,
                quiet_mode=True,
                enabled_toolsets=["memory"],
                session_id=entry.session_id,
            )

            def _compress():
                # Runs in the executor so the event loop is not blocked.
                return compressor_agent._compress_context(
                    chat_msgs, "", approx_tokens=tokens_before
                )

            compressed, _ = await asyncio.get_event_loop().run_in_executor(
                None, _compress
            )

            self.session_store.rewrite_transcript(entry.session_id, compressed)
            # The transcript changed, so the previously stored count is stale.
            self.session_store.update_session(
                entry.session_key, last_prompt_tokens=0,
            )
            count_after = len(compressed)
            tokens_after = estimate_messages_tokens_rough(compressed)

            return (
                f"🗜️ Compressed: {count_before} → {count_after} messages\n"
                f"~{tokens_before:,} → ~{tokens_after:,} tokens"
            )
        except Exception as e:
            logger.warning("Manual compress failed: %s", e)
            return f"Compression failed: {e}"

    async def _handle_title_command(self, event: MessageEvent) -> str:
        """Handle /title — show the session title, or set it when an argument is given.

        Returns:
            The current title (or a usage hint) when called without an
            argument; otherwise a confirmation, or a warning if the title is
            rejected, empty after sanitising, or the session is unknown.
        """
        entry = self.session_store.get_or_create_session(event.source)
        sid = entry.session_id

        db = self._session_db
        if not db:
            return "Session database not available."

        requested = event.get_command_args().strip()

        # No argument: just report the current title.
        if not requested:
            current = db.get_session_title(sid)
            if current:
                return f"📌 Session title: **{current}**"
            return "No title set. Usage: `/title My Session Name`"

        # Clean the requested title first; the DB layer may reject it outright.
        try:
            clean = db.sanitize_title(requested)
        except ValueError as err:
            return f"⚠️ {err}"
        if not clean:
            return "⚠️ Title is empty after cleanup. Please use printable characters."

        try:
            stored = db.set_session_title(sid, clean)
        except ValueError as err:
            return f"⚠️ {err}"
        if stored:
            return f"✏️ Session title set: **{clean}**"
        return "Session not found in database."

    async def _handle_resume_command(self, event: MessageEvent) -> str:
        """Handle /resume command — switch to a previously-named session.

        Without an argument, lists up to ten titled sessions for the current
        platform.  With a name, resolves it to a session ID, schedules a
        memory flush for the current session, drops any running agent for the
        session key and repoints the session entry at the target session.

        Returns:
            The session list, a confirmation of the switch, or a message
            explaining why nothing was switched.
        """
        if not self._session_db:
            return "Session database not available."

        source = event.source
        session_key = build_session_key(source)
        name = event.get_command_args().strip()

        if not name:
            # List recent titled sessions for this user/platform
            try:
                user_source = source.platform.value if source.platform else None
                sessions = self._session_db.list_sessions_rich(
                    source=user_source, limit=10
                )
                titled = [s for s in sessions if s.get("title")]
                if not titled:
                    return (
                        "No named sessions found.\n"
                        "Use `/title My Session` to name your current session, "
                        "then `/resume My Session` to return to it later."
                    )
                lines = ["📋 **Named Sessions**\n"]
                for s in titled[:10]:
                    title = s["title"]
                    # The key may be present with a None value; ``or ""``
                    # covers that case, which a .get() default does not.
                    preview = (s.get("preview") or "")[:40]
                    preview_part = f" — _{preview}_" if preview else ""
                    lines.append(f"• **{title}**{preview_part}")
                lines.append("\nUsage: `/resume <session name>`")
                return "\n".join(lines)
            except Exception as e:
                logger.debug("Failed to list titled sessions: %s", e)
                return f"Could not list sessions: {e}"

        # Resolve the name to a session ID
        target_id = self._session_db.resolve_session_by_title(name)
        if not target_id:
            return (
                f"No session found matching '**{name}**'.\n"
                "Use `/resume` with no arguments to see available sessions."
            )

        # Check if already on that session
        current_entry = self.session_store.get_or_create_session(source)
        if current_entry.session_id == target_id:
            return f"📌 Already on session **{name}**."

        # Flush memories for current session before switching
        try:
            flush_task = asyncio.create_task(
                self._async_flush_memories(current_entry.session_id)
            )
        except Exception as e:
            logger.debug("Memory flush on resume failed: %s", e)
        else:
            # The loop keeps only weak references to tasks; hold a strong one
            # so the flush cannot be garbage-collected before it finishes.
            detached = getattr(self, "_detached_tasks", None)
            if detached is None:
                detached = self._detached_tasks = set()
            detached.add(flush_task)
            flush_task.add_done_callback(detached.discard)

        self._shutdown_gateway_honcho(session_key)

        # Clear any running agent for this session key
        self._running_agents.pop(session_key, None)

        # Switch the session entry to point at the old session
        new_entry = self.session_store.switch_session(session_key, target_id)
        if not new_entry:
            return "Failed to switch session."

        # Get the title for confirmation
        title = self._session_db.get_session_title(target_id) or name

        # Count user messages for context
        history = self.session_store.load_transcript(target_id)
        msg_count = sum(1 for m in history if m.get("role") == "user") if history else 0
        msg_part = f" ({msg_count} message{'s' if msg_count != 1 else ''})" if msg_count else ""

        return f"↻ Resumed session **{title}**{msg_part}. Conversation restored."

    async def _handle_usage_command(self, event: MessageEvent) -> str:
        """Handle /usage -- report token usage for this session.

        Uses the counters of the session's running agent when one exists and
        has made at least one API call; otherwise falls back to a rough token
        estimate computed from the stored transcript.
        """
        source = event.source
        key = build_session_key(source)

        live_agent = self._running_agents.get(key)
        has_live_stats = (
            live_agent
            and hasattr(live_agent, "session_total_tokens")
            and live_agent.session_api_calls > 0
        )
        if has_live_stats:
            report = [
                "📊 **Session Token Usage**",
                f"Prompt (input): {live_agent.session_prompt_tokens:,}",
                f"Completion (output): {live_agent.session_completion_tokens:,}",
                f"Total: {live_agent.session_total_tokens:,}",
                f"API calls: {live_agent.session_api_calls}",
            ]
            compressor = live_agent.context_compressor
            if compressor.last_prompt_tokens:
                # Guard the division: a zero/unknown context length yields 0%.
                if compressor.context_length:
                    pct = compressor.last_prompt_tokens / compressor.context_length * 100
                else:
                    pct = 0
                report.append(
                    f"Context: {compressor.last_prompt_tokens:,} / {compressor.context_length:,} ({pct:.0f}%)"
                )
            if compressor.compression_count:
                report.append(f"Compressions: {compressor.compression_count}")
            return "\n".join(report)

        # No usable agent counters: estimate from the stored transcript.
        entry = self.session_store.get_or_create_session(source)
        transcript = self.session_store.load_transcript(entry.session_id)
        if not transcript:
            return "No usage data available for this session."

        from agent.model_metadata import estimate_messages_tokens_rough
        chat_msgs = [
            item for item in transcript
            if item.get("role") in ("user", "assistant") and item.get("content")
        ]
        estimate = estimate_messages_tokens_rough(chat_msgs)
        return (
            f"📊 **Session Info**\n"
            f"Messages: {len(chat_msgs)}\n"
            f"Estimated context: ~{estimate:,} tokens\n"
            f"_(Detailed usage available during active conversations)_"
        )

    async def _handle_insights_command(self, event: MessageEvent) -> str:
        """Handle /insights command -- show usage insights and analytics.

        Accepted arguments (all optional; unrecognised tokens are ignored):
            <N>               Bare number: report window in days
            --days <N>        Report window in days (default 30)
            --source <name>   Restrict the report to one source

        Returns:
            The formatted report, or an error message when ``--days`` is not
            an integer or report generation fails.
        """
        import asyncio as _asyncio

        args = event.get_command_args().strip()
        days = 30
        source = None

        # Parse simple args: /insights 7  or  /insights --days 7
        parts = args.split()
        i = 0
        while i < len(parts):
            token = parts[i]
            has_value = i + 1 < len(parts)
            if token == "--days" and has_value:
                try:
                    days = int(parts[i + 1])
                except ValueError:
                    return f"Invalid --days value: {parts[i + 1]}"
                i += 2
            elif token == "--source" and has_value:
                source = parts[i + 1]
                i += 2
            else:
                # isdecimal(), not isdigit(): the latter also accepts
                # characters such as "²" that int() refuses to parse.
                if token.isdecimal():
                    days = int(token)
                i += 1

        try:
            from hermes_state import SessionDB
            from agent.insights import InsightsEngine

            loop = _asyncio.get_running_loop()

            def _run_insights():
                """Generate and format the report off the event loop."""
                db = SessionDB()
                try:
                    engine = InsightsEngine(db)
                    report = engine.generate(days=days, source=source)
                    return engine.format_gateway(report)
                finally:
                    # Always release the DB handle, even if generation fails.
                    db.close()

            return await loop.run_in_executor(None, _run_insights)
        except Exception as e:
            logger.error("Insights command error: %s", e, exc_info=True)
            return f"Error generating insights: {e}"

    async def _handle_reload_mcp_command(self, event: MessageEvent) -> str:
        """Handle /reload-mcp -- disconnect and reconnect all MCP servers.

        Shuts the current servers down and rediscovers tools in the executor,
        reports which servers were reconnected, added or removed, and appends
        a notice to the session transcript so the model learns about the
        changed tool list on its next turn.

        Returns:
            A summary of the reload, or a failure message if any step raised.
        """
        loop = asyncio.get_event_loop()

        def _joined(names) -> str:
            """Render a set of server names as a sorted, comma-separated string."""
            return ", ".join(sorted(names))

        try:
            from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _load_mcp_config, _servers, _lock

            # Snapshot the connected server names before anything changes.
            with _lock:
                before = set(_servers.keys())

            # Load the config while the old servers are still up: a loading
            # error aborts the reload here, before anything is shut down.
            # The resulting names are not used further.
            new_config = _load_mcp_config()
            _configured_names = set(new_config.keys())

            await loop.run_in_executor(None, shutdown_mcp_servers)
            new_tools = await loop.run_in_executor(None, discover_mcp_tools)

            with _lock:
                after = set(_servers.keys())

            added = after - before
            removed = before - after
            reconnected = after & before

            summary = ["🔄 **MCP Servers Reloaded**\n"]
            for label, group in (
                ("♻️ Reconnected", reconnected),
                ("➕ Added", added),
                ("➖ Removed", removed),
            ):
                if group:
                    summary.append(f"{label}: {_joined(group)}")
            if after:
                summary.append(f"\n🔧 {len(new_tools)} tool(s) available from {len(after)} server(s)")
            else:
                summary.append("No MCP servers connected.")

            # The notice goes at the END of the transcript: appending after
            # all existing messages preserves prompt-cache for the prefix.
            change_parts = [
                f"{label} servers: {_joined(group)}"
                for label, group in (
                    ("Added", added),
                    ("Removed", removed),
                    ("Reconnected", reconnected),
                )
                if group
            ]
            if new_tools:
                tool_summary = f"{len(new_tools)} MCP tool(s) now available"
            else:
                tool_summary = "No MCP tools available"
            change_detail = ". ".join(change_parts) + ". " if change_parts else ""
            notice = {
                "role": "user",
                "content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
            }
            try:
                entry = self.session_store.get_or_create_session(event.source)
                self.session_store.append_to_transcript(entry.session_id, notice)
            except Exception:
                pass  # Best-effort; don't fail the reload over a transcript write

            return "\n".join(summary)

        except Exception as e:
            logger.warning("MCP reload failed: %s", e)
            return f"❌ MCP reload failed: {e}"

    async def _handle_update_command(self, event: MessageEvent) -> str:
        """Handle /update command — update Hermes Agent to the latest version.

        Spawns ``hermes update`` in a separate systemd scope so it survives the
        gateway restart that ``hermes update`` triggers at the end.  A marker
        file is written so the *new* gateway process can notify the user of the
        result on startup.  Without ``systemd-run`` the command is detached
        with ``nohup`` in a new session as a best-effort fallback.

        Returns:
            A confirmation that the update was started, or a message
            explaining why it could not be started.
        """
        import json
        import shlex
        import shutil
        import subprocess
        from datetime import datetime

        project_root = Path(__file__).parent.parent.resolve()
        git_dir = project_root / '.git'

        if not git_dir.exists():
            return "✗ Not a git repository — cannot update."

        hermes_bin = shutil.which("hermes")
        if not hermes_bin:
            return "✗ `hermes` command not found on PATH."

        # Write marker so the restarted gateway can notify this chat
        pending_path = _hermes_home / ".update_pending.json"
        output_path = _hermes_home / ".update_output.txt"
        pending = {
            "platform": event.source.platform.value,
            "chat_id": event.source.chat_id,
            "user_id": event.source.user_id,
            "timestamp": datetime.now().isoformat(),
        }
        try:
            pending_path.write_text(json.dumps(pending))
        except OSError as e:
            # Report the failure to the chat instead of raising out of the handler.
            return f"✗ Failed to start update: {e}"

        # Spawn `hermes update` in a separate cgroup so it survives gateway
        # restart.  systemd-run --user --scope creates a transient scope unit.
        # Both paths go through a shell, so quote them: a space or shell
        # metacharacter in either would otherwise break the command.
        update_cmd = (
            f"{shlex.quote(hermes_bin)} update > {shlex.quote(str(output_path))} 2>&1"
        )
        try:
            systemd_run = shutil.which("systemd-run")
            if systemd_run:
                subprocess.Popen(
                    [systemd_run, "--user", "--scope",
                     "--unit=hermes-update", "--",
                     "bash", "-c", update_cmd],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    start_new_session=True,
                )
            else:
                # Fallback: best-effort detach with start_new_session
                subprocess.Popen(
                    ["bash", "-c", f"nohup {update_cmd} &"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    start_new_session=True,
                )
        except Exception as e:
            # No update is running, so the marker would trigger a bogus
            # "update finished" notification on the next start.
            pending_path.unlink(missing_ok=True)
            return f"✗ Failed to start update: {e}"

        return "⚕ Starting Hermes update… I'll notify you when it's done."

    async def _send_update_notification(self) -> None:
        """Report the outcome of a preceding ``/update`` to the chat that requested it.

        Does nothing unless the pending-update marker file exists.  Reads the
        target chat from the marker and the captured command output from the
        output file, sends a summary through the matching adapter, and always
        removes both files afterwards.  Errors are logged, never raised.
        """
        import json
        import re as _re

        marker = _hermes_home / ".update_pending.json"
        captured = _hermes_home / ".update_output.txt"

        if not marker.exists():
            return

        try:
            info = json.loads(marker.read_text())
            platform_str = info.get("platform")
            chat_id = info.get("chat_id")

            # Captured stdout/stderr of the update run, if any.
            text = captured.read_text() if captured.exists() else ""

            adapter = self.adapters.get(Platform(platform_str))
            if not (adapter and chat_id):
                return

            # Drop ANSI colour sequences so the chat shows plain text.
            text = _re.sub(r'\x1b\[[0-9;]*m', '', text).strip()
            if not text:
                msg = "✅ Hermes update finished — gateway restarted successfully."
            else:
                # Keep only the tail when the output is too long for one message.
                if len(text) > 3500:
                    text = "…" + text[-3500:]
                msg = f"✅ Hermes update finished — gateway restarted.\n\n```\n{text}\n```"
            await adapter.send(chat_id, msg)
            logger.info("Sent post-update notification to %s:%s", platform_str, chat_id)
        except Exception as e:
            logger.warning("Post-update notification failed: %s", e)
        finally:
            # One-shot files: remove them whether or not the send succeeded.
            marker.unlink(missing_ok=True)
            captured.unlink(missing_ok=True)

    def _set_session_env(self, context: SessionContext) -> None:
        """Export the session's platform, chat ID and (when set) chat name as env vars."""
        origin = context.source
        os.environ["HERMES_SESSION_PLATFORM"] = origin.platform.value
        os.environ["HERMES_SESSION_CHAT_ID"] = origin.chat_id
        chat_name = origin.chat_name
        if not chat_name:
            # No name to export; any existing value is left untouched.
            return
        os.environ["HERMES_SESSION_CHAT_NAME"] = chat_name
    
    def _clear_session_env(self) -> None:
        """Clear session environment variables."""
        for var in ["HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME"]:
            if var in os.environ:
                del os.environ[var]
    
    async def _enrich_message_with_vision(
        self,
        user_text: str,
        image_paths: List[str],
    ) -> str:
        """
        Describe user-attached images with the vision tool and put the
        descriptions in front of the message text.

        Images are analysed one after another with a general-purpose prompt.
        Every image contributes one bracketed note containing its local path,
        so the model can call vision_analyze itself for a closer look; on
        success the note also carries the description, otherwise it says the
        automatic analysis did not work.

        Args:
            user_text:   The user's original caption / message text.
            image_paths: List of local file paths to cached images.

        Returns:
            The notes joined by blank lines, followed by ``user_text`` when it
            is non-empty; ``user_text`` unchanged when there are no images.
        """
        from tools.vision_tools import vision_analyze_tool
        import json as _json

        analysis_prompt = (
            "Describe everything visible in this image in thorough detail. "
            "Include any text, code, data, objects, people, layout, colors, "
            "and any other notable visual information."
        )

        async def _note_for(path: str) -> str:
            """Return the bracketed note for a single image; never raises."""
            try:
                logger.debug("Auto-analyzing user image: %s", path)
                raw = await vision_analyze_tool(
                    image_url=path,
                    user_prompt=analysis_prompt,
                )
                outcome = _json.loads(raw)
                if not outcome.get("success"):
                    return (
                        "[The user sent an image but I couldn't quite see it "
                        "this time (>_<) You can try looking at it yourself "
                        f"with vision_analyze using image_url: {path}]"
                    )
                description = outcome.get("analysis", "")
                return (
                    f"[The user sent an image~ Here's what I can see:\n{description}]\n"
                    f"[If you need a closer look, use vision_analyze with "
                    f"image_url: {path} ~]"
                )
            except Exception as e:
                logger.error("Vision auto-analysis error: %s", e)
                return (
                    f"[The user sent an image but something went wrong when I "
                    f"tried to look at it~ You can try examining it yourself "
                    f"with vision_analyze using image_url: {path}]"
                )

        # Sequential on purpose: one image is analysed at a time, in order.
        notes = []
        for image_path in image_paths:
            notes.append(await _note_for(image_path))

        if not notes:
            return user_text

        # Vision notes come first, the user's own text last.
        prefix = "\n\n".join(notes)
        return f"{prefix}\n\n{user_text}" if user_text else prefix

    async def _enrich_message_with_transcription(
        self,
        user_text: str,
        audio_paths: List[str],
    ) -> str:
        """
        Auto-transcribe user voice/audio messages using OpenAI Whisper API
        and prepend the transcript to the message text.

        Args:
            user_text:   The user's original caption / message text.
            audio_paths: List of local file paths to cached audio files.

        Returns:
            The enriched message string with transcriptions prepended.
        """
        from tools.transcription_tools import transcribe_audio
        import asyncio

        enriched_parts = []
        for path in audio_paths:
            try:
                logger.debug("Transcribing user voice: %s", path)
                result = await asyncio.to_thread(transcribe_audio, path)
                if result["success"]:
                    transcript = result["transcript"]
                    enriched_parts.append(
                        f'[The user sent a voice message~ '
                        f'Here\'s what they said: "{transcript}"]'
                    )
                else:
                    error = result.get("error", "unknown error")
                    if "OPENAI_API_KEY" in error or "VOICE_TOOLS_OPENAI_KEY" in error:
                        enriched_parts.append(
                            "[The user sent a voice message but I can't listen "
                            "to it right now~ VOICE_TOOLS_OPENAI_KEY isn't set up yet "
                            "(';w;') Let them know!]"
                        )
                    else:
                        enriched_parts.append(
                            "[The user sent a voice message but I had trouble "
                            f"transcribing it~ ({error})]"
                        )
            except Exception as e:
                logger.error("Transcription error: %s", e)
                enriched_parts.append(
                    "[The user sent a voice message but something went wrong "
                    "when I tried to listen to it~ Let them know!]"
                )

        if enriched_parts:
            prefix = "\n\n".join(enriched_parts)
            if user_text:
                return f"{prefix}\n\n{user_text}"
            return prefix
        return user_text

    async def _run_process_watcher(self, watcher: dict) -> None:
        """
        Periodically check a background process and push updates to the user.

        Runs as an asyncio task. Stays silent when nothing changed.
        Ends when the process has exited or is no longer in the registry.

        Notification mode (from ``display.background_process_notifications``):
          - ``all``    — running-output updates + final message
          - ``result`` — final completion message only
          - ``error``  — final message only when exit code != 0
          - ``off``    — no messages at all

        Args:
            watcher: Watcher description.  ``session_id`` and
                ``check_interval`` are required; ``platform`` and ``chat_id``
                are optional and identify where updates are delivered.
        """
        from tools.process_registry import process_registry

        session_id = watcher["session_id"]
        interval = watcher["check_interval"]
        platform_name = watcher.get("platform", "")
        chat_id = watcher.get("chat_id", "")
        notify_mode = self._load_background_notifications_mode()

        logger.debug("Process watcher started: %s (every %ss, notify=%s)",
                      session_id, interval, notify_mode)

        if notify_mode == "off":
            # Still wait for the process to exit so we can log it, but don't
            # push any messages to the user.
            while True:
                await asyncio.sleep(interval)
                session = process_registry.get(session_id)
                if session is None or session.exited:
                    break
            logger.debug("Process watcher ended (silent): %s", session_id)
            return

        async def _deliver(message_text: str) -> None:
            """Best-effort send of *message_text* to the watcher's chat."""
            # Resolve the adapter at send time: the adapter set may change
            # while a long-running process is being watched.
            adapter = None
            for p, a in self.adapters.items():
                if p.value == platform_name:
                    adapter = a
                    break
            if adapter and chat_id:
                try:
                    await adapter.send(chat_id, message_text)
                except Exception as e:
                    # Delivery failures must not kill the watcher.
                    logger.error("Watcher delivery error: %s", e)

        last_output_len = 0
        while True:
            await asyncio.sleep(interval)

            session = process_registry.get(session_id)
            if session is None:
                break

            # Growth of the buffer since the previous poll is what counts as
            # "new output".
            current_output_len = len(session.output_buffer)
            has_new_output = current_output_len > last_output_len
            last_output_len = current_output_len

            if session.exited:
                # Decide whether to notify based on mode
                should_notify = (
                    notify_mode in ("all", "result")
                    or (notify_mode == "error" and session.exit_code not in (0, None))
                )
                if should_notify:
                    # Only the last 1000 characters of output are included.
                    new_output = session.output_buffer[-1000:] if session.output_buffer else ""
                    await _deliver(
                        f"[Background process {session_id} finished with exit code {session.exit_code}~ "
                        f"Here's the final output:\n{new_output}]"
                    )
                break

            if has_new_output and notify_mode == "all":
                # New output available -- deliver status update (only in "all" mode)
                new_output = session.output_buffer[-500:] if session.output_buffer else ""
                await _deliver(
                    f"[Background process {session_id} is still running~ "
                    f"New output:\n{new_output}]"
                )

        logger.debug("Process watcher ended: %s", session_id)

    async def _run_agent(
        self,
        message: str,
        context_prompt: str,
        history: List[Dict[str, Any]],
        source: SessionSource,
        session_id: str,
        session_key: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Run the agent with the given message and context.

        The blocking ``AIAgent.run_conversation`` call runs in the default
        executor so the event loop is not blocked.  While it runs, up to three
        helper tasks live on the loop:
          - a progress sender that relays tool-progress lines to the chat,
          - a tracker that registers the agent in ``self._running_agents``,
          - an interrupt monitor that signals the agent when the adapter
            reports a pending message for this session.

        If the run was interrupted and a pending message is available, this
        method calls itself with that message and the updated history, and
        returns that call's result instead.

        Args:
            message:        The user message to process.
            context_prompt: Platform context, combined with the configured
                            ephemeral system prompt (if any).
            history:        Prior messages; either simple transcript entries or
                            full agent messages (tool calls / tool results).
            source:         Where the message came from (platform, chat, ...).
            session_id:     Session id passed to the agent.
            session_key:    Key used for interrupt lookup, running-agent
                            tracking and the session store.

        Returns:
            A result dict containing:
              - "final_response": str (the text to send back)
              - "messages": list (full conversation including tool calls)
              - "api_calls": int
              - "tools": list (tool definitions, for transcript logging)
            Runs that reached the agent also carry "history_offset" and
            "last_prompt_tokens"; runs that produced a response additionally
            carry "last_reasoning" and "session_id".
        """
        from run_agent import AIAgent
        import json as _json
        import queue

        # Determine toolset based on platform.
        # Check config.yaml for per-platform overrides, fallback to hardcoded defaults.
        default_toolset_map = {
            Platform.LOCAL: "hermes-cli",
            Platform.TELEGRAM: "hermes-telegram",
            Platform.DISCORD: "hermes-discord",
            Platform.WHATSAPP: "hermes-whatsapp",
            Platform.SLACK: "hermes-slack",
            Platform.SIGNAL: "hermes-signal",
            Platform.HOMEASSISTANT: "hermes-homeassistant",
            Platform.EMAIL: "hermes-email",
        }

        # Read config.yaml once; both the toolset overrides and the display
        # settings come from it.  Any failure falls back to defaults for both.
        user_config: Dict[str, Any] = {}
        try:
            config_path = _hermes_home / 'config.yaml'
            if config_path.exists():
                import yaml
                with open(config_path, 'r', encoding="utf-8") as f:
                    loaded_config = yaml.safe_load(f)
                if isinstance(loaded_config, dict):
                    user_config = loaded_config
        except Exception as e:
            logger.debug("Could not load platform_toolsets config: %s", e)

        # A section that is present but empty (``platform_toolsets:``) loads
        # as None, so normalise both sections to dicts before calling .get().
        platform_toolsets_config = user_config.get("platform_toolsets")
        if not isinstance(platform_toolsets_config, dict):
            platform_toolsets_config = {}
        _progress_cfg = user_config.get("display")
        if not isinstance(_progress_cfg, dict):
            _progress_cfg = {}

        # Map platform enum to config key
        platform_config_key = {
            Platform.LOCAL: "cli",
            Platform.TELEGRAM: "telegram",
            Platform.DISCORD: "discord",
            Platform.WHATSAPP: "whatsapp",
            Platform.SLACK: "slack",
            Platform.SIGNAL: "signal",
            Platform.HOMEASSISTANT: "homeassistant",
            Platform.EMAIL: "email",
        }.get(source.platform, "telegram")

        # Use config override if present (list of toolsets), otherwise hardcoded default
        config_toolsets = platform_toolsets_config.get(platform_config_key)
        if config_toolsets and isinstance(config_toolsets, list):
            enabled_toolsets = config_toolsets
        else:
            default_toolset = default_toolset_map.get(source.platform, "hermes-telegram")
            enabled_toolsets = [default_toolset]

        # Tool progress mode from config.yaml: "all", "new", "verbose", "off"
        # Falls back to env vars for backward compatibility
        _configured_mode = _progress_cfg.get("tool_progress")
        if _configured_mode is False:
            # Unquoted ``off`` in YAML 1.1 is parsed as boolean False; treat it
            # as the "off" mode instead of silently falling through to "all".
            progress_mode = "off"
        else:
            if _configured_mode is True:
                # Unquoted ``on`` / ``true``: same as the default mode.
                _configured_mode = "all"
            progress_mode = (
                _configured_mode
                or os.getenv("HERMES_TOOL_PROGRESS_MODE")
                or "all"
            )
        tool_progress_enabled = progress_mode != "off"

        # Queue for progress messages (thread-safe)
        progress_queue = queue.Queue() if tool_progress_enabled else None
        last_tool = [None]  # Mutable container for tracking in closure
        last_progress_msg = [None]  # Track last message for dedup
        repeat_count = [0]  # How many times the same message repeated

        # Built once per run rather than on every tool call.
        tool_emojis = {
            "terminal": "💻",
            "process": "⚙️",
            "web_search": "🔍",
            "web_extract": "📄",
            "read_file": "📖",
            "write_file": "✍️",
            "patch": "🔧",
            "search": "🔎",
            "search_files": "🔎",
            "list_directory": "📂",
            "image_generate": "🎨",
            "text_to_speech": "🔊",
            "browser_navigate": "🌐",
            "browser_click": "👆",
            "browser_type": "⌨️",
            "browser_snapshot": "📸",
            "browser_scroll": "📜",
            "browser_back": "◀️",
            "browser_press": "⌨️",
            "browser_close": "🚪",
            "browser_get_images": "🖼️",
            "browser_vision": "👁️",
            "moa_query": "🧠",
            "mixture_of_agents": "🧠",
            "vision_analyze": "👁️",
            "skill_view": "📚",
            "skills_list": "📋",
            "todo": "📋",
            "memory": "🧠",
            "session_search": "🔍",
            "send_message": "📨",
            "schedule_cronjob": "⏰",
            "list_cronjobs": "⏰",
            "remove_cronjob": "⏰",
            "execute_code": "🐍",
            "delegate_task": "🔀",
            "clarify": "❓",
            "skill_manage": "📝",
        }

        def progress_callback(tool_name: str, preview: str = None, args: dict = None):
            """Callback invoked by agent when a tool is called."""
            if not progress_queue:
                return

            # "new" mode: only report when tool changes
            if progress_mode == "new" and tool_name == last_tool[0]:
                return
            last_tool[0] = tool_name

            # Build progress message with primary argument preview
            emoji = tool_emojis.get(tool_name, "⚙️")

            # Verbose mode: show detailed arguments
            if progress_mode == "verbose" and args:
                args_str = _json.dumps(args, ensure_ascii=False, default=str)
                if len(args_str) > 200:
                    args_str = args_str[:197] + "..."
                msg = f"{emoji} {tool_name}({list(args.keys())})\n{args_str}"
                progress_queue.put(msg)
                return

            if preview:
                # Truncate preview to keep messages clean
                if len(preview) > 80:
                    preview = preview[:77] + "..."
                msg = f"{emoji} {tool_name}: \"{preview}\""
            else:
                msg = f"{emoji} {tool_name}..."

            # Dedup: collapse consecutive identical progress messages.
            # Common with execute_code where models iterate with the same
            # code (same boilerplate imports → identical previews).
            if msg == last_progress_msg[0]:
                repeat_count[0] += 1
                # Update the last line in progress_lines with a counter
                # via a special "dedup" queue message.
                progress_queue.put(("__dedup__", msg, repeat_count[0]))
                return
            last_progress_msg[0] = msg
            repeat_count[0] = 0

            progress_queue.put(msg)

        # Background task to send progress messages
        # Accumulates tool lines into a single message that gets edited
        _progress_metadata = {"thread_id": source.thread_id} if source.thread_id else None

        async def send_progress_messages():
            if not progress_queue:
                return

            adapter = self.adapters.get(source.platform)
            if not adapter:
                return

            progress_lines = []      # Accumulated tool lines
            progress_msg_id = None   # ID of the progress message to edit
            can_edit = True          # False once an edit fails (platform doesn't support it)

            def _fold_queue_item(raw):
                """Merge one queue item into progress_lines; return the line to show."""
                # Dedup items rewrite the last line with a repeat counter.
                if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
                    _, base_msg, count = raw
                    if progress_lines:
                        progress_lines[-1] = f"{base_msg} (×{count + 1})"
                        return progress_lines[-1]
                    return base_msg
                progress_lines.append(raw)
                return raw

            # The whole polling loop sits inside one try so that cancellation
            # is handled no matter which await it lands on.  The task is most
            # often cancelled during the idle sleep, and a handler attached to
            # the inner try would not see a CancelledError raised there.
            try:
                while True:
                    try:
                        raw = progress_queue.get_nowait()
                    except queue.Empty:
                        await asyncio.sleep(0.3)
                        continue

                    try:
                        msg = _fold_queue_item(raw)

                        if can_edit and progress_msg_id is not None:
                            # Try to edit the existing progress message
                            full_text = "\n".join(progress_lines)
                            result = await adapter.edit_message(
                                chat_id=source.chat_id,
                                message_id=progress_msg_id,
                                content=full_text,
                            )
                            if not result.success:
                                # Platform doesn't support editing — stop trying,
                                # send just this new line as a separate message
                                can_edit = False
                                await adapter.send(chat_id=source.chat_id, content=msg, metadata=_progress_metadata)
                        else:
                            if can_edit:
                                # First tool: send all accumulated text as new message
                                full_text = "\n".join(progress_lines)
                                result = await adapter.send(chat_id=source.chat_id, content=full_text, metadata=_progress_metadata)
                            else:
                                # Editing unsupported: send just this line
                                result = await adapter.send(chat_id=source.chat_id, content=msg, metadata=_progress_metadata)
                            if result.success and result.message_id:
                                progress_msg_id = result.message_id

                        # Restore typing indicator
                        await asyncio.sleep(0.3)
                        await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
                    except asyncio.CancelledError:
                        # Never treat cancellation as a delivery error.
                        raise
                    except Exception as e:
                        logger.error("Progress message error: %s", e)
                        await asyncio.sleep(1)
            except asyncio.CancelledError:
                # Drain remaining queued messages
                while not progress_queue.empty():
                    try:
                        _fold_queue_item(progress_queue.get_nowait())
                    except Exception:
                        break
                # Final edit with all remaining tools (only if editing works)
                if can_edit and progress_lines and progress_msg_id:
                    full_text = "\n".join(progress_lines)
                    try:
                        await adapter.edit_message(
                            chat_id=source.chat_id,
                            message_id=progress_msg_id,
                            content=full_text,
                        )
                    except Exception:
                        pass
                return

        # We need to share the agent instance for interrupt support
        agent_holder = [None]  # Mutable container for the agent instance
        result_holder = [None]  # Mutable container for the result
        tools_holder = [None]   # Mutable container for the tool definitions

        # Bridge sync step_callback → async hooks.emit for agent:step events.
        # We are inside a coroutine, so the running loop is the one to target.
        _loop_for_step = asyncio.get_running_loop()
        _hooks_ref = self.hooks

        def _step_callback_sync(iteration: int, tool_names: list) -> None:
            try:
                asyncio.run_coroutine_threadsafe(
                    _hooks_ref.emit("agent:step", {
                        "platform": source.platform.value if source.platform else "",
                        "user_id": source.user_id,
                        "session_id": session_id,
                        "iteration": iteration,
                        "tool_names": tool_names,
                    }),
                    _loop_for_step,
                )
            except Exception as _e:
                logger.debug("agent:step hook error: %s", _e)

        def _media_paths_in(content) -> List[str]:
            """Return the MEDIA:<path> paths found in a tool message's content."""
            # Tool content may be None or a non-string payload; only plain
            # strings can carry MEDIA: tags.
            if not isinstance(content, str) or "MEDIA:" not in content:
                return []
            found = []
            for match in re.finditer(r'MEDIA:(\S+)', content):
                # Tags are often embedded in JSON, so trim trailing JSON punctuation.
                candidate = match.group(1).strip().rstrip('",}')
                if candidate:
                    found.append(candidate)
            return found

        def run_sync():
            # Pass session_key to process registry via env var so background
            # processes can be mapped back to this gateway session
            os.environ["HERMES_SESSION_KEY"] = session_key or ""

            # Read from env var or use default (same as CLI)
            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))

            # Map platform enum to the platform hint key the agent understands.
            # Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
            platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value

            # Combine platform context with user-configured ephemeral system prompt
            combined_ephemeral = context_prompt or ""
            if self._ephemeral_system_prompt:
                combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()

            # Re-read .env and config for fresh credentials (gateway is long-lived,
            # keys may change without restart).
            try:
                load_dotenv(_env_path, override=True, encoding="utf-8")
            except UnicodeDecodeError:
                load_dotenv(_env_path, override=True, encoding="latin-1")
            except Exception:
                pass

            model = _resolve_gateway_model()

            try:
                runtime_kwargs = _resolve_runtime_agent_kwargs()
            except Exception as exc:
                return {
                    "final_response": f"⚠️ Provider authentication failed: {exc}",
                    "messages": [],
                    "api_calls": 0,
                    "tools": [],
                }

            pr = self._provider_routing
            honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key)
            agent = AIAgent(
                model=model,
                **runtime_kwargs,
                max_iterations=max_iterations,
                quiet_mode=True,
                verbose_logging=False,
                enabled_toolsets=enabled_toolsets,
                ephemeral_system_prompt=combined_ephemeral or None,
                prefill_messages=self._prefill_messages or None,
                reasoning_config=self._reasoning_config,
                providers_allowed=pr.get("only"),
                providers_ignored=pr.get("ignore"),
                providers_order=pr.get("order"),
                provider_sort=pr.get("sort"),
                provider_require_parameters=pr.get("require_parameters", False),
                provider_data_collection=pr.get("data_collection"),
                session_id=session_id,
                tool_progress_callback=progress_callback if tool_progress_enabled else None,
                step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None,
                platform=platform_key,
                honcho_session_key=session_key,
                honcho_manager=honcho_manager,
                honcho_config=honcho_config,
                session_db=self._session_db,
                fallback_model=self._fallback_model,
            )

            # Store agent reference for interrupt support
            agent_holder[0] = agent
            # Capture the full tool definitions for transcript logging
            tools_holder[0] = agent.tools if hasattr(agent, 'tools') else None

            # Convert history to agent format.
            # Two cases:
            #   1. Normal path (from transcript): simple {role, content, timestamp} dicts
            #      - Strip timestamps, keep role+content
            #   2. Interrupt path (from agent result["messages"]): full agent messages
            #      that may include tool_calls, tool_call_id, reasoning, etc.
            #      - These must be passed through intact so the API sees valid
            #        assistant→tool sequences (dropping tool_calls causes 500 errors)
            agent_history = []
            for msg in history:
                role = msg.get("role")
                if not role:
                    continue

                # Skip metadata entries (tool definitions, session info)
                # -- these are for transcript logging, not for the LLM
                if role in ("session_meta",):
                    continue

                # Skip system messages -- the agent rebuilds its own system prompt
                if role == "system":
                    continue

                # Rich agent messages (tool_calls, tool results) must be passed
                # through intact so the API sees valid assistant→tool sequences
                has_tool_calls = "tool_calls" in msg
                has_tool_call_id = "tool_call_id" in msg
                is_tool_message = role == "tool"

                if has_tool_calls or has_tool_call_id or is_tool_message:
                    clean_msg = {k: v for k, v in msg.items() if k != "timestamp"}
                    agent_history.append(clean_msg)
                else:
                    # Simple text message - just need role and content
                    content = msg.get("content")
                    if content:
                        # Tag cross-platform mirror messages so the agent knows their origin
                        if msg.get("mirror"):
                            mirror_src = msg.get("mirror_source", "another session")
                            content = f"[Delivered from {mirror_src}] {content}"
                        agent_history.append({"role": role, "content": content})

            # Collect MEDIA paths already in history so we can exclude them
            # from the current turn's extraction. This is compression-safe:
            # even if the message list shrinks, we know which paths are old.
            _history_media_paths: set = set()
            for _hm in agent_history:
                if _hm.get("role") in ("tool", "function"):
                    _history_media_paths.update(_media_paths_in(_hm.get("content")))

            result = agent.run_conversation(message, conversation_history=agent_history, task_id=session_id)
            result_holder[0] = result

            # Return final response, or a message if something went wrong
            final_response = result.get("final_response")

            # Extract last actual prompt token count from the agent's compressor
            _last_prompt_toks = 0
            _agent = agent_holder[0]
            if _agent and hasattr(_agent, "context_compressor"):
                _last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0)

            if not final_response:
                error_msg = f"⚠️ {result['error']}" if result.get("error") else "(No response generated)"
                return {
                    "final_response": error_msg,
                    "messages": result.get("messages", []),
                    "api_calls": result.get("api_calls", 0),
                    "tools": tools_holder[0] or [],
                    "history_offset": len(agent_history),
                    "last_prompt_tokens": _last_prompt_toks,
                }

            # Scan tool results for MEDIA:<path> tags that need to be delivered
            # as native audio/file attachments.  The TTS tool embeds MEDIA: tags
            # in its JSON response, but the model's final text reply usually
            # doesn't include them.  We collect unique tags from tool results and
            # append any that aren't already present in the final response, so the
            # adapter's extract_media() can find and deliver the files exactly once.
            #
            # Uses path-based deduplication against _history_media_paths (collected
            # before run_conversation) instead of index slicing. This is safe even
            # when context compression shrinks the message list. (Fixes #160)
            if "MEDIA:" not in final_response:
                media_tags = []
                has_voice_directive = False
                for msg in result.get("messages", []):
                    if msg.get("role") not in ("tool", "function"):
                        continue
                    content = msg.get("content", "")
                    if not isinstance(content, str) or "MEDIA:" not in content:
                        continue
                    media_tags.extend(
                        f"MEDIA:{path}"
                        for path in _media_paths_in(content)
                        if path not in _history_media_paths
                    )
                    if "[[audio_as_voice]]" in content:
                        has_voice_directive = True

                if media_tags:
                    # Order-preserving de-duplication.
                    unique_tags = list(dict.fromkeys(media_tags))
                    if has_voice_directive:
                        unique_tags.insert(0, "[[audio_as_voice]]")
                    final_response = final_response + "\n" + "\n".join(unique_tags)

            # Sync session_id: the agent may have created a new session during
            # mid-run context compression (_compress_context splits sessions).
            # If so, update the session store entry so the NEXT message loads
            # the compressed transcript, not the stale pre-compression one.
            agent = agent_holder[0]
            if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id:
                logger.info(
                    "Session split detected: %s → %s (compression)",
                    session_id, agent.session_id,
                )
                entry = self.session_store._entries.get(session_key)
                if entry:
                    entry.session_id = agent.session_id
                    self.session_store._save()

            effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id

            return {
                "final_response": final_response,
                "last_reasoning": result.get("last_reasoning"),
                "messages": result_holder[0].get("messages", []) if result_holder[0] else [],
                "api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
                "tools": tools_holder[0] or [],
                "history_offset": len(agent_history),
                "last_prompt_tokens": _last_prompt_toks,
                "session_id": effective_session_id,
            }

        # Start progress message sender if enabled
        progress_task = None
        if tool_progress_enabled:
            progress_task = asyncio.create_task(send_progress_messages())

        # Track this agent as running for this session (for interrupt support)
        # We do this in a callback after the agent is created
        async def track_agent():
            # Wait for agent to be created
            while agent_holder[0] is None:
                await asyncio.sleep(0.05)
            if session_key:
                self._running_agents[session_key] = agent_holder[0]

        tracking_task = asyncio.create_task(track_agent())

        # Monitor for interrupts from the adapter (new messages arriving)
        async def monitor_for_interrupt():
            adapter = self.adapters.get(source.platform)
            if not adapter or not session_key:
                return

            while True:
                await asyncio.sleep(0.2)  # Check every 200ms
                # Check if adapter has a pending interrupt for this session.
                # Must use session_key (build_session_key output) — NOT
                # source.chat_id — because the adapter stores interrupt events
                # under the full session key.
                if hasattr(adapter, 'has_pending_interrupt') and adapter.has_pending_interrupt(session_key):
                    agent = agent_holder[0]
                    if agent:
                        pending_event = adapter.get_pending_message(session_key)
                        pending_text = pending_event.text if pending_event else None
                        logger.debug("Interrupt detected from adapter, signaling agent...")
                        agent.interrupt(pending_text)
                        break

        interrupt_monitor = asyncio.create_task(monitor_for_interrupt())

        try:
            # Run in thread pool to not block
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, run_sync)

            # Check if we were interrupted and have a pending message
            result = result_holder[0]
            adapter = self.adapters.get(source.platform)

            # Get pending message from adapter if interrupted.
            # Use session_key (not source.chat_id) to match adapter's storage keys.
            pending = None
            if result and result.get("interrupted") and adapter:
                pending_event = adapter.get_pending_message(session_key) if session_key else None
                if pending_event:
                    pending = pending_event.text
                elif result.get("interrupt_message"):
                    pending = result.get("interrupt_message")

            if pending:
                logger.debug("Processing interrupted message: '%s...'", pending[:40])

                # Clear the adapter's interrupt event so the next _run_agent call
                # doesn't immediately re-trigger the interrupt before the new agent
                # even makes its first API call (this was causing an infinite loop).
                if adapter and hasattr(adapter, '_active_sessions') and session_key and session_key in adapter._active_sessions:
                    adapter._active_sessions[session_key].clear()

                # Don't send the interrupted response to the user — it's just noise
                # like "Operation interrupted." They already know they sent a new
                # message, so go straight to processing it.

                # Now process the pending message with updated history
                updated_history = result.get("messages", history)
                return await self._run_agent(
                    message=pending,
                    context_prompt=context_prompt,
                    history=updated_history,
                    source=source,
                    session_id=session_id,
                    session_key=session_key
                )
        finally:
            # Stop progress sender and interrupt monitor
            if progress_task:
                progress_task.cancel()
            interrupt_monitor.cancel()

            # Clean up tracking
            tracking_task.cancel()
            if session_key and session_key in self._running_agents:
                del self._running_agents[session_key]

            # Wait for cancelled tasks
            for task in [progress_task, interrupt_monitor, tracking_task]:
                if task:
                    try:
                        await task
                    except asyncio.CancelledError:
                        pass
                    except Exception as e:
                        # A helper that crashed on its own must not discard
                        # the agent's response (or mask its real error).
                        logger.error("Agent helper task failed: %s", e)

        return response


def _start_cron_ticker(stop_event: threading.Event, adapters=None, interval: int = 60):
    """
    Background thread that ticks the cron scheduler at a regular interval.

    Runs inside the gateway process so cronjobs fire automatically without
    needing a separate `hermes cron daemon` or system cron entry.

    Also refreshes the channel directory roughly every 5 minutes and prunes
    the image and document caches roughly once per hour. Those cadences are
    converted to tick counts from ``interval`` so they hold for non-default
    intervals too (at the default 60s they are 5 and 60 ticks).

    Args:
        stop_event: Set by the owner to make the loop exit. It also serves as
            the interruptible sleep between ticks, so shutdown is prompt.
        adapters: Passed to build_channel_directory(); the channel directory
            refresh is skipped when this is falsy.
        interval: Seconds to wait between ticks.
    """
    from cron.scheduler import tick as cron_tick
    from gateway.platforms.base import cleanup_image_cache, cleanup_document_cache

    CHANNEL_DIR_PERIOD = 5 * 60     # seconds between channel directory refreshes
    IMAGE_CACHE_PERIOD = 60 * 60    # seconds between cache cleanups

    # Translate wall-clock periods into tick counts. A non-positive interval
    # cannot be divided by, so fall back to the default-interval cadence.
    seconds_per_tick = interval if interval > 0 else 60
    CHANNEL_DIR_EVERY = max(1, round(CHANNEL_DIR_PERIOD / seconds_per_tick))
    IMAGE_CACHE_EVERY = max(1, round(IMAGE_CACHE_PERIOD / seconds_per_tick))

    logger.info("Cron ticker started (interval=%ds)", interval)
    tick_count = 0
    while not stop_event.is_set():
        # Every step below is best-effort: a failure is logged and the
        # ticker keeps running so later ticks still fire.
        try:
            cron_tick(verbose=False)
        except Exception as e:
            logger.debug("Cron tick error: %s", e)

        tick_count += 1

        if tick_count % CHANNEL_DIR_EVERY == 0 and adapters:
            try:
                # Imported inside the try so an import failure is handled
                # like any other refresh error.
                from gateway.channel_directory import build_channel_directory
                build_channel_directory(adapters)
            except Exception as e:
                logger.debug("Channel directory refresh error: %s", e)

        if tick_count % IMAGE_CACHE_EVERY == 0:
            try:
                removed = cleanup_image_cache(max_age_hours=24)
                if removed:
                    logger.info("Image cache cleanup: removed %d stale file(s)", removed)
            except Exception as e:
                logger.debug("Image cache cleanup error: %s", e)
            try:
                removed = cleanup_document_cache(max_age_hours=24)
                if removed:
                    logger.info("Document cache cleanup: removed %d stale file(s)", removed)
            except Exception as e:
                logger.debug("Document cache cleanup error: %s", e)

        # Sleep until the next tick, waking immediately if stop is requested.
        stop_event.wait(timeout=interval)
    logger.info("Cron ticker stopped")


async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False) -> bool:
    """
    Start the gateway and run until interrupted.
    
    This is the main entry point for running the gateway.
    Returns True if the gateway ran successfully, False if it failed to start.
    A False return causes a non-zero exit code so systemd can auto-restart.

    Steps, in order: duplicate-instance guard, bundled skills sync, rotating
    file log setup, signal handler registration, runner start, PID file
    write, cron ticker thread start, wait for shutdown, cleanup.
    
    Args:
        config: Optional gateway configuration override.
        replace: If True, kill any existing gateway instance before starting.
                 Useful for systemd services to avoid restart-loop deadlocks
                 when the previous process hasn't fully exited yet.

    Returns:
        True after a normal shutdown; False if another instance is running
        (and could not or should not be replaced) or the runner failed to start.
    """
    # ── Duplicate-instance guard ──────────────────────────────────────
    # Prevent two gateways from running under the same HERMES_HOME.
    # The PID file is scoped to HERMES_HOME, so future multi-profile
    # setups (each profile using a distinct HERMES_HOME) will naturally
    # allow concurrent instances without tripping this guard.
    from gateway.status import get_running_pid, remove_pid_file, write_pid_file
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
        if replace:
            logger.info(
                "Replacing existing gateway instance (PID %d) with --replace.",
                existing_pid,
            )
            try:
                os.kill(existing_pid, signal.SIGTERM)
            except ProcessLookupError:
                pass  # Already gone
            except PermissionError:
                logger.error(
                    "Permission denied killing PID %d. Cannot replace.",
                    existing_pid,
                )
                return False
            # Wait up to 10 seconds for the old process to exit.
            # asyncio.sleep (not time.sleep) so the event loop is never blocked.
            for _ in range(20):
                try:
                    os.kill(existing_pid, 0)  # signal 0: existence probe only
                except (ProcessLookupError, PermissionError):
                    break  # Process is gone
                await asyncio.sleep(0.5)
            else:
                # Still alive after 10s — force kill
                logger.warning(
                    "Old gateway (PID %d) did not exit after SIGTERM, sending SIGKILL.",
                    existing_pid,
                )
                try:
                    os.kill(existing_pid, signal.SIGKILL)
                except (ProcessLookupError, PermissionError):
                    pass
                else:
                    # Give the kernel a moment to reap the killed process.
                    await asyncio.sleep(0.5)
            remove_pid_file()
        else:
            hermes_home = os.getenv("HERMES_HOME", "~/.hermes")
            logger.error(
                "Another gateway instance is already running (PID %d, HERMES_HOME=%s). "
                "Use 'hermes gateway restart' to replace it, or 'hermes gateway stop' first.",
                existing_pid, hermes_home,
            )
            print(
                f"\n❌ Gateway already running (PID {existing_pid}).\n"
                f"   Use 'hermes gateway restart' to replace it,\n"
                f"   or 'hermes gateway stop' to kill it first.\n"
                f"   Or use 'hermes gateway run --replace' to auto-replace.\n"
            )
            return False

    # Sync bundled skills on gateway start (fast -- skips unchanged).
    # Best-effort: a failure here must not prevent the gateway from starting.
    try:
        from tools.skills_sync import sync_skills
        sync_skills(quiet=True)
    except Exception as e:
        logger.debug("Bundled skills sync skipped: %s", e)

    # Configure rotating file log so gateway output is persisted for debugging.
    # encoding is explicit so non-ASCII log text does not depend on the
    # platform's locale encoding.
    log_dir = _hermes_home / 'logs'
    log_dir.mkdir(parents=True, exist_ok=True)
    file_handler = RotatingFileHandler(
        log_dir / 'gateway.log',
        maxBytes=5 * 1024 * 1024,
        backupCount=3,
        encoding="utf-8",
    )
    from agent.redact import RedactingFormatter
    file_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
    logging.getLogger().addHandler(file_handler)
    logging.getLogger().setLevel(logging.INFO)

    # Separate errors-only log for easy debugging
    error_handler = RotatingFileHandler(
        log_dir / 'errors.log',
        maxBytes=2 * 1024 * 1024,
        backupCount=2,
        encoding="utf-8",
    )
    error_handler.setLevel(logging.WARNING)
    error_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
    logging.getLogger().addHandler(error_handler)

    runner = GatewayRunner(config)
    
    # Set up signal handlers.
    # The event loop only keeps weak references to tasks, so hold a strong
    # reference until each stop task finishes; otherwise it could be
    # garbage-collected before runner.stop() completes.
    stop_tasks: set = set()

    def signal_handler():
        task = asyncio.create_task(runner.stop())
        stop_tasks.add(task)
        task.add_done_callback(stop_tasks.discard)
    
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGINT, signal.SIGTERM):
        try:
            loop.add_signal_handler(sig, signal_handler)
        except NotImplementedError:
            # Not every event loop implementation supports signal handlers.
            pass
    
    # Start the gateway
    success = await runner.start()
    if not success:
        return False
    
    # Write PID file so CLI can detect gateway is running
    import atexit
    write_pid_file()
    atexit.register(remove_pid_file)
    
    # Start background cron ticker so scheduled jobs fire automatically
    cron_stop = threading.Event()
    cron_thread = threading.Thread(
        target=_start_cron_ticker,
        args=(cron_stop,),
        kwargs={"adapters": runner.adapters},
        daemon=True,
        name="cron-ticker",
    )
    cron_thread.start()
    
    try:
        # Wait for shutdown
        await runner.wait_for_shutdown()
    finally:
        # Run cleanup even if the wait is cancelled or raises (e.g. when no
        # signal handler could be installed and the coroutine is cancelled).

        # Stop cron ticker cleanly
        cron_stop.set()
        cron_thread.join(timeout=5)

        # Close MCP server connections (best-effort)
        try:
            from tools.mcp_tool import shutdown_mcp_servers
            shutdown_mcp_servers()
        except Exception as e:
            logger.debug("MCP server shutdown skipped: %s", e)

    return True


def main():
    """
    Command-line entry point: parse options, optionally load a JSON gateway
    config, then run the gateway until it shuts down.

    Exits the process with status 1 when start_gateway() reports failure.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Hermes Gateway - Multi-platform messaging")
    cli.add_argument("--config", "-c", help="Path to gateway config file")
    # NOTE(review): --verbose is accepted but its value is not read anywhere
    # in this function.
    cli.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    options = cli.parse_args()

    # Without --config, start_gateway() receives None.
    gateway_config = None
    if options.config:
        import json
        with open(options.config, encoding="utf-8") as config_file:
            gateway_config = GatewayConfig.from_dict(json.load(config_file))

    # A failed start must produce a non-zero exit code so that systemd's
    # Restart=on-failure retries after transient errors (e.g. DNS).
    if asyncio.run(start_gateway(gateway_config)):
        return
    sys.exit(1)


# Run the CLI entry point only when this file is executed directly
# (e.g. `python -m gateway.run`), not when it is imported.
if __name__ == "__main__":
    main()
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								"""
 								Gateway runner - entry point for messaging platform integrations.
 								This module provides:
 								- start_gateway(): Start all configured platform adapters
 								- GatewayRunner: Main class managing the gateway lifecycle
 								Usage:
 								    # Start the gateway
 								    python -m gateway.run
 								    # Or from CLI
 								    python cli.py --gateway
 								"""
 								import asyncio
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								import logging
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								import os
-												Add Text-to-Speech (TTS) support with Edge TTS and ElevenLabs integration

- Updated `pyproject.toml` to include Edge TTS and ElevenLabs as dependencies.
- Enhanced documentation to detail voice message capabilities across platforms and TTS provider options.
- Modified the GatewayRunner to handle MEDIA tags from TTS tool responses, ensuring proper delivery of audio messages.

											
										
										
											2026-02-14 16:08:14 -08:00
+								import re
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								import sys
 								import signal
-												refactor: streamline cron job handling and update CLI commands

- Removed legacy cron daemon functionality, integrating cron job execution directly into the gateway process for improved efficiency.
- Updated CLI commands to reflect changes, replacing `hermes cron daemon` with `hermes cron status` and enhancing documentation for cron job management.
- Clarified messaging in the README and other documentation regarding the gateway's role in managing cron jobs.
- Removed obsolete terminal_hecate tool and related configurations to simplify the codebase.

											
										
										
											2026-02-21 16:21:19 -08:00
+								import threading
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								from logging.handlers import RotatingFileHandler
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								from pathlib import Path
 								from datetime import datetime
 								from typing import Dict, Optional, Any, List
 								# Add parent directory to path
 								sys.path.insert(0, str(Path(__file__).parent.parent))
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								# Resolve Hermes home directory (respects HERMES_HOME override)
 								_hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								# Load environment variables from ~/.hermes/.env first
-												Update requirements and enhance environment variable loading in gateway

- Updated requirements.txt to uncomment and ensure the installation of `python-telegram-bot` and `discord.py` packages.
- Enhanced the gateway run script to load environment variables from a specified path, improving configuration management and flexibility for different environments.

											
										
										
											2026-02-03 07:02:59 -08:00
+								from dotenv import load_dotenv
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								_env_path = _hermes_home / '.env'
-												Update requirements and enhance environment variable loading in gateway

- Updated requirements.txt to uncomment and ensure the installation of `python-telegram-bot` and `discord.py` packages.
- Enhanced the gateway run script to load environment variables from a specified path, improving configuration management and flexibility for different environments.

											
										
										
											2026-02-03 07:02:59 -08:00
+								if _env_path.exists():
-												More fixes for windoze

											
										
										
											2026-02-25 15:20:42 -08:00
+								    try:
 								        load_dotenv(_env_path, encoding="utf-8")
 								    except UnicodeDecodeError:
 								        load_dotenv(_env_path, encoding="latin-1")
-												Update requirements and enhance environment variable loading in gateway

- Updated requirements.txt to uncomment and ensure the installation of `python-telegram-bot` and `discord.py` packages.
- Enhanced the gateway run script to load environment variables from a specified path, improving configuration management and flexibility for different environments.

											
										
										
											2026-02-03 07:02:59 -08:00
+								# Also try project .env as fallback
 								load_dotenv()
-												feat: integrate config.yaml values into environment for enhanced flexibility

- Added functionality to load values from config.yaml into the environment, allowing os.getenv() to access them.
- Ensured that existing environment variables take precedence over config values.
- Updated DiscordAdapter to resolve usernames in DISCORD_ALLOWED_USERS to numeric IDs, improving user authorization checks.
- Enhanced event handling to provide clearer logging and ensure proper synchronization of slash commands.

											
										
										
											2026-02-22 17:35:45 -08:00
+								# Bridge config.yaml values into the environment so os.getenv() picks them up.
-												feat(config): enhance terminal environment variable management

- Updated .env.example to clarify terminal backend configuration and its relationship with config.yaml.
- Modified gateway/run.py to ensure terminal settings from config.yaml take precedence over .env, improving consistency in environment variable handling.
- Added mapping for terminal configuration options to corresponding environment variables for better integration.

											
										
										
											2026-02-26 20:05:35 -08:00
+								# config.yaml is authoritative for terminal settings — overrides .env.
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								_config_path = _hermes_home / 'config.yaml'
-												feat: integrate config.yaml values into environment for enhanced flexibility

- Added functionality to load values from config.yaml into the environment, allowing os.getenv() to access them.
- Ensured that existing environment variables take precedence over config values.
- Updated DiscordAdapter to resolve usernames in DISCORD_ALLOWED_USERS to numeric IDs, improving user authorization checks.
- Enhanced event handling to provide clearer logging and ensure proper synchronization of slash commands.

											
										
										
											2026-02-22 17:35:45 -08:00
+								if _config_path.exists():
 								    try:
 								        import yaml as _yaml
-												Add explicit encoding="utf-8" to all config/data file open() calls

On Windows, open() defaults to the system locale encoding (cp1252,
cp1254, etc.) rather than UTF-8. This breaks any file containing
non-ASCII characters, and also causes crashes when writing JSON with
ensure_ascii=False.

This adds encoding="utf-8" to open() calls in:
- gateway/run.py (config.yaml reads/writes throughout)
- gateway/config.py (gateway.json and config.yaml)
- hermes_cli/config.py (config.yaml load/save)
- hermes_cli/main.py (session export with ensure_ascii=False)
- hermes_cli/status.py (jobs.json and sessions.json)

											
										
										
											2026-03-05 17:04:33 -05:00
+								        with open(_config_path, encoding="utf-8") as _f:
-												feat: integrate config.yaml values into environment for enhanced flexibility

- Added functionality to load values from config.yaml into the environment, allowing os.getenv() to access them.
- Ensured that existing environment variables take precedence over config values.
- Updated DiscordAdapter to resolve usernames in DISCORD_ALLOWED_USERS to numeric IDs, improving user authorization checks.
- Enhanced event handling to provide clearer logging and ensure proper synchronization of slash commands.

											
										
										
											2026-02-22 17:35:45 -08:00
+								            _cfg = _yaml.safe_load(_f) or {}
-												feat(config): enhance terminal environment variable management

- Updated .env.example to clarify terminal backend configuration and its relationship with config.yaml.
- Modified gateway/run.py to ensure terminal settings from config.yaml take precedence over .env, improving consistency in environment variable handling.
- Added mapping for terminal configuration options to corresponding environment variables for better integration.

											
										
										
											2026-02-26 20:05:35 -08:00
+								        # Top-level simple values (fallback only — don't override .env)
-												feat: integrate config.yaml values into environment for enhanced flexibility

- Added functionality to load values from config.yaml into the environment, allowing os.getenv() to access them.
- Ensured that existing environment variables take precedence over config values.
- Updated DiscordAdapter to resolve usernames in DISCORD_ALLOWED_USERS to numeric IDs, improving user authorization checks.
- Enhanced event handling to provide clearer logging and ensure proper synchronization of slash commands.

											
										
										
											2026-02-22 17:35:45 -08:00
+								        for _key, _val in _cfg.items():
 								            if isinstance(_val, (str, int, float, bool)) and _key not in os.environ:
 								                os.environ[_key] = str(_val)
-												feat(config): enhance terminal environment variable management

- Updated .env.example to clarify terminal backend configuration and its relationship with config.yaml.
- Modified gateway/run.py to ensure terminal settings from config.yaml take precedence over .env, improving consistency in environment variable handling.
- Added mapping for terminal configuration options to corresponding environment variables for better integration.

											
										
										
											2026-02-26 20:05:35 -08:00
+								        # Terminal config is nested — bridge to TERMINAL_* env vars.
 								        # config.yaml overrides .env for these since it's the documented config path.
 								        _terminal_cfg = _cfg.get("terminal", {})
 								        if _terminal_cfg and isinstance(_terminal_cfg, dict):
 								            _terminal_env_map = {
 								                "backend": "TERMINAL_ENV",
 								                "cwd": "TERMINAL_CWD",
 								                "timeout": "TERMINAL_TIMEOUT",
 								                "lifetime_seconds": "TERMINAL_LIFETIME_SECONDS",
 								                "docker_image": "TERMINAL_DOCKER_IMAGE",
 								                "singularity_image": "TERMINAL_SINGULARITY_IMAGE",
 								                "modal_image": "TERMINAL_MODAL_IMAGE",
-												fix(daytona): add missing config mappings in gateway, CLI defaults, and config display

Signed-off-by: rovle <lovre.pesut@gmail.com>

											
										
										
											2026-03-05 11:12:50 -08:00
+								                "daytona_image": "TERMINAL_DAYTONA_IMAGE",
-												feat(config): enhance terminal environment variable management

- Updated .env.example to clarify terminal backend configuration and its relationship with config.yaml.
- Modified gateway/run.py to ensure terminal settings from config.yaml take precedence over .env, improving consistency in environment variable handling.
- Added mapping for terminal configuration options to corresponding environment variables for better integration.

											
										
										
											2026-02-26 20:05:35 -08:00
+								                "ssh_host": "TERMINAL_SSH_HOST",
 								                "ssh_user": "TERMINAL_SSH_USER",
 								                "ssh_port": "TERMINAL_SSH_PORT",
 								                "ssh_key": "TERMINAL_SSH_KEY",
 								                "container_cpu": "TERMINAL_CONTAINER_CPU",
 								                "container_memory": "TERMINAL_CONTAINER_MEMORY",
 								                "container_disk": "TERMINAL_CONTAINER_DISK",
 								                "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
-												fix: gateway missing docker_volumes config bridge + list serialization bug

The gateway's config.yaml → env var bridge was missing docker_volumes,
so Docker volume mounts configured in config.yaml were ignored for
gateway sessions (Telegram, Discord, etc.) while working in CLI.

Also fixes list serialization: str() produces Python repr with single
quotes which json.loads() in terminal_tool.py can't parse. Now uses
json.dumps() for list values.

Based on PR #431 by @manuelschipper (applied manually due to stale branch).

											
										
										
											2026-03-09 17:24:00 -07:00
+								                "docker_volumes": "TERMINAL_DOCKER_VOLUMES",
-												fix: respect config.yaml cwd in gateway, add sandbox_dir config option

Two fixes:

1. Gateway CWD override: TERMINAL_CWD from config.yaml was being
   unconditionally overwritten by the messaging_cwd fallback (line 114).
   Now explicit paths in config.yaml are respected — only '.' / 'auto' /
   'cwd' (or unset) fall back to MESSAGING_CWD or home directory.

2. sandbox_dir config: Added terminal.sandbox_dir to config.yaml bridge
   in gateway/run.py, cli.py, and hermes_cli/config.py. Maps to
   TERMINAL_SANDBOX_DIR env var, which get_sandbox_dir() reads to
   determine where Docker/Singularity sandbox data is stored (default:
   ~/.hermes/sandboxes/). Users can now set:
     hermes config set terminal.sandbox_dir /data/hermes-sandboxes

											
										
										
											2026-03-08 01:33:46 -08:00
+								                "sandbox_dir": "TERMINAL_SANDBOX_DIR",
-												feat(config): enhance terminal environment variable management

- Updated .env.example to clarify terminal backend configuration and its relationship with config.yaml.
- Modified gateway/run.py to ensure terminal settings from config.yaml take precedence over .env, improving consistency in environment variable handling.
- Added mapping for terminal configuration options to corresponding environment variables for better integration.

											
										
										
											2026-02-26 20:05:35 -08:00
+								            }
 								            for _cfg_key, _env_var in _terminal_env_map.items():
 								                if _cfg_key in _terminal_cfg:
-												fix: gateway missing docker_volumes config bridge + list serialization bug

The gateway's config.yaml → env var bridge was missing docker_volumes,
so Docker volume mounts configured in config.yaml were ignored for
gateway sessions (Telegram, Discord, etc.) while working in CLI.

Also fixes list serialization: str() produces Python repr with single
quotes which json.loads() in terminal_tool.py can't parse. Now uses
json.dumps() for list values.

Based on PR #431 by @manuelschipper (applied manually due to stale branch).

											
										
										
											2026-03-09 17:24:00 -07:00
+								                    _val = _terminal_cfg[_cfg_key]
 								                    if isinstance(_val, list):
 								                        os.environ[_env_var] = json.dumps(_val)
 								                    else:
 								                        os.environ[_env_var] = str(_val)
-												refactor: update context compression configuration to use config.yaml and improve model handling

											
										
										
											2026-02-28 04:46:35 -08:00
+								        _compression_cfg = _cfg.get("compression", {})
 								        if _compression_cfg and isinstance(_compression_cfg, dict):
 								            _compression_env_map = {
 								                "enabled": "CONTEXT_COMPRESSION_ENABLED",
 								                "threshold": "CONTEXT_COMPRESSION_THRESHOLD",
 								                "summary_model": "CONTEXT_COMPRESSION_MODEL",
-												fix: harden auxiliary model config — gateway bridge, vision safety, tests

Improvements on top of PR #606 (auxiliary model configuration):

1. Gateway bridge: Added auxiliary.* and compression.summary_provider
   config bridging to gateway/run.py so config.yaml settings work from
   messaging platforms (not just CLI). Matches the pattern in cli.py.

2. Vision auto-fallback safety: In auto mode, vision now only tries
   OpenRouter + Nous Portal (known multimodal-capable providers).
   Custom endpoints, Codex, and API-key providers are skipped to avoid
   confusing errors from providers that don't support vision input.
   Explicit provider override (AUXILIARY_VISION_PROVIDER=main) still
   allows using any provider.

3. Comprehensive tests (46 new):
   - _get_auxiliary_provider env var resolution (8 tests)
   - _resolve_forced_provider with all provider types (8 tests)
   - Per-task provider routing integration (4 tests)
   - Vision auto-fallback safety (7 tests)
   - Config bridging logic (11 tests)
   - Gateway/CLI bridge parity (2 tests)
   - Vision model override via env var (2 tests)
   - DEFAULT_CONFIG shape validation (4 tests)

4. Docs: Added auxiliary_client.py to AGENTS.md project structure.
   Updated module docstring with separate text/vision resolution chains.

Tests: 2429 passed (was 2383).

											
										
										
											2026-03-08 18:06:40 -07:00
+								                "summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
-												refactor: update context compression configuration to use config.yaml and improve model handling

											
										
										
											2026-02-28 04:46:35 -08:00
+								            }
 								            for _cfg_key, _env_var in _compression_env_map.items():
 								                if _cfg_key in _compression_cfg:
 								                    os.environ[_env_var] = str(_compression_cfg[_cfg_key])
-												fix: harden auxiliary model config — gateway bridge, vision safety, tests

Improvements on top of PR #606 (auxiliary model configuration):

1. Gateway bridge: Added auxiliary.* and compression.summary_provider
   config bridging to gateway/run.py so config.yaml settings work from
   messaging platforms (not just CLI). Matches the pattern in cli.py.

2. Vision auto-fallback safety: In auto mode, vision now only tries
   OpenRouter + Nous Portal (known multimodal-capable providers).
   Custom endpoints, Codex, and API-key providers are skipped to avoid
   confusing errors from providers that don't support vision input.
   Explicit provider override (AUXILIARY_VISION_PROVIDER=main) still
   allows using any provider.

3. Comprehensive tests (46 new):
   - _get_auxiliary_provider env var resolution (8 tests)
   - _resolve_forced_provider with all provider types (8 tests)
   - Per-task provider routing integration (4 tests)
   - Vision auto-fallback safety (7 tests)
   - Config bridging logic (11 tests)
   - Gateway/CLI bridge parity (2 tests)
   - Vision model override via env var (2 tests)
   - DEFAULT_CONFIG shape validation (4 tests)

4. Docs: Added auxiliary_client.py to AGENTS.md project structure.
   Updated module docstring with separate text/vision resolution chains.

Tests: 2429 passed (was 2383).

											
										
										
											2026-03-08 18:06:40 -07:00
+								        # Auxiliary model overrides (vision, web_extract).
 								        # Each task has provider + model; bridge non-default values to env vars.
 								        _auxiliary_cfg = _cfg.get("auxiliary", {})
 								        if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
 								            _aux_task_env = {
 								                "vision":      ("AUXILIARY_VISION_PROVIDER",      "AUXILIARY_VISION_MODEL"),
 								                "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER",  "AUXILIARY_WEB_EXTRACT_MODEL"),
 								            }
 								            for _task_key, (_prov_env, _model_env) in _aux_task_env.items():
 								                _task_cfg = _auxiliary_cfg.get(_task_key, {})
 								                if not isinstance(_task_cfg, dict):
 								                    continue
 								                _prov = str(_task_cfg.get("provider", "")).strip()
 								                _model = str(_task_cfg.get("model", "")).strip()
 								                if _prov and _prov != "auto":
 								                    os.environ[_prov_env] = _prov
 								                if _model:
 								                    os.environ[_model_env] = _model
-												refactor(cli): update max turns configuration precedence and enhance documentation

											
										
										
											2026-02-28 10:35:49 -08:00
+								        _agent_cfg = _cfg.get("agent", {})
 								        if _agent_cfg and isinstance(_agent_cfg, dict):
 								            if "max_turns" in _agent_cfg:
 								                os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
-												fix(timezone): add timezone-aware clock across agent, cron, and execute_code

											
										
										
											2026-03-03 11:57:18 +05:30
+								        # Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
 								        # HERMES_TIMEZONE from .env takes precedence (already in os.environ).
 								        _tz_cfg = _cfg.get("timezone", "")
 								        if _tz_cfg and isinstance(_tz_cfg, str) and "HERMES_TIMEZONE" not in os.environ:
 								            os.environ["HERMES_TIMEZONE"] = _tz_cfg.strip()
-												feat: add config toggle to disable secret redaction

New config option:

  security:
    redact_secrets: false  # default: true

When set to false, API keys, tokens, and passwords are shown in
full in read_file, search_files, and terminal output. Useful for
debugging auth issues where you need to verify the actual key value.

Bridged to both CLI and gateway via HERMES_REDACT_SECRETS env var.
The check is in redact_sensitive_text() itself, so all call sites
(terminal, file tools, log formatter) respect it.

											
										
										
											2026-03-09 01:04:33 -07:00
+								        # Security settings
 								        _security_cfg = _cfg.get("security", {})
 								        if isinstance(_security_cfg, dict):
 								            _redact = _security_cfg.get("redact_secrets")
 								            if _redact is not None:
 								                os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
-												feat: integrate config.yaml values into environment for enhanced flexibility

- Added functionality to load values from config.yaml into the environment, allowing os.getenv() to access them.
- Ensured that existing environment variables take precedence over config values.
- Updated DiscordAdapter to resolve usernames in DISCORD_ALLOWED_USERS to numeric IDs, improving user authorization checks.
- Enhanced event handling to provide clearer logging and ensure proper synchronization of slash commands.

											
										
										
											2026-02-22 17:35:45 -08:00
+								    except Exception:
 								        pass  # Non-fatal; gateway can still run with .env values
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								# Gateway runs in quiet mode - suppress debug output and use cwd directly (no temp dirs)
 								os.environ["HERMES_QUIET"] = "1"
-												Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews

Add AGENTS and SOUL.md support

Add Exec Approval

											
										
										
											2026-02-12 10:05:08 -08:00
+								# Enable interactive exec approval for dangerous commands on messaging platforms
 								os.environ["HERMES_EXEC_ASK"] = "1"
-												fix: respect config.yaml cwd in gateway, add sandbox_dir config option

Two fixes:

1. Gateway CWD override: TERMINAL_CWD from config.yaml was being
   unconditionally overwritten by the messaging_cwd fallback (line 114).
   Now explicit paths in config.yaml are respected — only '.' / 'auto' /
   'cwd' (or unset) fall back to MESSAGING_CWD or home directory.

2. sandbox_dir config: Added terminal.sandbox_dir to config.yaml bridge
   in gateway/run.py, cli.py, and hermes_cli/config.py. Maps to
   TERMINAL_SANDBOX_DIR env var, which get_sandbox_dir() reads to
   determine where Docker/Singularity sandbox data is stored (default:
   ~/.hermes/sandboxes/). Users can now set:
     hermes config set terminal.sandbox_dir /data/hermes-sandboxes

											
										
										
											2026-03-08 01:33:46 -08:00
# Choose the terminal working directory for messaging platforms.
# An explicit TERMINAL_CWD (e.g. a path set in config.yaml) is left untouched.
# When it is unset/empty or one of the placeholders ".", "auto" or "cwd",
# fall back to MESSAGING_CWD and, failing that, to the user's home directory.
_configured_cwd = os.environ.get("TERMINAL_CWD", "")
if _configured_cwd in ("", ".", "auto", "cwd"):
    messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home())
    os.environ["TERMINAL_CWD"] = messaging_cwd
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								from gateway.config import (
 								    Platform,
 								    GatewayConfig,
 								    load_gateway_config,
 								)
 								from gateway.session import (
 								    SessionStore,
 								    SessionSource,
 								    SessionContext,
 								    build_session_context,
 								    build_session_context_prompt,
-												refactor: extract build_session_key() as single source of truth

The session key construction logic was duplicated in 4 places
(session.py + 3 inline copies in run.py), which is exactly the
kind of drift that caused issue #349 in the first place.

Extracted build_session_key() as a public function in session.py.
SessionStore._generate_session_key() now delegates to it, and all
inline key construction in run.py has been replaced with calls to
the shared function. Tests updated to test the function directly.

											
										
										
											2026-03-04 03:34:45 -08:00
+								    build_session_key,
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								)
 								from gateway.delivery import DeliveryRouter, DeliveryTarget
-												Enhance image handling and analysis capabilities across platforms

- Updated the vision tool to accept both HTTP/HTTPS URLs and local file paths for image analysis.
- Implemented caching of user-uploaded images in local directories to ensure reliable access for the vision tool, addressing issues with ephemeral URLs.
- Enhanced platform adapters (Discord, Telegram, WhatsApp) to download and cache images, allowing for immediate analysis and enriched message context.
- Added a new method to auto-analyze images attached by users, enriching the conversation with detailed descriptions.
- Improved documentation for image handling processes and updated related functions for clarity and efficiency.

											
										
										
											2026-02-15 16:10:50 -08:00
+								from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								logger = logging.getLogger(__name__)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
def _resolve_runtime_agent_kwargs() -> dict:
    """Build the provider keyword arguments for AIAgent instances the gateway creates.

    The provider named by the HERMES_INFERENCE_PROVIDER env var (None when
    unset) is handed to ``resolve_runtime_provider``.

    Returns:
        A dict with the keys ``api_key``, ``base_url``, ``provider`` and
        ``api_mode``; each value is read with ``.get()`` from the resolved
        runtime, so a missing entry comes back as None.

    Raises:
        RuntimeError: if provider resolution fails. The message is produced by
            ``format_runtime_provider_error`` and the original exception is
            chained as the cause.
    """
    # Imported lazily, at call time, exactly as the rest of this module does.
    from hermes_cli.runtime_provider import (
        resolve_runtime_provider,
        format_runtime_provider_error,
    )

    requested_provider = os.getenv("HERMES_INFERENCE_PROVIDER")
    try:
        resolved = resolve_runtime_provider(requested=requested_provider)
    except Exception as err:
        raise RuntimeError(format_runtime_provider_error(err)) from err

    wanted_fields = ("api_key", "base_url", "provider", "api_mode")
    return {field: resolved.get(field) for field in wanted_fields}
-												fix(gateway): pass model to temporary AIAgent instances

Memory flush, /compress, and session hygiene create AIAgent without
model=, falling back to the hardcoded default "anthropic/claude-opus-4.6".
This fails with a 400 error when the active provider is openai-codex
(Codex only accepts its own model names like gpt-5.1-codex-mini).

Add _resolve_gateway_model() that mirrors the env/config resolution
already used by _run_agent_sync, and wire it into all three temporary
agent creation sites.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 00:09:37 +01:00
def _resolve_gateway_model() -> str:
    """Return the model name for temporary gateway-created AIAgent instances.

    Mirrors the resolution in _run_agent_sync. Without this, temporary agents
    (memory flush, /compress) fall back to the hardcoded default
    ("anthropic/claude-opus-4.6"), which fails when the active provider is
    openai-codex.

    Resolution, lowest to highest priority:
      1. the hardcoded default "anthropic/claude-opus-4.6"
      2. the LLM_MODEL env var
      3. the HERMES_MODEL env var
      4. ``model`` in config.yaml under the Hermes home directory, given either
         as a plain string or as a mapping with a ``default`` key

    A config value is honoured only when it is a non-empty string. A null,
    blank, or non-string entry is ignored, so the function never returns None
    or "" because of a half-filled config file. Any failure while reading
    config.yaml is non-fatal: it is logged at debug level and the env/default
    value is returned.
    """
    model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
    try:
        import yaml as _y
        _cfg_path = _hermes_home / "config.yaml"
        if _cfg_path.exists():
            with open(_cfg_path, encoding="utf-8") as _f:
                _cfg = _y.safe_load(_f) or {}
            _model_cfg = _cfg.get("model", {})
            if isinstance(_model_cfg, dict):
                # Mapping form: the model name lives under "default".
                _model_cfg = _model_cfg.get("default")
            # Only a real, non-blank string may replace the env/default value.
            if isinstance(_model_cfg, str) and _model_cfg.strip():
                model = _model_cfg.strip()
    except Exception:
        # Best effort: the gateway must still work when config.yaml is
        # missing, unreadable, or malformed. Leave a trace for debugging.
        logging.getLogger(__name__).debug(
            "Could not read model from config.yaml; using %r", model, exc_info=True
        )
    return model
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								class GatewayRunner:
 								    """
 								    Main gateway controller.
 								    Manages the lifecycle of all platform adapters and routes
 								    messages to/from the agent.
 								    """
 								    def __init__(self, config: Optional[GatewayConfig] = None):
 								        self.config = config or load_gateway_config()
 								        self.adapters: Dict[Platform, BasePlatformAdapter] = {}
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								        # Load ephemeral config from config.yaml / env vars.
 								        # Both are injected at API-call time only and never persisted.
 								        self._prefill_messages = self._load_prefill_messages()
 								        self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
-												feat: add reasoning effort configuration for agent

- Introduced a new configuration option for reasoning effort in the CLI, allowing users to specify the level of reasoning the agent should perform before responding.
- Updated the CLI and agent initialization to incorporate the reasoning configuration, enhancing the agent's responsiveness and adaptability.
- Implemented logic to load reasoning effort from environment variables and configuration files, providing flexibility in agent behavior.
- Enhanced the documentation in the example configuration file to clarify the new reasoning effort options available.

											
										
										
											2026-02-24 03:30:19 -08:00
+								        self._reasoning_config = self._load_reasoning_config()
-												fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
											
										
										
											2026-03-12 05:38:19 -07:00
+								        self._show_reasoning = self._load_show_reasoning()
-												feat(provider-routing): add OpenRouter provider routing configuration

Introduced a new `provider_routing` section in the CLI configuration to control how requests are routed across providers when using OpenRouter. This includes options for sorting providers by throughput, latency, or price, as well as allowing or ignoring specific providers, setting the order of provider attempts, and managing data collection policies. Updated relevant classes and documentation to support these features, enhancing flexibility in provider selection.

											
										
										
											2026-03-01 18:24:27 -08:00
+								        self._provider_routing = self._load_provider_routing()
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								        self._fallback_model = self._load_fallback_model()
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
+								        # Wire process registry into session store for reset protection
 								        from tools.process_registry import process_registry
 								        self.session_store = SessionStore(
 								            self.config.sessions_dir, self.config,
 								            has_active_processes_fn=lambda key: process_registry.has_active_for_session(key),
 								        )
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        self.delivery_router = DeliveryRouter(self.config)
 								        self._running = False
 								        self._shutdown_event = asyncio.Event()
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
 								        # Track running agents per session for interrupt support
 								        # Key: session_key, Value: AIAgent instance
 								        self._running_agents: Dict[str, Any] = {}
 								        self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
-												Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews

Add AGENTS and SOUL.md support

Add Exec Approval

											
										
										
											2026-02-12 10:05:08 -08:00
 								        # Track pending exec approvals per session
-												fix: preserve current approval semantics for tirith guard

Restore gateway/run.py to current main behavior while keeping tirith startup
and pattern_keys replay, preserve yolo and non-interactive bypass semantics in
the combined guard, and add regression tests for yolo and view-full flows.

											
										
										
											2026-03-14 00:17:04 -07:00
+								        # Key: session_key, Value: {"command": str, "pattern_key": str, ...}
 								        self._pending_approvals: Dict[str, Dict[str, Any]] = {}
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
 								        # Persistent Honcho managers keyed by gateway session key.
 								        # This preserves write_frequency="session" semantics across short-lived
 								        # per-message AIAgent instances.
 								        self._honcho_managers: Dict[str, Any] = {}
 								        self._honcho_configs: Dict[str, Any] = {}
-												feat(security): add tirith pre-exec command scanning

Integrate tirith as a pre-execution security scanner that detects
homograph URLs, pipe-to-interpreter patterns, terminal injection,
zero-width Unicode, and environment variable manipulation — threats
the existing 50-pattern dangerous command detector doesn't cover.

Architecture: gather-then-decide — both tirith and the dangerous
command detector run before any approval prompt, preventing gateway
force=True replay from bypassing one check when only the other was
shown to the user.

New files:
- tools/tirith_security.py: subprocess wrapper with auto-installer,
  mandatory cosign provenance verification, non-blocking background
  download, disk-persistent failure markers with retryable-cause
  tracking (cosign_missing auto-clears when cosign appears on PATH)
- tests/tools/test_tirith_security.py: 62 tests covering exit code
  mapping, fail_open, cosign verification, background install,
  HERMES_HOME isolation, and failure recovery
- tests/tools/test_command_guards.py: 21 integration tests for the
  combined guard orchestration

Modified files:
- tools/approval.py: add check_all_command_guards() orchestrator,
  add allow_permanent parameter to prompt_dangerous_approval()
- tools/terminal_tool.py: replace _check_dangerous_command with
  consolidated check_all_command_guards
- cli.py: update _approval_callback for allow_permanent kwarg,
  call ensure_installed() at startup
- gateway/run.py: iterate pattern_keys list on replay approval,
  call ensure_installed() at startup
- hermes_cli/config.py: add security config defaults, split
  commented sections for independent fallback
- cli-config.yaml.example: document tirith security config

											
										
										
											2026-03-11 14:20:32 +05:30
 								        # Ensure tirith security scanner is available (downloads if needed)
 								        try:
 								            from tools.tirith_security import ensure_installed
 								            ensure_installed()
 								        except Exception:
 								            pass  # Non-fatal — fail-open at scan time if unavailable
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
-												fix(gateway): Pass session_db to AIAgent, fixing session_search error

When running via the gateway (e.g. Telegram), the session_search tool
returned: {"error": "session_search must be handled by the agent loop"}

Root cause:
- gateway/run.py creates AIAgent without passing session_db=
- self._session_db is None in the agent instance
- The dispatch condition "elif function_name == 'session_search' and self._session_db"
  skips when _session_db is None, falling through to the generic error

This fix:
1. Initializes self._session_db in GatewayRunner.__init__()
2. Passes session_db to all AIAgent instantiations in gateway/run.py
3. Adds defensive fallback in run_agent.py to return a clear error when
   session_db is unavailable, instead of falling through

Fixes #105

											
										
										
											2026-02-27 00:32:17 -05:00
+								        # Initialize session database for session_search tool support
 								        self._session_db = None
 								        try:
 								            from hermes_state import SessionDB
 								            self._session_db = SessionDB()
 								        except Exception as e:
 								            logger.debug("SQLite session store not available: %s", e)
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        # DM pairing store for code-based user authorization
 								        from gateway.pairing import PairingStore
 								        self.pairing_store = PairingStore()
 								        # Event hook system
 								        from gateway.hooks import HookRegistry
 								        self.hooks = HookRegistry()
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
 								    def _get_or_create_gateway_honcho(self, session_key: str):
 								        """Return a persistent Honcho manager/config pair for this gateway session."""
 								        if not hasattr(self, "_honcho_managers"):
 								            self._honcho_managers = {}
 								        if not hasattr(self, "_honcho_configs"):
 								            self._honcho_configs = {}
 								        if session_key in self._honcho_managers:
 								            return self._honcho_managers[session_key], self._honcho_configs.get(session_key)
 								        try:
 								            from honcho_integration.client import HonchoClientConfig, get_honcho_client
 								            from honcho_integration.session import HonchoSessionManager
 								            hcfg = HonchoClientConfig.from_global_config()
-												refactor(honcho): remove local memory mode

The "local" memoryMode was redundant with enabled: false. Simplifies
the mode system to hybrid and honcho only.

											
										
										
											2026-03-12 16:23:34 -04:00
+								            if not hcfg.enabled or not hcfg.api_key:
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
+								                return None, hcfg
 								            client = get_honcho_client(hcfg)
 								            manager = HonchoSessionManager(
 								                honcho=client,
 								                config=hcfg,
 								                context_tokens=hcfg.context_tokens,
 								            )
 								            self._honcho_managers[session_key] = manager
 								            self._honcho_configs[session_key] = hcfg
 								            return manager, hcfg
 								        except Exception as e:
 								            logger.debug("Gateway Honcho init failed for %s: %s", session_key, e)
 								            return None, None
 								    def _shutdown_gateway_honcho(self, session_key: str) -> None:
 								        """Flush and close the persistent Honcho manager for a gateway session."""
 								        managers = getattr(self, "_honcho_managers", None)
 								        configs = getattr(self, "_honcho_configs", None)
 								        if managers is None or configs is None:
 								            return
 								        manager = managers.pop(session_key, None)
 								        configs.pop(session_key, None)
 								        if not manager:
 								            return
 								        try:
 								            manager.shutdown()
 								        except Exception as e:
 								            logger.debug("Gateway Honcho shutdown failed for %s: %s", session_key, e)
 								    def _shutdown_all_gateway_honcho(self) -> None:
 								        """Flush and close all persistent Honcho managers."""
 								        managers = getattr(self, "_honcho_managers", None)
 								        if not managers:
 								            return
 								        for session_key in list(managers.keys()):
 								            self._shutdown_gateway_honcho(session_key)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								    def _flush_memories_for_session(self, old_session_id: str):
 								        """Prompt the agent to save memories/skills before context is lost.
 								        Synchronous worker — meant to be called via run_in_executor from
 								        an async context so it doesn't block the event loop.
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								        """
 								        try:
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								            history = self.session_store.load_transcript(old_session_id)
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								            if not history or len(history) < 4:
 								                return
 								            from run_agent import AIAgent
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								            runtime_kwargs = _resolve_runtime_agent_kwargs()
 								            if not runtime_kwargs.get("api_key"):
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								                return
-												fix(gateway): pass model to temporary AIAgent instances

Memory flush, /compress, and session hygiene create AIAgent without
model=, falling back to the hardcoded default "anthropic/claude-opus-4.6".
This fails with a 400 error when the active provider is openai-codex
(Codex only accepts its own model names like gpt-5.1-codex-mini).

Add _resolve_gateway_model() that mirrors the env/config resolution
already used by _run_agent_sync, and wire it into all three temporary
agent creation sites.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 00:09:37 +01:00
+								            # Resolve model from config — AIAgent's default is OpenRouter-
 								            # formatted ("anthropic/claude-opus-4.6") which fails when the
 								            # active provider is openai-codex.
 								            model = _resolve_gateway_model()
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								            tmp_agent = AIAgent(
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								                **runtime_kwargs,
-												fix(gateway): pass model to temporary AIAgent instances

Memory flush, /compress, and session hygiene create AIAgent without
model=, falling back to the hardcoded default "anthropic/claude-opus-4.6".
This fails with a 400 error when the active provider is openai-codex
(Codex only accepts its own model names like gpt-5.1-codex-mini).

Add _resolve_gateway_model() that mirrors the env/config resolution
already used by _run_agent_sync, and wire it into all three temporary
agent creation sites.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 00:09:37 +01:00
+								                model=model,
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								                max_iterations=8,
 								                quiet_mode=True,
 								                enabled_toolsets=["memory", "skills"],
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								                session_id=old_session_id,
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								            )
 								            # Build conversation history from transcript
 								            msgs = [
 								                {"role": m.get("role"), "content": m.get("content")}
 								                for m in history
 								                if m.get("role") in ("user", "assistant") and m.get("content")
 								            ]
 								            # Give the agent a real turn to think about what to save
 								            flush_prompt = (
 								                "[System: This session is about to be automatically reset due to "
 								                "inactivity or a scheduled daily reset. The conversation context "
 								                "will be cleared after this turn.\n\n"
 								                "Review the conversation above and:\n"
 								                "1. Save any important facts, preferences, or decisions to memory "
 								                "(user profile or your notes) that would be useful in future sessions.\n"
 								                "2. If you discovered a reusable workflow or solved a non-trivial "
 								                "problem, consider saving it as a skill.\n"
 								                "3. If nothing is worth saving, that's fine — just skip.\n\n"
 								                "Do NOT respond to the user. Just use the memory and skill_manage "
 								                "tools if needed, then stop.]"
 								            )
 								            tmp_agent.run_conversation(
 								                user_message=flush_prompt,
 								                conversation_history=msgs,
 								            )
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								            logger.info("Pre-reset memory flush completed for session %s", old_session_id)
-												feat(honcho): async memory integration with prefetch pipeline and recallMode

Adds full Honcho memory integration to Hermes:

- Session manager with async background writes, memory modes (honcho/hybrid/local),
  and dialectic prefetch for first-turn context warming
- Agent integration: prefetch pipeline, tool surface gated by recallMode,
  system prompt context injection, SIGTERM/SIGINT flush handlers
- CLI commands: setup, status, mode, tokens, peer, identity, migrate
- recallMode setting (auto | context | tools) for A/B testing retrieval strategies
- Session strategies: per-session, per-repo (git tree root), per-directory, global
- Polymorphic memoryMode config: string shorthand or per-peer object overrides
- 97 tests covering async writes, client config, session resolution, and memory modes

											
										
										
											2026-03-09 15:58:22 -04:00
+								            # Flush any queued Honcho writes before the session is dropped
 								            if getattr(tmp_agent, '_honcho', None):
 								                try:
 								                    tmp_agent._honcho.shutdown()
 								                except Exception:
 								                    pass
-												feat(session): implement session reset policy for messaging platforms

- Added configuration options for automatic session resets based on inactivity or daily boundaries in cli-config.yaml.
- Enhanced SessionResetPolicy class to support a "none" mode for no auto-resets.
- Implemented memory flushing before session resets in SessionStore to preserve important information.
- Updated setup wizard to guide users in configuring session reset preferences.

											
										
										
											2026-02-26 21:20:50 -08:00
+								        except Exception as e:
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								            logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)
 								    async def _async_flush_memories(self, old_session_id: str):
 								        """Run the sync memory flush in a thread pool so it won't block the event loop."""
 								        loop = asyncio.get_event_loop()
 								        await loop.run_in_executor(None, self._flush_memories_for_session, old_session_id)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								    @staticmethod
 								    def _load_prefill_messages() -> List[Dict[str, Any]]:
 								        """Load ephemeral prefill messages from config or env var.
 								        Checks HERMES_PREFILL_MESSAGES_FILE env var first, then falls back to
 								        the prefill_messages_file key in ~/.hermes/config.yaml.
 								        Relative paths are resolved from ~/.hermes/.
 								        """
 								        import json as _json
 								        file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
 								        if not file_path:
 								            try:
 								                import yaml as _y
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								                cfg_path = _hermes_home / "config.yaml"
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								                if cfg_path.exists():
-												Add explicit encoding="utf-8" to all config/data file open() calls

On Windows, open() defaults to the system locale encoding (cp1252,
cp1254, etc.) rather than UTF-8. This breaks any file containing
non-ASCII characters, and also causes crashes when writing JSON with
ensure_ascii=False.

This adds encoding="utf-8" to open() calls in:
- gateway/run.py (config.yaml reads/writes throughout)
- gateway/config.py (gateway.json and config.yaml)
- hermes_cli/config.py (config.yaml load/save)
- hermes_cli/main.py (session export with ensure_ascii=False)
- hermes_cli/status.py (jobs.json and sessions.json)

											
										
										
											2026-03-05 17:04:33 -05:00
+								                    with open(cfg_path, encoding="utf-8") as _f:
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								                        cfg = _y.safe_load(_f) or {}
 								                    file_path = cfg.get("prefill_messages_file", "")
 								            except Exception:
 								                pass
 								        if not file_path:
 								            return []
 								        path = Path(file_path).expanduser()
 								        if not path.is_absolute():
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								            path = _hermes_home / path
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								        if not path.exists():
 								            logger.warning("Prefill messages file not found: %s", path)
 								            return []
 								        try:
 								            with open(path, "r", encoding="utf-8") as f:
 								                data = _json.load(f)
 								            if not isinstance(data, list):
 								                logger.warning("Prefill messages file must contain a JSON array: %s", path)
 								                return []
 								            return data
 								        except Exception as e:
 								            logger.warning("Failed to load prefill messages from %s: %s", path, e)
 								            return []
 								    @staticmethod
 								    def _load_ephemeral_system_prompt() -> str:
 								        """Load ephemeral system prompt from config or env var.
 								        Checks HERMES_EPHEMERAL_SYSTEM_PROMPT env var first, then falls back to
 								        agent.system_prompt in ~/.hermes/config.yaml.
 								        """
 								        prompt = os.getenv("HERMES_EPHEMERAL_SYSTEM_PROMPT", "")
 								        if prompt:
 								            return prompt
 								        try:
 								            import yaml as _y
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								            cfg_path = _hermes_home / "config.yaml"
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								            if cfg_path.exists():
-												Add explicit encoding="utf-8" to all config/data file open() calls

On Windows, open() defaults to the system locale encoding (cp1252,
cp1254, etc.) rather than UTF-8. This breaks any file containing
non-ASCII characters, and also causes crashes when writing JSON with
ensure_ascii=False.

This adds encoding="utf-8" to open() calls in:
- gateway/run.py (config.yaml reads/writes throughout)
- gateway/config.py (gateway.json and config.yaml)
- hermes_cli/config.py (config.yaml load/save)
- hermes_cli/main.py (session export with ensure_ascii=False)
- hermes_cli/status.py (jobs.json and sessions.json)

											
										
										
											2026-03-05 17:04:33 -05:00
+								                with open(cfg_path, encoding="utf-8") as _f:
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								                    cfg = _y.safe_load(_f) or {}
 								                return (cfg.get("agent", {}).get("system_prompt", "") or "").strip()
 								        except Exception:
 								            pass
 								        return ""
-												feat: add reasoning effort configuration for agent

- Introduced a new configuration option for reasoning effort in the CLI, allowing users to specify the level of reasoning the agent should perform before responding.
- Updated the CLI and agent initialization to incorporate the reasoning configuration, enhancing the agent's responsiveness and adaptability.
- Implemented logic to load reasoning effort from environment variables and configuration files, providing flexibility in agent behavior.
- Enhanced the documentation in the example configuration file to clarify the new reasoning effort options available.

											
										
										
											2026-02-24 03:30:19 -08:00
+								    @staticmethod
 								    def _load_reasoning_config() -> dict | None:
 								        """Load reasoning effort from config or env var.
 								        Checks HERMES_REASONING_EFFORT env var first, then agent.reasoning_effort
 								        in config.yaml. Valid: "xhigh", "high", "medium", "low", "minimal", "none".
-												feat: default reasoning effort from xhigh to medium

Reduces token usage and latency for most tasks by defaulting to
medium reasoning effort instead of xhigh. Users can still override
via config or CLI flag. Updates code, tests, example config, and docs.

											
										
										
											2026-03-07 10:14:19 -08:00
+								        Returns None to use default (medium).
-												feat: add reasoning effort configuration for agent

- Introduced a new configuration option for reasoning effort in the CLI, allowing users to specify the level of reasoning the agent should perform before responding.
- Updated the CLI and agent initialization to incorporate the reasoning configuration, enhancing the agent's responsiveness and adaptability.
- Implemented logic to load reasoning effort from environment variables and configuration files, providing flexibility in agent behavior.
- Enhanced the documentation in the example configuration file to clarify the new reasoning effort options available.

											
										
										
											2026-02-24 03:30:19 -08:00
+								        """
 								        effort = os.getenv("HERMES_REASONING_EFFORT", "")
 								        if not effort:
 								            try:
 								                import yaml as _y
-												fix: respect HERMES_HOME env var in gateway and cron scheduler

Both entry points hardcoded Path.home() / ".hermes" for .env, config.yaml,
logs, and lock files. Now uses _hermes_home which reads HERMES_HOME env var
with ~/.hermes as default, matching cli.py and run_agent.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 18:51:46 +11:00
+								                cfg_path = _hermes_home / "config.yaml"
-												feat: add reasoning effort configuration for agent

- Introduced a new configuration option for reasoning effort in the CLI, allowing users to specify the level of reasoning the agent should perform before responding.
- Updated the CLI and agent initialization to incorporate the reasoning configuration, enhancing the agent's responsiveness and adaptability.
- Implemented logic to load reasoning effort from environment variables and configuration files, providing flexibility in agent behavior.
- Enhanced the documentation in the example configuration file to clarify the new reasoning effort options available.

											
										
										
											2026-02-24 03:30:19 -08:00
+								                if cfg_path.exists():
-												Add explicit encoding="utf-8" to all config/data file open() calls

On Windows, open() defaults to the system locale encoding (cp1252,
cp1254, etc.) rather than UTF-8. This breaks any file containing
non-ASCII characters, and also causes crashes when writing JSON with
ensure_ascii=False.

This adds encoding="utf-8" to open() calls in:
- gateway/run.py (config.yaml reads/writes throughout)
- gateway/config.py (gateway.json and config.yaml)
- hermes_cli/config.py (config.yaml load/save)
- hermes_cli/main.py (session export with ensure_ascii=False)
- hermes_cli/status.py (jobs.json and sessions.json)

											
										
										
											2026-03-05 17:04:33 -05:00
+								                    with open(cfg_path, encoding="utf-8") as _f:
-												feat: add reasoning effort configuration for agent

- Introduced a new configuration option for reasoning effort in the CLI, allowing users to specify the level of reasoning the agent should perform before responding.
- Updated the CLI and agent initialization to incorporate the reasoning configuration, enhancing the agent's responsiveness and adaptability.
- Implemented logic to load reasoning effort from environment variables and configuration files, providing flexibility in agent behavior.
- Enhanced the documentation in the example configuration file to clarify the new reasoning effort options available.

											
										
										
											2026-02-24 03:30:19 -08:00
+								                        cfg = _y.safe_load(_f) or {}
 								                    effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
 								            except Exception:
 								                pass
 								        if not effort:
 								            return None
 								        effort = effort.lower().strip()
 								        if effort == "none":
 								            return {"enabled": False}
 								        valid = ("xhigh", "high", "medium", "low", "minimal")
 								        if effort in valid:
 								            return {"enabled": True, "effort": effort}
-												feat: default reasoning effort from xhigh to medium

Reduces token usage and latency for most tasks by defaulting to
medium reasoning effort instead of xhigh. Users can still override
via config or CLI flag. Updates code, tests, example config, and docs.

											
										
										
											2026-03-07 10:14:19 -08:00
+								        logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
-												feat: add reasoning effort configuration for agent

- Introduced a new configuration option for reasoning effort in the CLI, allowing users to specify the level of reasoning the agent should perform before responding.
- Updated the CLI and agent initialization to incorporate the reasoning configuration, enhancing the agent's responsiveness and adaptability.
- Implemented logic to load reasoning effort from environment variables and configuration files, providing flexibility in agent behavior.
- Enhanced the documentation in the example configuration file to clarify the new reasoning effort options available.

											
										
										
											2026-02-24 03:30:19 -08:00
+								        return None
-												fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
											
										
										
											2026-03-12 05:38:19 -07:00
+								    @staticmethod
 								    def _load_show_reasoning() -> bool:
 								        """Load show_reasoning toggle from config.yaml display section."""
 								        try:
 								            import yaml as _y
 								            cfg_path = _hermes_home / "config.yaml"
 								            if cfg_path.exists():
 								                with open(cfg_path, encoding="utf-8") as _f:
 								                    cfg = _y.safe_load(_f) or {}
 								                return bool(cfg.get("display", {}).get("show_reasoning", False))
 								        except Exception:
 								            pass
 								        return False
-												feat(gateway): configurable background process watcher notifications

Add display.background_process_notifications config option to control
how chatty the gateway process watcher is when using
terminal(background=true, check_interval=...) from messaging platforms.

Modes:
  - all:    running-output updates + final message (default, current behavior)
  - result: only the final completion message
  - error:  only the final message when exit code != 0
  - off:    no watcher messages at all

Also supports HERMES_BACKGROUND_NOTIFICATIONS env var override.

Includes 12 tests (5 config loading + 7 watcher behavior).

Inspired by @PeterFile's PR #593. Closes #592.

											
										
										
											2026-03-10 04:12:39 -07:00
+								    @staticmethod
 								    def _load_background_notifications_mode() -> str:
 								        """Load background process notification mode from config or env var.
 								        Modes:
 								          - ``all``    — push running-output updates *and* the final message (default)
 								          - ``result`` — only the final completion message (regardless of exit code)
 								          - ``error``  — only the final message when exit code is non-zero
 								          - ``off``    — no watcher messages at all
 								        """
 								        mode = os.getenv("HERMES_BACKGROUND_NOTIFICATIONS", "")
 								        if not mode:
 								            try:
 								                import yaml as _y
 								                cfg_path = _hermes_home / "config.yaml"
 								                if cfg_path.exists():
 								                    with open(cfg_path, encoding="utf-8") as _f:
 								                        cfg = _y.safe_load(_f) or {}
 								                    raw = cfg.get("display", {}).get("background_process_notifications")
 								                    if raw is False:
 								                        mode = "off"
 								                    elif raw not in (None, ""):
 								                        mode = str(raw)
 								            except Exception:
 								                pass
 								        mode = (mode or "all").strip().lower()
 								        valid = {"all", "result", "error", "off"}
 								        if mode not in valid:
 								            logger.warning(
 								                "Unknown background_process_notifications '%s', defaulting to 'all'",
 								                mode,
 								            )
 								            return "all"
 								        return mode
-												feat(provider-routing): add OpenRouter provider routing configuration

Introduced a new `provider_routing` section in the CLI configuration to control how requests are routed across providers when using OpenRouter. This includes options for sorting providers by throughput, latency, or price, as well as allowing or ignoring specific providers, setting the order of provider attempts, and managing data collection policies. Updated relevant classes and documentation to support these features, enhancing flexibility in provider selection.

											
										
										
											2026-03-01 18:24:27 -08:00
+								    @staticmethod
 								    def _load_provider_routing() -> dict:
 								        """Load OpenRouter provider routing preferences from config.yaml."""
 								        try:
 								            import yaml as _y
 								            cfg_path = _hermes_home / "config.yaml"
 								            if cfg_path.exists():
-												Add explicit encoding="utf-8" to all config/data file open() calls

On Windows, open() defaults to the system locale encoding (cp1252,
cp1254, etc.) rather than UTF-8. This breaks any file containing
non-ASCII characters, and also causes crashes when writing JSON with
ensure_ascii=False.

This adds encoding="utf-8" to open() calls in:
- gateway/run.py (config.yaml reads/writes throughout)
- gateway/config.py (gateway.json and config.yaml)
- hermes_cli/config.py (config.yaml load/save)
- hermes_cli/main.py (session export with ensure_ascii=False)
- hermes_cli/status.py (jobs.json and sessions.json)

											
										
										
											2026-03-05 17:04:33 -05:00
+								                with open(cfg_path, encoding="utf-8") as _f:
-												feat(provider-routing): add OpenRouter provider routing configuration

Introduced a new `provider_routing` section in the CLI configuration to control how requests are routed across providers when using OpenRouter. This includes options for sorting providers by throughput, latency, or price, as well as allowing or ignoring specific providers, setting the order of provider attempts, and managing data collection policies. Updated relevant classes and documentation to support these features, enhancing flexibility in provider selection.

											
										
										
											2026-03-01 18:24:27 -08:00
+								                    cfg = _y.safe_load(_f) or {}
 								                return cfg.get("provider_routing", {}) or {}
 								        except Exception:
 								            pass
 								        return {}
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								    @staticmethod
 								    def _load_fallback_model() -> dict | None:
 								        """Load fallback model config from config.yaml.
 								        Returns a dict with 'provider' and 'model' keys, or None if
 								        not configured / both fields empty.
 								        """
 								        try:
 								            import yaml as _y
 								            cfg_path = _hermes_home / "config.yaml"
 								            if cfg_path.exists():
-												Merge PR #458: Add explicit UTF-8 encoding to config/data file I/O

Authored by shitcoinsherpa. Adds encoding='utf-8' to all text-mode
open() calls in gateway/run.py, gateway/config.py, hermes_cli/config.py,
hermes_cli/main.py, and hermes_cli/status.py. Prevents encoding errors
on Windows where the default locale is not UTF-8.

Also fixed 4 additional open() calls in gateway/run.py that were added
after the PR branch was created.

											
										
										
											2026-03-09 21:19:20 -07:00
+								                with open(cfg_path, encoding="utf-8") as _f:
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								                    cfg = _y.safe_load(_f) or {}
 								                fb = cfg.get("fallback_model", {}) or {}
 								                if fb.get("provider") and fb.get("model"):
 								                    return fb
 								        except Exception:
 								            pass
 								        return None
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    async def start(self) -> bool:
 								        """
 								        Start the gateway and all configured platform adapters.
 								        Returns True if at least one adapter connected successfully.
 								        """
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								        logger.info("Starting Hermes Gateway...")
 								        logger.info("Session storage: %s", self.config.sessions_dir)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session state in tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								        # Warn if no user allowlists are configured and open access is not opted in
 								        _any_allowlist = any(
 								            os.getenv(v)
 								            for v in ("TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS",
 								                       "WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS",
 								                       "GATEWAY_ALLOWED_USERS")
 								        )
 								        _allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")
 								        if not _any_allowlist and not _allow_all:
 								            logger.warning(
 								                "No user allowlists configured. All unauthorized users will be denied. "
 								                "Set GATEWAY_ALLOW_ALL_USERS=true in ~/.hermes/.env to allow open access, "
 								                "or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)."
 								            )
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        # Discover and load event hooks
 								        self.hooks.discover_and_load()
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
+								        # Recover background processes from checkpoint (crash recovery)
 								        try:
 								            from tools.process_registry import process_registry
 								            recovered = process_registry.recover_from_checkpoint()
 								            if recovered:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.info("Recovered %s background process(es) from previous run", recovered)
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
+								        except Exception as e:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								            logger.warning("Process checkpoint recovery: %s", e)
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        connected_count = 0
 								        # Initialize and connect each configured platform
 								        for platform, platform_config in self.config.platforms.items():
 								            if not platform_config.enabled:
 								                continue
 								            adapter = self._create_adapter(platform, platform_config)
 								            if not adapter:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.warning("No adapter available for %s", platform.value)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                continue
 								            # Set up message handler
 								            adapter.set_message_handler(self._handle_message)
 								            # Try to connect
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								            logger.info("Connecting to %s...", platform.value)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            try:
 								                success = await adapter.connect()
 								                if success:
 								                    self.adapters[platform] = adapter
 								                    connected_count += 1
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                    logger.info("✓ %s connected", platform.value)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                else:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                    logger.warning("✗ %s failed to connect", platform.value)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            except Exception as e:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.error("✗ %s error: %s", platform.value, e)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								        if connected_count == 0:
-												refactor: streamline cron job handling and update CLI commands

- Removed legacy cron daemon functionality, integrating cron job execution directly into the gateway process for improved efficiency.
- Updated CLI commands to reflect changes, replacing `hermes cron daemon` with `hermes cron status` and enhancing documentation for cron job management.
- Clarified messaging in the README and other documentation regarding the gateway's role in managing cron jobs.
- Removed obsolete terminal_hecate tool and related configurations to simplify the codebase.

											
										
										
											2026-02-21 16:21:19 -08:00
+								            logger.warning("No messaging platforms connected.")
 								            logger.info("Gateway will continue running for cron job execution.")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								        # Update delivery router with adapters
 								        self.delivery_router.adapters = self.adapters
 								        self._running = True
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
 								        # Emit gateway:startup hook
 								        hook_count = len(self.hooks.loaded_hooks)
 								        if hook_count:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								            logger.info("%s hook(s) loaded", hook_count)
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        await self.hooks.emit("gateway:startup", {
 								            "platforms": [p.value for p in self.adapters.keys()],
 								        })
-												refactor: streamline cron job handling and update CLI commands

- Removed legacy cron daemon functionality, integrating cron job execution directly into the gateway process for improved efficiency.
- Updated CLI commands to reflect changes, replacing `hermes cron daemon` with `hermes cron status` and enhancing documentation for cron job management.
- Clarified messaging in the README and other documentation regarding the gateway's role in managing cron jobs.
- Removed obsolete terminal_hecate tool and related configurations to simplify the codebase.

											
										
										
											2026-02-21 16:21:19 -08:00
+								        if connected_count > 0:
 								            logger.info("Gateway running with %s platform(s)", connected_count)
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
 								        # Build initial channel directory for send_message name resolution
 								        try:
 								            from gateway.channel_directory import build_channel_directory
 								            directory = build_channel_directory(self.adapters)
 								            ch_count = sum(len(chs) for chs in directory.get("platforms", {}).values())
 								            logger.info("Channel directory built: %d target(s)", ch_count)
 								        except Exception as e:
 								            logger.warning("Channel directory build failed: %s", e)
-												feat: add /update slash command for gateway platforms

Adds a /update command to Telegram, Discord, and other gateway platforms
that runs `hermes update` to pull the latest code, update dependencies,
sync skills, and restart the gateway.

Implementation:
- Spawns `hermes update` in a separate systemd scope (systemd-run --user
  --scope) so the process survives the gateway restart that hermes update
  triggers at the end. Falls back to nohup if systemd-run is unavailable.
- Writes a marker file (.update_pending.json) with the originating
  platform and chat_id before spawning the update.
- On gateway startup, _send_update_notification() checks for the marker,
  reads the captured update output, sends the results back to the user,
  and cleans up.

Also:
- Registers /update as a Discord slash command
- Updates README.md, docs/messaging.md, docs/slash-commands.md
- Adds 18 tests covering handler, notification, and edge cases

											
										
										
											2026-03-05 01:20:58 -08:00
+								        # Check if we're restarting after a /update command
 								        await self._send_update_notification()
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								        # Start background session expiry watcher for proactive memory flushing
 								        asyncio.create_task(self._session_expiry_watcher())
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								        logger.info("Press Ctrl+C to stop")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								        return True
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								    async def _session_expiry_watcher(self, interval: int = 300):
 								        """Background task that proactively flushes memories for expired sessions.
 								        Runs every `interval` seconds (default 5 min).  For each session that
 								        has expired according to its reset policy, flushes memories in a thread
 								        pool and marks the session so it won't be flushed again.
 								        This means memories are already saved by the time the user sends their
 								        next message, so there's no blocking delay.
 								        """
 								        await asyncio.sleep(60)  # initial delay — let the gateway fully start
 								        while self._running:
 								            try:
 								                self.session_store._ensure_loaded()
 								                for key, entry in list(self.session_store._entries.items()):
 								                    if entry.session_id in self.session_store._pre_flushed_sessions:
 								                        continue  # already flushed this session
 								                    if not self.session_store._is_session_expired(entry):
 								                        continue  # session still active
 								                    # Session has expired — flush memories in the background
 								                    logger.info(
 								                        "Session %s expired (key=%s), flushing memories proactively",
 								                        entry.session_id, key,
 								                    )
 								                    try:
 								                        await self._async_flush_memories(entry.session_id)
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
+								                        self._shutdown_gateway_honcho(key)
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								                        self.session_store._pre_flushed_sessions.add(entry.session_id)
 								                    except Exception as e:
 								                        logger.debug("Proactive memory flush failed for %s: %s", entry.session_id, e)
 								            except Exception as e:
 								                logger.debug("Session expiry watcher error: %s", e)
 								            # Sleep in small increments so we can stop quickly
 								            for _ in range(interval):
 								                if not self._running:
 								                    break
 								                await asyncio.sleep(1)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    async def stop(self) -> None:
 								        """Stop the gateway and disconnect all adapters."""
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								        logger.info("Stopping gateway...")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        self._running = False
 								        for platform, adapter in self.adapters.items():
 								            try:
 								                await adapter.disconnect()
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.info("✓ %s disconnected", platform.value)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            except Exception as e:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.error("✗ %s disconnect error: %s", platform.value, e)
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        self.adapters.clear()
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
+								        self._shutdown_all_gateway_honcho()
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        self._shutdown_event.set()
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
 								        from gateway.status import remove_pid_file
 								        remove_pid_file()
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								        logger.info("Gateway stopped")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								    async def wait_for_shutdown(self) -> None:
 								        """Suspend the caller until the gateway's shutdown event is set."""
 								        shutdown_signal = self._shutdown_event
 								        await shutdown_signal.wait()
 								    def _create_adapter(
 								        self,
 								        platform: Platform,
 								        config: Any
 								    ) -> Optional[BasePlatformAdapter]:
 								        """Create the appropriate adapter for a platform."""
 								        if platform == Platform.TELEGRAM:
 								            from gateway.platforms.telegram import TelegramAdapter, check_telegram_requirements
 								            if not check_telegram_requirements():
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.warning("Telegram: python-telegram-bot not installed")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                return None
 								            return TelegramAdapter(config)
 								        elif platform == Platform.DISCORD:
 								            from gateway.platforms.discord import DiscordAdapter, check_discord_requirements
 								            if not check_discord_requirements():
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.warning("Discord: discord.py not installed")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                return None
 								            return DiscordAdapter(config)
 								        elif platform == Platform.WHATSAPP:
 								            from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
 								            if not check_whatsapp_requirements():
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.warning("WhatsApp: Node.js not installed or bridge not configured")
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                return None
 								            return WhatsAppAdapter(config)
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        elif platform == Platform.SLACK:
 								            from gateway.platforms.slack import SlackAdapter, check_slack_requirements
 								            if not check_slack_requirements():
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.warning("Slack: slack-bolt not installed. Run: pip install 'hermes-agent[slack]'")
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								                return None
 								            return SlackAdapter(config)
-												feat: add Home Assistant integration (REST tools + WebSocket gateway)

- Add ha_list_entities, ha_get_state, ha_call_service tools via REST API
- Add WebSocket gateway adapter for real-time state_changed event monitoring
- Support domain/entity filtering, cooldown, and auto-reconnect with backoff
- Use REST API for outbound notifications to avoid WS race condition
- Gate tool availability on HASS_TOKEN env var
- Add 82 unit tests covering real logic (filtering, payload building, event pipeline)

											
										
										
											2026-02-28 13:32:48 +03:00
-												feat: add Signal messenger gateway platform (#405)

Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.

Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py

Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support

Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance

Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)

No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).

Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.

Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>

											
										
										
											2026-03-08 20:20:35 -07:00
+								        elif platform == Platform.SIGNAL:
 								            from gateway.platforms.signal import SignalAdapter, check_signal_requirements
 								            if not check_signal_requirements():
 								                logger.warning("Signal: SIGNAL_HTTP_URL or SIGNAL_ACCOUNT not configured")
 								                return None
 								            return SignalAdapter(config)
-												feat: add Home Assistant integration (REST tools + WebSocket gateway)

- Add ha_list_entities, ha_get_state, ha_call_service tools via REST API
- Add WebSocket gateway adapter for real-time state_changed event monitoring
- Support domain/entity filtering, cooldown, and auto-reconnect with backoff
- Use REST API for outbound notifications to avoid WS race condition
- Gate tool availability on HASS_TOKEN env var
- Add 82 unit tests covering real logic (filtering, payload building, event pipeline)

											
										
										
											2026-02-28 13:32:48 +03:00
+								        elif platform == Platform.HOMEASSISTANT:
 								            from gateway.platforms.homeassistant import HomeAssistantAdapter, check_ha_requirements
 								            if not check_ha_requirements():
 								                logger.warning("HomeAssistant: aiohttp not installed or HASS_TOKEN not set")
 								                return None
 								            return HomeAssistantAdapter(config)
-												feat: add email gateway platform (IMAP/SMTP)

Allow users to interact with Hermes by sending and receiving emails.
Uses IMAP polling for incoming messages and SMTP for replies with
proper threading (In-Reply-To, References headers).

Integrates with all 14 gateway extension points: config, adapter
factory, authorization, send_message tool, cron delivery, toolsets,
prompt hints, channel directory, setup wizard, status display, and
env example.

65 tests covering config, parsing, dispatch, threading, IMAP fetch,
SMTP send, attachments, and all integration points.

											
										
										
											2026-03-10 03:15:38 +03:00
+								        elif platform == Platform.EMAIL:
 								            from gateway.platforms.email import EmailAdapter, check_email_requirements
 								            if not check_email_requirements():
 								                logger.warning("Email: EMAIL_ADDRESS, EMAIL_PASSWORD, EMAIL_IMAP_HOST, or EMAIL_SMTP_HOST not set")
 								                return None
 								            return EmailAdapter(config)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        return None
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								    def _is_user_authorized(self, source: SessionSource) -> bool:
 								        """
 								        Check if a user is authorized to use the bot.
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        Checks in order:
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+1. Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
 2. Environment variable allowlists (TELEGRAM_ALLOWED_USERS, etc.)
 3. DM pairing approved list
 4. Global allow-all (GATEWAY_ALLOW_ALL_USERS=true)
 5. Default: deny
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        """
-												fix: resolve 4 bugs found in HA integration code review

- Auto-authorize HA events in gateway (system-generated, not user messages)
- Guard _read_events against None/closed WebSocket after failed reconnect
- Use UUID for send() message_id instead of polluting WS sequence counter
- entity_id parameter now takes precedence over data["entity_id"]

											
										
										
											2026-02-28 15:12:18 +03:00
+								        # Home Assistant events are system-generated (state changes), not
 								        # user-initiated messages.  The HASS_TOKEN already authenticates the
 								        # connection, so HA events are always authorized.
 								        if source.platform == Platform.HOMEASSISTANT:
 								            return True
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        user_id = source.user_id
 								        if not user_id:
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+								            return False
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        platform_env_map = {
 								            Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
 								            Platform.DISCORD: "DISCORD_ALLOWED_USERS",
 								            Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								            Platform.SLACK: "SLACK_ALLOWED_USERS",
-												feat: add Signal messenger gateway platform (#405)

Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.

Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py

Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support

Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance

Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)

No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).

Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.

Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>

											
										
										
											2026-03-08 20:20:35 -07:00
+								            Platform.SIGNAL: "SIGNAL_ALLOWED_USERS",
-												feat: add email gateway platform (IMAP/SMTP)

Allow users to interact with Hermes by sending and receiving emails.
Uses IMAP polling for incoming messages and SMTP for replies with
proper threading (In-Reply-To, References headers).

Integrates with all 14 gateway extension points: config, adapter
factory, authorization, send_message tool, cron delivery, toolsets,
prompt hints, channel directory, setup wizard, status display, and
env example.

65 tests covering config, parsing, dispatch, threading, IMAP fetch,
SMTP send, attachments, and all integration points.

											
										
										
											2026-03-10 03:15:38 +03:00
+								            Platform.EMAIL: "EMAIL_ALLOWED_USERS",
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        }
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+								        platform_allow_all_map = {
 								            Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
 								            Platform.DISCORD: "DISCORD_ALLOW_ALL_USERS",
 								            Platform.WHATSAPP: "WHATSAPP_ALLOW_ALL_USERS",
 								            Platform.SLACK: "SLACK_ALLOW_ALL_USERS",
-												feat: add Signal messenger gateway platform (#405)

Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.

Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py

Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support

Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance

Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)

No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).

Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.

Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>

											
										
										
											2026-03-08 20:20:35 -07:00
+								            Platform.SIGNAL: "SIGNAL_ALLOW_ALL_USERS",
-												feat: add email gateway platform (IMAP/SMTP)

Allow users to interact with Hermes by sending and receiving emails.
Uses IMAP polling for incoming messages and SMTP for replies with
proper threading (In-Reply-To, References headers).

Integrates with all 14 gateway extension points: config, adapter
factory, authorization, send_message tool, cron delivery, toolsets,
prompt hints, channel directory, setup wizard, status display, and
env example.

65 tests covering config, parsing, dispatch, threading, IMAP fetch,
SMTP send, attachments, and all integration points.

											
										
										
											2026-03-10 03:15:38 +03:00
+								            Platform.EMAIL: "EMAIL_ALLOW_ALL_USERS",
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+								        }
 								        # Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
 								        platform_allow_all_var = platform_allow_all_map.get(source.platform, "")
 								        if platform_allow_all_var and os.getenv(platform_allow_all_var, "").lower() in ("true", "1", "yes"):
 								            return True
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        # Check pairing store (always checked, regardless of allowlists)
 								        platform_name = source.platform.value if source.platform else ""
 								        if self.pairing_store.is_approved(platform_name, user_id):
 								            return True
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
 								        # Check platform-specific and global allowlists
 								        platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip()
 								        global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        if not platform_allowlist and not global_allowlist:
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+								            # No allowlists configured -- check global allow-all flag
 								            return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        # Check if user is in any allowlist
 								        allowed_ids = set()
 								        if platform_allowlist:
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+								            allowed_ids.update(uid.strip() for uid in platform_allowlist.split(",") if uid.strip())
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        if global_allowlist:
-												feat: enhance user authorization checks in GatewayRunner

- Updated the authorization logic to include a per-platform allow-all flag for improved flexibility.
- Revised the order of checks to prioritize platform-specific allow-all settings, followed by environment variable allowlists and DM pairing approvals.
- Added global allow-all configuration for broader access control.
- Improved handling of allowlists by stripping whitespace and ensuring valid entries are processed.

											
										
										
											2026-02-22 16:32:08 -08:00
+								            allowed_ids.update(uid.strip() for uid in global_allowlist.split(",") if uid.strip())
-												add full support for whatsapp

											
										
										
											2026-02-25 21:04:36 -08:00
+								        # WhatsApp JIDs have @s.whatsapp.net suffix — strip it for comparison
 								        check_ids = {user_id}
 								        if "@" in user_id:
 								            check_ids.add(user_id.split("@")[0])
 								        return bool(check_ids & allowed_ids)
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								    async def _handle_message(self, event: MessageEvent) -> Optional[str]:
 								        """
 								        Handle an incoming message from any platform.
 								        This is the core message processing pipeline:
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+1. Check user authorization
 2. Check for commands (/new, /reset, etc.)
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+3. Check for running agent and interrupt if needed
 4. Get or create session
 5. Build context for agent
 6. Run agent conversation
 7. Return response
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        """
 								        source = event.source
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
+								        # Check if user is authorized
 								        if not self._is_user_authorized(source):
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								            logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value)
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								            # In DMs: offer pairing code. In groups: silently ignore.
 								            if source.chat_type == "dm":
 								                platform_name = source.platform.value if source.platform else "unknown"
 								                code = self.pairing_store.generate_code(
 								                    platform_name, source.user_id, source.user_name or ""
 								                )
 								                if code:
 								                    adapter = self.adapters.get(source.platform)
 								                    if adapter:
 								                        await adapter.send(
 								                            source.chat_id,
 								                            f"Hi~ I don't recognize you yet!\n\n"
 								                            f"Here's your pairing code: `{code}`\n\n"
 								                            f"Ask the bot owner to run:\n"
 								                            f"`hermes pairing approve {platform_name} {code}`"
 								                        )
 								                else:
 								                    adapter = self.adapters.get(source.platform)
 								                    if adapter:
 								                        await adapter.send(
 								                            source.chat_id,
 								                            "Too many pairing requests right now~ "
 								                            "Please try again later!"
 								                        )
 								            return None
-												Enhance messaging gateway configuration and security features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
- Refined the gateway run script to support user authorization checks, ensuring only allowed users can interact with the bot.

											
										
										
											2026-02-03 10:46:23 -08:00
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								        # PRIORITY: If an agent is already running for this session, interrupt it
 								        # immediately. This is before command parsing to minimize latency -- the
 								        # user's "stop" message reaches the agent as fast as possible.
-												refactor: extract build_session_key() as single source of truth

The session key construction logic was duplicated in 4 places
(session.py + 3 inline copies in run.py), which is exactly the
kind of drift that caused issue #349 in the first place.

Extracted build_session_key() as a public function in session.py.
SessionStore._generate_session_key() now delegates to it, and all
inline key construction in run.py has been replaced with calls to
the shared function. Tests updated to test the function directly.

											
										
										
											2026-03-04 03:34:45 -08:00
+								        _quick_key = build_session_key(source)
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								        if _quick_key in self._running_agents:
 								            running_agent = self._running_agents[_quick_key]
 								            logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
 								            running_agent.interrupt(event.text)
 								            if _quick_key in self._pending_messages:
 								                self._pending_messages[_quick_key] += "\n" + event.text
 								            else:
 								                self._pending_messages[_quick_key] = event.text
 								            return None
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        # Check for commands
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        command = event.get_command()
-												feat(hooks): introduce event hooks system for lifecycle management

Add a new hooks system allowing users to run custom code at key lifecycle points in the agent's operation. This includes support for events such as `gateway:startup`, `session:start`, `agent:step`, and more. Documentation for creating hooks and available events has been added to `README.md` and a new `hooks.md` file. Additionally, integrate step callbacks in the agent to facilitate hook execution during tool-calling iterations.

											
										
										
											2026-02-28 17:09:26 -08:00
 								        # Emit command:* hook for any recognized slash command
 								        _known_commands = {"new", "reset", "help", "status", "stop", "model",
-												feat(gateway): add /compress and /usage commands for conversation management

Implemented the /compress command to allow users to manually compress conversation context, ensuring sufficient history is available before execution. The /usage command was also added to display token usage statistics for the current session, including prompt and completion tokens. Updated command documentation to reflect these new features.

											
										
										
											2026-03-01 00:25:44 -08:00
+								                          "personality", "retry", "undo", "sethome", "set-home",
-												feat: register remaining commands with platform menus

Telegram: add /insights, /update, /reload_mcp (underscore variant since
Telegram BotCommand names don't allow hyphens).

Discord: add /insights (with days parameter), /reload-mcp.

Also add reload_mcp as an alias for reload-mcp in the gateway command
dispatcher so Telegram's underscore form works, and add resume/provider
to the _known_commands set for hook emission.

											
										
										
											2026-03-08 17:13:45 -07:00
+								                          "compress", "usage", "insights", "reload-mcp", "reload_mcp",
-												feat: add /background command to gateway and CLI commands registry

Add /background <prompt> to the gateway, allowing users on Telegram,
Discord, Slack, etc. to fire off a prompt in a separate agent session.
The result is delivered back to the same chat when done, without
modifying the active conversation history.

Implementation:
- _handle_background_command: validates input, spawns asyncio task
- _run_background_task: creates AIAgent in executor thread, delivers
  result (text, images, media files) back via the platform adapter
- Inherits model, toolsets, provider routing from gateway config
- Error handling with user-visible failure messages

Also adds /background to hermes_cli/commands.py registry so it
appears in /help and autocomplete.

Tests: 15 new tests covering usage, task creation, uniqueness,
multi-platform, error paths, and help/autocomplete integration.

											
										
										
											2026-03-11 02:41:36 -07:00
+								                          "update", "title", "resume", "provider", "rollback",
-												fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
											
										
										
											2026-03-12 05:38:19 -07:00
+								                          "background", "reasoning"}
-												feat(hooks): introduce event hooks system for lifecycle management

Add a new hooks system allowing users to run custom code at key lifecycle points in the agent's operation. This includes support for events such as `gateway:startup`, `session:start`, `agent:step`, and more. Documentation for creating hooks and available events has been added to `README.md` and a new `hooks.md` file. Additionally, integrate step callbacks in the agent to facilitate hook execution during tool-calling iterations.

											
										
										
											2026-02-28 17:09:26 -08:00
+								        if command and command in _known_commands:
 								            await self.hooks.emit(f"command:{command}", {
 								                "platform": source.platform.value if source.platform else "",
 								                "user_id": source.user_id,
 								                "command": command,
 								                "args": event.get_command_args().strip(),
 								            })
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        if command in ["new", "reset"]:
 								            return await self._handle_reset_command(event)
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								        if command == "help":
 								            return await self._handle_help_command(event)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        if command == "status":
 								            return await self._handle_status_command(event)
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        if command == "stop":
 								            return await self._handle_stop_command(event)
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								        if command == "model":
 								            return await self._handle_model_command(event)
-												feat: /provider command + fix gateway bugs + harden parse_model_input

/provider command (CLI + gateway):
  Shows all providers with auth status (✓/✗), aliases, and active marker.
  Users can now discover what provider names work with provider:model syntax.

Gateway bugs fixed:
  - Config was saved even when validation.persist=False (told user 'session
    only' but actually persisted the unvalidated model)
  - HERMES_INFERENCE_PROVIDER env var not set on provider switch, causing
    the switch to be silently overridden if that env var was already set

parse_model_input hardened:
  - Colon only treated as provider delimiter if left side is a recognized
    provider name or alias. 'anthropic/claude-3.5-sonnet:beta' now passes
    through as a model name instead of trying provider='anthropic/claude-3.5-sonnet'.
  - HTTP URLs, random colons no longer misinterpreted.

56 tests passing across model validation, CLI commands, and integration.

											
										
										
											2026-03-08 06:09:36 -07:00
+								        if command == "provider":
 								            return await self._handle_provider_command(event)
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								        if command == "personality":
 								            return await self._handle_personality_command(event)
 								        if command == "retry":
 								            return await self._handle_retry_command(event)
 								        if command == "undo":
 								            return await self._handle_undo_command(event)
-												feat: unify set-home command naming across platforms

- Updated the command name from `/set-home` to `/sethome` in the GatewayRunner class for consistency.
- Added a new slash command `/sethome` in the Discord adapter to set the home channel.
- Registered the `/sethome` command in the Telegram adapter to align with the updated naming convention.

											
										
										
											2026-02-23 15:01:22 -08:00
+								        if command in ["sethome", "set-home"]:
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
+								            return await self._handle_set_home_command(event)
-												feat(gateway): add /compress and /usage commands for conversation management

Implemented the /compress command to allow users to manually compress conversation context, ensuring sufficient history is available before execution. The /usage command was also added to display token usage statistics for the current session, including prompt and completion tokens. Updated command documentation to reflect these new features.

											
										
										
											2026-03-01 00:25:44 -08:00
 								        if command == "compress":
 								            return await self._handle_compress_command(event)
 								        if command == "usage":
 								            return await self._handle_usage_command(event)
-												feat(mcp): banner integration, /reload-mcp command, resources & prompts

Banner integration:
- MCP Servers section in CLI startup banner between Tools and Skills
- Shows each server with transport type, tool count, connection status
- Failed servers shown in red; section hidden when no MCP configured
- Summary line includes MCP server count
- Removed raw print() calls from discovery (banner handles display)

/reload-mcp command:
- New slash command in both CLI and gateway
- Disconnects all MCP servers, re-reads config.yaml, reconnects
- Reports what changed (added/removed/reconnected servers)
- Allows adding/removing MCP servers without restarting

Resources & Prompts support:
- 4 utility tools registered per server: list_resources, read_resource,
  list_prompts, get_prompt
- Exposes MCP Resources (data sources) and Prompts (templates) as tools
- Proper parameter schemas (uri for read_resource, name for get_prompt)
- Handles text and binary resource content
- 23 new tests covering schemas, handlers, and registration

Test coverage: 74 MCP tests total, 1186 tests pass overall.

											
										
										
											2026-03-02 19:15:59 -08:00
-												feat: add /insights command with usage analytics and cost estimation

Inspired by Claude Code's /insights, adapted for Hermes Agent's multi-platform
architecture. Analyzes session history from state.db to produce comprehensive
usage insights.

Features:
- Overview stats: sessions, messages, tokens, estimated cost, active time
- Model breakdown: per-model sessions, tokens, and cost estimation
- Platform breakdown: CLI vs Telegram vs Discord etc. (unique to Hermes)
- Tool usage ranking: most-used tools with percentages
- Activity patterns: day-of-week chart, peak hours, streaks
- Notable sessions: longest, most messages, most tokens, most tool calls
- Cost estimation: real pricing data for 25+ models (OpenAI, Anthropic,
  DeepSeek, Google, Meta) with fuzzy model name matching
- Configurable time window: --days flag (default 30)
- Source filtering: --source flag to filter by platform

Three entry points:
- /insights slash command in CLI (supports --days and --source flags)
- /insights slash command in gateway (compact markdown format)
- hermes insights CLI subcommand (standalone)

Includes 56 tests covering pricing helpers, format helpers, empty DB,
populated DB with multi-platform data, filtering, formatting, and edge cases.

											
										
										
											2026-03-06 14:04:59 -08:00
+								        if command == "insights":
 								            return await self._handle_insights_command(event)
-												feat: register remaining commands with platform menus

Telegram: add /insights, /update, /reload_mcp (underscore variant since
Telegram BotCommand names don't allow hyphens).

Discord: add /insights (with days parameter), /reload-mcp.

Also add reload_mcp as an alias for reload-mcp in the gateway command
dispatcher so Telegram's underscore form works, and add resume/provider
to the _known_commands set for hook emission.

											
										
										
											2026-03-08 17:13:45 -07:00
+								        if command in ("reload-mcp", "reload_mcp"):
-												feat(mcp): banner integration, /reload-mcp command, resources & prompts

Banner integration:
- MCP Servers section in CLI startup banner between Tools and Skills
- Shows each server with transport type, tool count, connection status
- Failed servers shown in red; section hidden when no MCP configured
- Summary line includes MCP server count
- Removed raw print() calls from discovery (banner handles display)

/reload-mcp command:
- New slash command in both CLI and gateway
- Disconnects all MCP servers, re-reads config.yaml, reconnects
- Reports what changed (added/removed/reconnected servers)
- Allows adding/removing MCP servers without restarting

Resources & Prompts support:
- 4 utility tools registered per server: list_resources, read_resource,
  list_prompts, get_prompt
- Exposes MCP Resources (data sources) and Prompts (templates) as tools
- Proper parameter schemas (uri for read_resource, name for get_prompt)
- Handles text and binary resource content
- 23 new tests covering schemas, handlers, and registration

Test coverage: 74 MCP tests total, 1186 tests pass overall.

											
										
										
											2026-03-02 19:15:59 -08:00
+								            return await self._handle_reload_mcp_command(event)
-												feat: add /update slash command for gateway platforms

Adds a /update command to Telegram, Discord, and other gateway platforms
that runs `hermes update` to pull the latest code, update dependencies,
sync skills, and restart the gateway.

Implementation:
- Spawns `hermes update` in a separate systemd scope (systemd-run --user
  --scope) so the process survives the gateway restart that hermes update
  triggers at the end. Falls back to nohup if systemd-run is unavailable.
- Writes a marker file (.update_pending.json) with the originating
  platform and chat_id before spawning the update.
- On gateway startup, _send_update_notification() checks for the marker,
  reads the captured update output, sends the results back to the user,
  and cleans up.

Also:
- Registers /update as a Discord slash command
- Updates README.md, docs/messaging.md, docs/slash-commands.md
- Adds 18 tests covering handler, notification, and edge cases

											
										
										
											2026-03-05 01:20:58 -08:00
 								        if command == "update":
 								            return await self._handle_update_command(event)
-												fix: harden session title system + add /title to gateway

- Empty string titles normalized to None (prevents uncaught IntegrityError
  when two sessions both get empty-string titles via the unique index)
- Escape SQL LIKE wildcards (%, _) in resolve_session_by_title and
  get_next_title_in_lineage to prevent false matches on titles like
  'test_project' matching 'testXproject #2'
- Optimize list_sessions_rich from N+2 queries to a single query with
  correlated subqueries (preview + last_active computed in SQL)
- Add /title slash command to gateway (Telegram, Discord, Slack, WhatsApp)
  with set and show modes, uniqueness conflict handling
- Add /title to gateway /help text and _known_commands
- 12 new tests: empty string normalization, multi-empty-title safety,
  SQL wildcard edge cases, gateway /title set/show/conflict/cross-platform

											
										
										
											2026-03-08 15:48:09 -07:00
 								        if command == "title":
 								            return await self._handle_title_command(event)
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
 								        if command == "resume":
 								            return await self._handle_resume_command(event)
-												feat: filesystem checkpoints and /rollback command

Automatic filesystem snapshots before destructive file operations,
with user-facing rollback.  Inspired by PR #559 (by @alireza78a).

Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot

Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)

Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git

Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference

											
										
										
											2026-03-10 00:49:15 -07:00
 								        if command == "rollback":
 								            return await self._handle_rollback_command(event)
-												feat: add /background command to gateway and CLI commands registry

Add /background <prompt> to the gateway, allowing users on Telegram,
Discord, Slack, etc. to fire off a prompt in a separate agent session.
The result is delivered back to the same chat when done, without
modifying the active conversation history.

Implementation:
- _handle_background_command: validates input, spawns asyncio task
- _run_background_task: creates AIAgent in executor thread, delivers
  result (text, images, media files) back via the platform adapter
- Inherits model, toolsets, provider routing from gateway config
- Error handling with user-visible failure messages

Also adds /background to hermes_cli/commands.py registry so it
appears in /help and autocomplete.

Tests: 15 new tests covering usage, task creation, uniqueness,
multi-platform, error paths, and help/autocomplete integration.

											
										
										
											2026-03-11 02:41:36 -07:00
 								        if command == "background":
 								            return await self._handle_background_command(event)
-												fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
											
										
										
											2026-03-12 05:38:19 -07:00
 								        if command == "reasoning":
 								            return await self._handle_reasoning_command(event)
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
-												feat(cli,gateway): add user-defined quick commands that bypass agent loop

Implements config-driven quick commands for both CLI and gateway that
execute locally without invoking the LLM.

Config example (~/.hermes/config.yaml):
  quick_commands:
    limits:
      type: exec
      command: /home/user/.local/bin/hermes-limits
    dn:
      type: exec
      command: echo daily-note

Changes:
- hermes_cli/config.py: add quick_commands: {} default
- cli.py: check quick_commands before skill commands in process_command()
- gateway/run.py: check quick_commands before skill commands in _handle_message()
- tests/test_quick_commands.py: 11 tests covering exec, timeout, unsupported type, missing command, priority over skills

Closes #744

											
										
										
											2026-03-09 07:38:06 +03:00
+								        # User-defined quick commands (bypass agent loop, no LLM call)
 								        if command:
 								            quick_commands = self.config.get("quick_commands", {})
 								            if command in quick_commands:
 								                qcmd = quick_commands[command]
 								                if qcmd.get("type") == "exec":
 								                    exec_cmd = qcmd.get("command", "")
 								                    if exec_cmd:
 								                        try:
 								                            proc = await asyncio.create_subprocess_shell(
 								                                exec_cmd,
 								                                stdout=asyncio.subprocess.PIPE,
 								                                stderr=asyncio.subprocess.PIPE,
 								                            )
 								                            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
 								                            output = (stdout or stderr).decode().strip()
 								                            return output if output else "Command returned no output."
 								                        except asyncio.TimeoutError:
 								                            return "Quick command timed out (30s)."
 								                        except Exception as e:
 								                            return f"Quick command error: {e}"
 								                    else:
 								                        return f"Quick command '/{command}' has no command defined."
 								                else:
 								                    return f"Quick command '/{command}' has unsupported type (only 'exec' is supported)."
-												feat(skills): implement dynamic skill slash commands for CLI and gateway

											
										
										
											2026-02-28 11:18:50 -08:00
+								        # Skill slash commands: /skill-name loads the skill and sends to agent
 								        if command:
 								            try:
 								                from agent.skill_commands import get_skill_commands, build_skill_invocation_message
 								                skill_cmds = get_skill_commands()
 								                cmd_key = f"/{command}"
 								                if cmd_key in skill_cmds:
 								                    user_instruction = event.get_command_args().strip()
-												feat: secure skill env setup on load (core #688)

When a skill declares required_environment_variables in its YAML
frontmatter, missing env vars trigger a secure TUI prompt (identical
to the sudo password widget) when the skill is loaded. Secrets flow
directly to ~/.hermes/.env, never entering LLM context.

Key changes:
- New required_environment_variables frontmatter field for skills
- Secure TUI widget (masked input, 120s timeout)
- Gateway safety: messaging platforms show local setup guidance
- Legacy prerequisites.env_vars normalized into new format
- Remote backend handling: conservative setup_needed=True
- Env var name validation, file permissions hardened to 0o600
- Redact patterns extended for secret-related JSON fields
- 12 existing skills updated with prerequisites declarations
- ~48 new tests covering skip, timeout, gateway, remote backends
- Dynamic panel widget sizing (fixes hardcoded width from original PR)

Cherry-picked from PR #723 by kshitijk4poor, rebased onto current main
with conflict resolution.

Fixes #688

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>

											
										
										
											2026-03-13 03:14:04 -07:00
+								                    msg = build_skill_invocation_message(
 								                        cmd_key, user_instruction, task_id=session_key
 								                    )
-												feat(skills): implement dynamic skill slash commands for CLI and gateway

											
										
										
											2026-02-28 11:18:50 -08:00
+								                    if msg:
 								                        event.text = msg
 								                        # Fall through to normal message processing with skill content
 								            except Exception as e:
 								                logger.debug("Skill command check failed (non-fatal): %s", e)
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
-												Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews

Add AGENTS and SOUL.md support

Add Exec Approval

											
										
										
											2026-02-12 10:05:08 -08:00
+								        # Check for pending exec approval responses
-												refactor: extract build_session_key() as single source of truth

The session key construction logic was duplicated in 4 places
(session.py + 3 inline copies in run.py), which is exactly the
kind of drift that caused issue #349 in the first place.

Extracted build_session_key() as a public function in session.py.
SessionStore._generate_session_key() now delegates to it, and all
inline key construction in run.py has been replaced with calls to
the shared function. Tests updated to test the function directly.

											
										
										
											2026-03-04 03:34:45 -08:00
+								        session_key_preview = build_session_key(source)
-												Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews

Add AGENTS and SOUL.md support

Add Exec Approval

											
										
										
											2026-02-12 10:05:08 -08:00
+								        if session_key_preview in self._pending_approvals:
 								            user_text = event.text.strip().lower()
 								            if user_text in ("yes", "y", "approve", "ok", "go", "do it"):
 								                approval = self._pending_approvals.pop(session_key_preview)
 								                cmd = approval["command"]
-												feat(security): add tirith pre-exec command scanning

Integrate tirith as a pre-execution security scanner that detects
homograph URLs, pipe-to-interpreter patterns, terminal injection,
zero-width Unicode, and environment variable manipulation — threats
the existing 50-pattern dangerous command detector doesn't cover.

Architecture: gather-then-decide — both tirith and the dangerous
command detector run before any approval prompt, preventing gateway
force=True replay from bypassing one check when only the other was
shown to the user.

New files:
- tools/tirith_security.py: subprocess wrapper with auto-installer,
  mandatory cosign provenance verification, non-blocking background
  download, disk-persistent failure markers with retryable-cause
  tracking (cosign_missing auto-clears when cosign appears on PATH)
- tests/tools/test_tirith_security.py: 62 tests covering exit code
  mapping, fail_open, cosign verification, background install,
  HERMES_HOME isolation, and failure recovery
- tests/tools/test_command_guards.py: 21 integration tests for the
  combined guard orchestration

Modified files:
- tools/approval.py: add check_all_command_guards() orchestrator,
  add allow_permanent parameter to prompt_dangerous_approval()
- tools/terminal_tool.py: replace _check_dangerous_command with
  consolidated check_all_command_guards
- cli.py: update _approval_callback for allow_permanent kwarg,
  call ensure_installed() at startup
- gateway/run.py: iterate pattern_keys list on replay approval,
  call ensure_installed() at startup
- hermes_cli/config.py: add security config defaults, split
  commented sections for independent fallback
- cli-config.yaml.example: document tirith security config

											
										
										
											2026-03-11 14:20:32 +05:30
+								                pattern_keys = approval.get("pattern_keys", [])
 								                if not pattern_keys:
 								                    pk = approval.get("pattern_key", "")
 								                    pattern_keys = [pk] if pk else []
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.info("User approved dangerous command: %s...", cmd[:60])
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								                from tools.terminal_tool import terminal_tool
 								                from tools.approval import approve_session
-												feat(security): add tirith pre-exec command scanning

Integrate tirith as a pre-execution security scanner that detects
homograph URLs, pipe-to-interpreter patterns, terminal injection,
zero-width Unicode, and environment variable manipulation — threats
the existing 50-pattern dangerous command detector doesn't cover.

Architecture: gather-then-decide — both tirith and the dangerous
command detector run before any approval prompt, preventing gateway
force=True replay from bypassing one check when only the other was
shown to the user.

New files:
- tools/tirith_security.py: subprocess wrapper with auto-installer,
  mandatory cosign provenance verification, non-blocking background
  download, disk-persistent failure markers with retryable-cause
  tracking (cosign_missing auto-clears when cosign appears on PATH)
- tests/tools/test_tirith_security.py: 62 tests covering exit code
  mapping, fail_open, cosign verification, background install,
  HERMES_HOME isolation, and failure recovery
- tests/tools/test_command_guards.py: 21 integration tests for the
  combined guard orchestration

Modified files:
- tools/approval.py: add check_all_command_guards() orchestrator,
  add allow_permanent parameter to prompt_dangerous_approval()
- tools/terminal_tool.py: replace _check_dangerous_command with
  consolidated check_all_command_guards
- cli.py: update _approval_callback for allow_permanent kwarg,
  call ensure_installed() at startup
- gateway/run.py: iterate pattern_keys list on replay approval,
  call ensure_installed() at startup
- hermes_cli/config.py: add security config defaults, split
  commented sections for independent fallback
- cli-config.yaml.example: document tirith security config

											
										
										
											2026-03-11 14:20:32 +05:30
+								                for pk in pattern_keys:
 								                    approve_session(session_key_preview, pk)
-												Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews

Add AGENTS and SOUL.md support

Add Exec Approval

											
										
										
											2026-02-12 10:05:08 -08:00
+								                result = terminal_tool(command=cmd, force=True)
 								                return f"✅ Command approved and executed.\n\n```\n{result[:3500]}\n```"
 								            elif user_text in ("no", "n", "deny", "cancel", "nope"):
 								                self._pending_approvals.pop(session_key_preview)
 								                return "❌ Command denied."
-												feat: add 'View full command' option to dangerous command approval (#887)

When a dangerous command is detected and the user is prompted for
approval, long commands are truncated (80 chars in fallback, 70 chars
in the TUI). Users had no way to see the full command before deciding.

This adds a 'View full command' option across all approval interfaces:

- CLI fallback (tools/approval.py): [v]iew option in the prompt menu.
  Shows the full command and re-prompts for approval decision.
- CLI TUI (cli.py): 'Show full command' choice in the arrow-key
  selection panel. Expands the command display in-place and removes
  the view option after use.
- CLI callbacks (callbacks.py): 'view' choice added to the list when
  the command exceeds 70 characters.
- Gateway (gateway/run.py): 'full', 'show', 'view' responses reveal
  the complete command while keeping the approval pending.

Includes 7 new tests covering view-then-approve, view-then-deny,
short command fallthrough, and double-view behavior.

Addresses community feedback about the 80-char cap on dangerous commands.
											
										
										
											2026-03-12 06:27:21 -07:00
+								            elif user_text in ("full", "show", "view", "show full", "view full"):
 								                # Show full command without consuming the approval
 								                cmd = self._pending_approvals[session_key_preview]["command"]
 								                return f"Full command:\n\n```\n{cmd}\n```\n\nReply yes/no to approve or deny."
-												Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews

Add AGENTS and SOUL.md support

Add Exec Approval

											
										
										
											2026-02-12 10:05:08 -08:00
+								            # If it's not clearly an approval/denial, fall through to normal processing
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        # Get or create session
 								        session_entry = self.session_store.get_or_create_session(source)
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        session_key = session_entry.session_key
-												feat(hooks): introduce event hooks system for lifecycle management

Add a new hooks system allowing users to run custom code at key lifecycle points in the agent's operation. This includes support for events such as `gateway:startup`, `session:start`, `agent:step`, and more. Documentation for creating hooks and available events has been added to `README.md` and a new `hooks.md` file. Additionally, integrate step callbacks in the agent to facilitate hook execution during tool-calling iterations.

											
										
										
											2026-02-28 17:09:26 -08:00
+								        # Emit session:start for new or auto-reset sessions
 								        _is_new_session = (
 								            session_entry.created_at == session_entry.updated_at
 								            or getattr(session_entry, "was_auto_reset", False)
 								        )
 								        if _is_new_session:
 								            await self.hooks.emit("session:start", {
 								                "platform": source.platform.value if source.platform else "",
 								                "user_id": source.user_id,
 								                "session_id": session_entry.session_id,
 								                "session_key": session_key,
 								            })
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        # Build session context
 								        context = build_session_context(source, self.config, session_entry)
 								        # Set environment variables for tools
 								        self._set_session_env(context)
 								        # Build the context prompt to inject
 								        context_prompt = build_session_context_prompt(context)
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								        # If the previous session expired and was auto-reset, prepend a notice
 								        # so the agent knows this is a fresh conversation (not an intentional /reset).
 								        if getattr(session_entry, 'was_auto_reset', False):
 								            context_prompt = (
 								                "[System note: The user's previous session expired due to inactivity. "
 								                "This is a fresh conversation with no prior context.]\n\n"
 								                + context_prompt
 								            )
 								            session_entry.was_auto_reset = False
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        # Load conversation history from transcript
 								        history = self.session_store.load_transcript(session_entry.session_id)
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								        # -----------------------------------------------------------------
 								        # Session hygiene: auto-compress pathologically large transcripts
 								        #
 								        # Long-lived gateway sessions can accumulate enough history that
 								        # every new message rehydrates an oversized transcript, causing
 								        # repeated truncation/context failures.  Detect this early and
 								        # compress proactively — before the agent even starts.  (#628)
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								        #
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates a fresh AIAgent and its token count is
  discarded afterwards, so the next message's pre-check falls back to a rough
  len(str(msg)) // 4 estimate, which overestimates by 30-50% on tool-heavy
  conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								        # Token source priority:
 								        # 1. Actual API-reported prompt_tokens from the last turn
 								        #    (stored in session_entry.last_prompt_tokens)
 								        # 2. Rough char-based estimate (str(msg)//4) with a 1.4x
 								        #    safety factor to account for overestimation on tool-heavy
 								        #    conversations (code/JSON tokenizes at 5-7+ chars/token).
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								        # -----------------------------------------------------------------
 								        if history and len(history) >= 4:
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								            from agent.model_metadata import (
 								                estimate_messages_tokens_rough,
 								                get_model_context_length,
 								            )
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: raise session hygiene threshold from 50% to 85%

Session hygiene was firing at the same threshold (50%) as the agent's
own context compressor, causing premature compression on every turn
in long gateway sessions (especially Telegram).

Hygiene is a safety net for pathologically large sessions that would
cause API failures — it should NOT be doing normal compression work.
The agent's own compressor handles that during its tool loop with
accurate real token counts from the API.

Changes:
- Default hygiene threshold: 0.50 → 0.85 (fires only when truly large)
- Hygiene threshold is now independent of compression.threshold config
  (that setting controls the agent's compressor, not the pre-agent safety net)
- Removed env var override for hygiene threshold (CONTEXT_COMPRESSION_THRESHOLD
  still controls the agent's own compressor)
											
										
										
											2026-03-13 04:17:45 -07:00
+								            # Read model + compression config from config.yaml.
 								            # NOTE: hygiene threshold is intentionally HIGHER than the agent's
 								            # own compressor (0.85 vs 0.50).  Hygiene is a safety net for
 								            # sessions that grew too large between turns — it fires pre-agent
 								            # to prevent API failures.  The agent's own compressor handles
 								            # normal context management during its tool loop with accurate
 								            # real token counts.  Having hygiene at 0.50 caused premature
 								            # compression on every turn in long gateway sessions.
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								            _hyg_model = "anthropic/claude-sonnet-4.6"
-												fix: raise session hygiene threshold from 50% to 85%

Session hygiene was firing at the same threshold (50%) as the agent's
own context compressor, causing premature compression on every turn
in long gateway sessions (especially Telegram).

Hygiene is a safety net for pathologically large sessions that would
cause API failures — it should NOT be doing normal compression work.
The agent's own compressor handles that during its tool loop with
accurate real token counts from the API.

Changes:
- Default hygiene threshold: 0.50 → 0.85 (fires only when truly large)
- Hygiene threshold is now independent of compression.threshold config
  (that setting controls the agent's compressor, not the pre-agent safety net)
- Removed env var override for hygiene threshold (CONTEXT_COMPRESSION_THRESHOLD
  still controls the agent's own compressor)
											
										
										
											2026-03-13 04:17:45 -07:00
+								            _hyg_threshold_pct = 0.85
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								            _hyg_compression_enabled = True
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								            try:
 								                _hyg_cfg_path = _hermes_home / "config.yaml"
 								                if _hyg_cfg_path.exists():
 								                    import yaml as _hyg_yaml
-												Merge PR #458: Add explicit UTF-8 encoding to config/data file I/O

Authored by shitcoinsherpa. Adds encoding='utf-8' to all text-mode
open() calls in gateway/run.py, gateway/config.py, hermes_cli/config.py,
hermes_cli/main.py, and hermes_cli/status.py. Prevents encoding errors
on Windows where the default locale is not UTF-8.

Also fixed 4 additional open() calls in gateway/run.py that were added
after the PR branch was created.

											
										
										
											2026-03-09 21:19:20 -07:00
+								                    with open(_hyg_cfg_path, encoding="utf-8") as _hyg_f:
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								                        _hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
 								                    # Resolve model name (same logic as run_sync)
 								                    _model_cfg = _hyg_data.get("model", {})
 								                    if isinstance(_model_cfg, str):
 								                        _hyg_model = _model_cfg
 								                    elif isinstance(_model_cfg, dict):
 								                        _hyg_model = _model_cfg.get("default", _hyg_model)
-												fix: raise session hygiene threshold from 50% to 85%

Session hygiene was firing at the same threshold (50%) as the agent's
own context compressor, causing premature compression on every turn
in long gateway sessions (especially Telegram).

Hygiene is a safety net for pathologically large sessions that would
cause API failures — it should NOT be doing normal compression work.
The agent's own compressor handles that during its tool loop with
accurate real token counts from the API.

Changes:
- Default hygiene threshold: 0.50 → 0.85 (fires only when truly large)
- Hygiene threshold is now independent of compression.threshold config
  (that setting controls the agent's compressor, not the pre-agent safety net)
- Removed env var override for hygiene threshold (CONTEXT_COMPRESSION_THRESHOLD
  still controls the agent's own compressor)
											
										
										
											2026-03-13 04:17:45 -07:00
+								                    # Read compression settings — only use enabled flag.
 								                    # The threshold is intentionally separate from the agent's
 								                    # compression.threshold (hygiene runs higher).
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                    _comp_cfg = _hyg_data.get("compression", {})
 								                    if isinstance(_comp_cfg, dict):
 								                        _hyg_compression_enabled = str(
 								                            _comp_cfg.get("enabled", True)
 								                        ).lower() in ("true", "1", "yes")
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								            except Exception:
 								                pass
-												fix: raise session hygiene threshold from 50% to 85%

Session hygiene was firing at the same threshold (50%) as the agent's
own context compressor, causing premature compression on every turn
in long gateway sessions (especially Telegram).

Hygiene is a safety net for pathologically large sessions that would
cause API failures — it should NOT be doing normal compression work.
The agent's own compressor handles that during its tool loop with
accurate real token counts from the API.

Changes:
- Default hygiene threshold: 0.50 → 0.85 (fires only when truly large)
- Hygiene threshold is now independent of compression.threshold config
  (that setting controls the agent's compressor, not the pre-agent safety net)
- Removed env var override for hygiene threshold (CONTEXT_COMPRESSION_THRESHOLD
  still controls the agent's own compressor)
											
										
										
											2026-03-13 04:17:45 -07:00
+								            # Check env override for disabling compression entirely
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								            if os.getenv("CONTEXT_COMPRESSION_ENABLED", "").lower() in ("false", "0", "no"):
 								                _hyg_compression_enabled = False
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								            if _hyg_compression_enabled:
 								                _hyg_context_length = get_model_context_length(_hyg_model)
 								                _compress_token_threshold = int(
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates a fresh AIAgent and its token count is
  discarded afterwards, so the next message's pre-check falls back to a rough
  len(str(msg))//4 estimate, which overestimates by 30-50% on tool-heavy
  conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								                    _hyg_context_length * _hyg_threshold_pct
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                )
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates a fresh AIAgent and its token count is
  discarded afterwards, so the next message's pre-check falls back to a rough
  len(str(msg))//4 estimate, which overestimates by 30-50% on tool-heavy
  conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								                _warn_token_threshold = int(_hyg_context_length * 0.95)
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                _msg_count = len(history)
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates a fresh AIAgent and its token count is
  discarded afterwards, so the next message's pre-check falls back to a rough
  len(str(msg))//4 estimate, which overestimates by 30-50% on tool-heavy
  conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
 								                # Prefer actual API-reported tokens from the last turn
 								                # (stored in session entry) over the rough char-based estimate.
 								                # The rough estimate (str(msg)//4) overestimates by 30-50% on
 								                # tool-heavy/code-heavy conversations, causing premature compression.
 								                _stored_tokens = session_entry.last_prompt_tokens
 								                if _stored_tokens > 0:
 								                    _approx_tokens = _stored_tokens
 								                    _token_source = "actual"
 								                else:
 								                    _approx_tokens = estimate_messages_tokens_rough(history)
 								                    # Apply safety factor only for rough estimates
 								                    _compress_token_threshold = int(
 								                        _compress_token_threshold * 1.4
 								                    )
 								                    _warn_token_threshold = int(_warn_token_threshold * 1.4)
 								                    _token_source = "estimated"
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                _needs_compress = _approx_tokens >= _compress_token_threshold
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                if _needs_compress:
 								                    logger.info(
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates a fresh AIAgent and its token count is
  discarded afterwards, so the next message's pre-check falls back to a rough
  len(str(msg))//4 estimate, which overestimates by 30-50% on tool-heavy
  conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								                        "Session hygiene: %s messages, ~%s tokens (%s) — auto-compressing "
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                        "(threshold: %s%% of %s = %s tokens)",
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates a fresh AIAgent and its token count is
  discarded afterwards, so the next message's pre-check falls back to a rough
  len(str(msg))//4 estimate, which overestimates by 30-50% on tool-heavy
  conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								                        _msg_count, f"{_approx_tokens:,}", _token_source,
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                        int(_hyg_threshold_pct * 100),
 								                        f"{_hyg_context_length:,}",
 								                        f"{_compress_token_threshold:,}",
 								                    )
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                    _hyg_adapter = self.adapters.get(source.platform)
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								                    _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                    if _hyg_adapter:
 								                        try:
 								                            await _hyg_adapter.send(
 								                                source.chat_id,
 								                                f"🗜️ Session is large ({_msg_count} messages, "
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								                                f"~{_approx_tokens:,} tokens). Auto-compressing...",
 								                                metadata=_hyg_meta,
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								                            )
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                        except Exception:
 								                            pass
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                    try:
 								                        from run_agent import AIAgent
 								                        _hyg_runtime = _resolve_runtime_agent_kwargs()
 								                        if _hyg_runtime.get("api_key"):
 								                            _hyg_msgs = [
 								                                {"role": m.get("role"), "content": m.get("content")}
 								                                for m in history
 								                                if m.get("role") in ("user", "assistant")
 								                                and m.get("content")
 								                            ]
 								                            if len(_hyg_msgs) >= 4:
 								                                _hyg_agent = AIAgent(
 								                                    **_hyg_runtime,
-												fix(gateway): pass model to temporary AIAgent instances

Memory flush, /compress, and session hygiene create AIAgent without
model=, falling back to the hardcoded default "anthropic/claude-opus-4.6".
This fails with a 400 error when the active provider is openai-codex
(Codex only accepts its own model names like gpt-5.1-codex-mini).

Add _resolve_gateway_model() that mirrors the env/config resolution
already used by _run_agent_sync, and wire it into all three temporary
agent creation sites.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-11 00:09:37 +01:00
+								                                    model=_hyg_model,
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                    max_iterations=4,
 								                                    quiet_mode=True,
 								                                    enabled_toolsets=["memory"],
 								                                    session_id=session_entry.session_id,
 								                                )
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                loop = asyncio.get_event_loop()
 								                                _compressed, _ = await loop.run_in_executor(
 								                                    None,
 								                                    lambda: _hyg_agent._compress_context(
 								                                        _hyg_msgs, "",
 								                                        approx_tokens=_approx_tokens,
 								                                    ),
 								                                )
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                self.session_store.rewrite_transcript(
 								                                    session_entry.session_id, _compressed
 								                                )
-												fix: integration hardening for gateway token tracking

Follow-up to 58dbd81 — ensures smooth transition for existing users:

- Backward compat: old session files without last_prompt_tokens
  default to 0 via data.get('last_prompt_tokens', 0)
- /compress, /undo, /retry: reset last_prompt_tokens to 0 after
  rewriting transcripts (stale token counts would under-report)
- Auto-compression hygiene: reset last_prompt_tokens after rewriting
- update_session: use a None sentinel (not 0) as the default so callers
  can explicitly reset to 0 while normal calls don't clobber the stored count
- 6 new tests covering: default value, serialization roundtrip,
  old-format migration, set/reset/no-change semantics
- /reset: new SessionEntry naturally gets last_prompt_tokens=0

2942 tests pass.

											
										
										
											2026-03-10 23:40:24 -07:00
+								                                # Reset stored token count — transcript was rewritten
 								                                session_entry.last_prompt_tokens = 0
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                history = _compressed
 								                                _new_count = len(_compressed)
 								                                _new_tokens = estimate_messages_tokens_rough(
 								                                    _compressed
 								                                )
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                logger.info(
 								                                    "Session hygiene: compressed %s → %s msgs, "
 								                                    "~%s → ~%s tokens",
 								                                    _msg_count, _new_count,
 								                                    f"{_approx_tokens:,}", f"{_new_tokens:,}",
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								                                )
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								                                if _hyg_adapter:
 								                                    try:
 								                                        await _hyg_adapter.send(
 								                                            source.chat_id,
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                            f"🗜️ Compressed: {_msg_count} → "
 								                                            f"{_new_count} messages, "
 								                                            f"~{_approx_tokens:,} → "
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								                                            f"~{_new_tokens:,} tokens",
 								                                            metadata=_hyg_meta,
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
+								                                        )
 								                                    except Exception:
 								                                        pass
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                # Still too large after compression — warn user
 								                                if _new_tokens >= _warn_token_threshold:
 								                                    logger.warning(
 								                                        "Session hygiene: still ~%s tokens after "
 								                                        "compression — suggesting /reset",
 								                                        f"{_new_tokens:,}",
 								                                    )
 								                                    if _hyg_adapter:
 								                                        try:
 								                                            await _hyg_adapter.send(
 								                                                source.chat_id,
 								                                                "⚠️ Session is still very large "
 								                                                "after compression "
 								                                                f"(~{_new_tokens:,} tokens). "
 								                                                "Consider using /reset to start "
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								                                                "fresh if you experience issues.",
 								                                                metadata=_hyg_meta,
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                            )
 								                                        except Exception:
 								                                            pass
 								                    except Exception as e:
 								                        logger.warning(
 								                            "Session hygiene auto-compress failed: %s", e
 								                        )
 								                        # Compression failed and session is dangerously large
 								                        if _approx_tokens >= _warn_token_threshold:
 								                            _hyg_adapter = self.adapters.get(source.platform)
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								                            _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                            if _hyg_adapter:
 								                                try:
 								                                    await _hyg_adapter.send(
 								                                        source.chat_id,
 								                                        f"⚠️ Session is very large "
 								                                        f"({_msg_count} messages, "
 								                                        f"~{_approx_tokens:,} tokens) and "
 								                                        "auto-compression failed. Consider "
 								                                        "using /compress or /reset to avoid "
-												fix(gateway): isolate telegram forum topic sessions

											
										
										
											2026-03-11 09:15:34 +01:00
+								                                        "issues.",
 								                                        metadata=_hyg_meta,
-												fix: unify gateway session hygiene with agent compression config

The gateway had a SEPARATE compression system ('session hygiene')
with hardcoded thresholds (100k tokens / 200 messages) that were
completely disconnected from the model's context length and the
user's compression config in config.yaml. This caused premature
auto-compression on Telegram/Discord — triggering at ~60k tokens
(from the 200-message threshold) or inconsistent token counts.

Changes:
- Gateway hygiene now reads model name from config.yaml and uses
  get_model_context_length() to derive the actual context limit
- Compression threshold comes from compression.threshold in
  config.yaml (default 0.85), same as the agent's ContextCompressor
- Removed the message-count-based trigger (was redundant and caused
  false positives in tool-heavy sessions)
- Removed the undocumented session_hygiene config section — the
  standard compression.* config now controls everything
- Env var overrides (CONTEXT_COMPRESSION_THRESHOLD,
  CONTEXT_COMPRESSION_ENABLED) are respected
- Warn threshold is now 95% of model context (was hardcoded 200k)
- Updated tests to verify model-aware thresholds, scaling across
  models, and that message count alone no longer triggers compression

For claude-opus-4.6 (200k context) at 85% threshold: gateway
hygiene now triggers at 170k tokens instead of the old 100k.

											
										
										
											2026-03-08 20:08:02 -07:00
+								                                    )
 								                                except Exception:
 								                                    pass
-												feat: auto-compress pathologically large gateway sessions (#628)

Long-lived gateway sessions can accumulate enough history that every new
message rehydrates an oversized transcript, causing repeated truncation
failures (finish_reason=length).

Add a session hygiene check in _handle_message that runs right after
loading the transcript and before invoking the agent:

1. Estimate message count and rough token count of the transcript
2. If above configurable thresholds (default: 200 msgs or 100K tokens),
   auto-compress the transcript proactively
3. Notify the user about the compression with before/after stats
4. If still above warn threshold (default: 200K tokens) after
   compression, suggest /reset
5. If compression fails on a dangerously large session, warn the user
   to use /compress or /reset manually

Thresholds are configurable via config.yaml:

  session_hygiene:
    auto_compress_tokens: 100000
    auto_compress_messages: 200
    warn_tokens: 200000

This complements the agent's existing preflight compression (which
runs inside run_conversation) by catching pathological sessions at
the gateway layer before the agent is even created.

Includes 12 tests for threshold detection and token estimation.

											
										
										
											2026-03-07 20:09:48 -08:00
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
+								        # First-message onboarding -- only on the very first interaction ever
 								        if not history and not self.session_store.has_any_sessions():
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								            context_prompt += (
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
+								                "\n\n[System note: This is the user's very first message ever. "
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                "Briefly introduce yourself and mention that /help shows available commands. "
 								                "Keep the introduction concise -- one or two sentences max.]"
 								            )
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
+								        # One-time prompt if no home channel is set for this platform
 								        if not history and source.platform and source.platform != Platform.LOCAL:
 								            platform_name = source.platform.value
 								            env_key = f"{platform_name.upper()}_HOME_CHANNEL"
 								            if not os.getenv(env_key):
 								                adapter = self.adapters.get(source.platform)
 								                if adapter:
 								                    await adapter.send(
 								                        source.chat_id,
 								                        f"📬 No home channel is set for {platform_name.title()}. "
 								                        f"A home channel is where Hermes delivers cron job results "
 								                        f"and cross-platform messages.\n\n"
-												feat: unify set-home command naming across platforms

- Updated the command name from `/set-home` to `/sethome` in the GatewayRunner class for consistency.
- Added a new slash command `/sethome` in the Discord adapter to set the home channel.
- Registered the `/sethome` command in the Telegram adapter to align with the updated naming convention.

											
										
										
											2026-02-23 15:01:22 -08:00
+								                        f"Type /sethome to make this chat your home channel, "
-												feat: implement channel directory and message mirroring for cross-platform communication

- Introduced a new channel directory to cache reachable channels/contacts for messaging platforms, enhancing the send_message tool's ability to resolve human-friendly names to numeric IDs.
- Added functionality to mirror sent messages into the target's session transcript, providing context for cross-platform message delivery.
- Updated the send_message tool to support listing available targets and improved error handling for channel resolution.
- Enhanced the gateway to build and refresh the channel directory during startup and at regular intervals, ensuring up-to-date channel information.

											
										
										
											2026-02-22 20:44:15 -08:00
+								                        f"or ignore to skip."
 								                    )
-												Enhance image handling and analysis capabilities across platforms

- Updated the vision tool to accept both HTTP/HTTPS URLs and local file paths for image analysis.
- Implemented caching of user-uploaded images in local directories to ensure reliable access for the vision tool, addressing issues with ephemeral URLs.
- Enhanced platform adapters (Discord, Telegram, WhatsApp) to download and cache images, allowing for immediate analysis and enriched message context.
- Added a new method to auto-analyze images attached by users, enriching the conversation with detailed descriptions.
- Improved documentation for image handling processes and updated related functions for clarity and efficiency.

											
										
										
											2026-02-15 16:10:50 -08:00
+								        # -----------------------------------------------------------------
 								        # Auto-analyze images sent by the user
 								        #
 								        # If the user attached image(s), we run the vision tool eagerly so
 								        # the conversation model always receives a text description.  The
 								        # local file path is also included so the model can re-examine the
 								        # image later with a more targeted question via vision_analyze.
 								        #
 								        # We filter to image paths only (by media_type) so that non-image
 								        # attachments (documents, audio, etc.) are not sent to the vision
 								        # tool even when they appear in the same message.
 								        # -----------------------------------------------------------------
 								        message_text = event.text or ""
 								        if event.media_urls:
 								            image_paths = []
 								            for i, path in enumerate(event.media_urls):
 								                # Check media_types if available; otherwise infer from message type
 								                mtype = event.media_types[i] if i < len(event.media_types) else ""
 								                is_image = (
 								                    mtype.startswith("image/")
 								                    or event.message_type == MessageType.PHOTO
 								                )
 								                if is_image:
 								                    image_paths.append(path)
 								            if image_paths:
 								                message_text = await self._enrich_message_with_vision(
 								                    message_text, image_paths
 								                )
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        # -----------------------------------------------------------------
 								        # Auto-transcribe voice/audio messages sent by the user
 								        # -----------------------------------------------------------------
 								        if event.media_urls:
 								            audio_paths = []
 								            for i, path in enumerate(event.media_urls):
 								                mtype = event.media_types[i] if i < len(event.media_types) else ""
 								                is_audio = (
 								                    mtype.startswith("audio/")
 								                    or event.message_type in (MessageType.VOICE, MessageType.AUDIO)
 								                )
 								                if is_audio:
 								                    audio_paths.append(path)
 								            if audio_paths:
 								                message_text = await self._enrich_message_with_transcription(
 								                    message_text, audio_paths
 								                )
-												feat(telegram): add document file processing for PDF, text, and Office files

Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-27 11:44:57 -05:00
 								        # -----------------------------------------------------------------
 								        # Enrich document messages with context notes for the agent
 								        # -----------------------------------------------------------------
 								        if event.media_urls and event.message_type == MessageType.DOCUMENT:
 								            for i, path in enumerate(event.media_urls):
 								                mtype = event.media_types[i] if i < len(event.media_types) else ""
 								                if not (mtype.startswith("application/") or mtype.startswith("text/")):
 								                    continue
 								                # Extract display filename by stripping the doc_{uuid12}_ prefix
 								                import os as _os
 								                basename = _os.path.basename(path)
 								                # Format: doc_<12hex>_<original_filename>
 								                parts = basename.split("_", 2)
 								                display_name = parts[2] if len(parts) >= 3 else basename
-												fix(security): patch path traversal, size bypass, and prompt injection in document processing

- Sanitize filenames in cache_document_from_bytes to prevent path traversal (strip directory components, null bytes, resolve check)
- Reject documents with None file_size instead of silently allowing download
- Cap text file injection at 100 KB to prevent oversized prompt payloads
- Sanitize display_name in run.py context notes to block prompt injection via filenames
- Add 35 unit tests covering document cache utilities and Telegram document handling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-27 11:53:46 -05:00
+								                # Sanitize to prevent prompt injection via filenames
 								                import re as _re
 								                display_name = _re.sub(r'[^\w.\- ]', '_', display_name)
-												feat(telegram): add document file processing for PDF, text, and Office files

Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-27 11:44:57 -05:00
 								                if mtype.startswith("text/"):
 								                    context_note = (
 								                        f"[The user sent a text document: '{display_name}'. "
 								                        f"Its content has been included below. "
 								                        f"The file is also saved at: {path}]"
 								                    )
 								                else:
 								                    context_note = (
 								                        f"[The user sent a document: '{display_name}'. "
 								                        f"The file is saved at: {path}. "
 								                        f"Ask the user what they'd like you to do with it.]"
 								                    )
 								                message_text = f"{context_note}\n\n{message_text}"
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        try:
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								            # Emit agent:start hook
 								            hook_ctx = {
 								                "platform": source.platform.value if source.platform else "",
 								                "user_id": source.user_id,
 								                "session_id": session_entry.session_id,
 								                "message": message_text[:500],
 								            }
 								            await self.hooks.emit("agent:start", hook_ctx)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            # Run the agent
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								            agent_result = await self._run_agent(
-												Enhance image handling and analysis capabilities across platforms

- Updated the vision tool to accept both HTTP/HTTPS URLs and local file paths for image analysis.
- Implemented caching of user-uploaded images in local directories to ensure reliable access for the vision tool, addressing issues with ephemeral URLs.
- Enhanced platform adapters (Discord, Telegram, WhatsApp) to download and cache images, allowing for immediate analysis and enriched message context.
- Added a new method to auto-analyze images attached by users, enriching the conversation with detailed descriptions.
- Improved documentation for image handling processes and updated related functions for clarity and efficiency.

											
										
										
											2026-02-15 16:10:50 -08:00
+								                message=message_text,
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								                context_prompt=context_prompt,
 								                history=history,
 								                source=source,
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								                session_id=session_entry.session_id,
 								                session_key=session_key
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            )
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								            response = agent_result.get("final_response", "")
 								            agent_messages = agent_result.get("messages", [])
-												fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
											
										
										
											2026-03-12 05:38:19 -07:00
-												fix: sync session_id after mid-run context compression

Critical bug: when the agent's context compressor fires during a tool
loop (_compress_context), it creates a new session_id and writes the
compressed messages there. But the gateway's session_entry still pointed
to the old session_id. On the next message, load_transcript() loaded
the stale pre-compression transcript, causing:

- Context bloat returning every turn
- Repeated compression cycles
- Loss of carefully compressed context

Fix: after run_conversation() returns, check if the agent's session_id
changed (compression split) and sync it back to the session store entry.
Also pass the effective session_id in the result dict so _handle_message
writes transcript entries to the correct session.

This affects ALL gateway adapters, not just webhook.
											
										
										
											2026-03-13 04:14:35 -07:00
+								            # If the agent's session_id changed during compression, update
 								            # session_entry so transcript writes below go to the right session.
 								            if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
 								                session_entry.session_id = agent_result["session_id"]
-												fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
											
										
										
											2026-03-12 05:38:19 -07:00
+								            # Prepend reasoning/thinking if display is enabled
 								            if getattr(self, "_show_reasoning", False) and response:
 								                last_reasoning = agent_result.get("last_reasoning")
 								                if last_reasoning:
 								                    # Collapse long reasoning to keep messages readable
 								                    lines = last_reasoning.strip().splitlines()
 								                    if len(lines) > 15:
 								                        display_reasoning = "\n".join(lines[:15])
 								                        display_reasoning += f"\n_... ({len(lines) - 15} more lines)_"
 								                    else:
 								                        display_reasoning = last_reasoning.strip()
 								                    response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}"
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								            # Emit agent:end hook
 								            await self.hooks.emit("agent:end", {
 								                **hook_ctx,
 								                "response": (response or "")[:500],
 								            })
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
+								            # Check for pending process watchers (check_interval on background processes)
 								            try:
 								                from tools.process_registry import process_registry
 								                while process_registry.pending_watchers:
 								                    watcher = process_registry.pending_watchers.pop(0)
 								                    asyncio.create_task(self._run_process_watcher(watcher))
 								            except Exception as e:
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								                logger.error("Process watcher setup error: %s", e)
-												Add background process management with process tool, wait, PTY, and stdin support

New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).

Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL

Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response

Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)

Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform

RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop

Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview

											
										
										
											2026-02-17 02:51:31 -08:00
-												Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews

Add AGENTS and SOUL.md support

Add Exec Approval

											
										
										
											2026-02-12 10:05:08 -08:00
+								            # Check if the agent encountered a dangerous command needing approval
 								            try:
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								                from tools.approval import pop_pending
 								                pending = pop_pending(session_key)
 								                if pending:
 								                    self._pending_approvals[session_key] = pending
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								            except Exception as e:
 								                logger.debug("Failed to check pending approvals: %s", e)
-												Add Text-to-Speech (TTS) functionality with multiple providers

Add tool previews

Add AGENTS and SOUL.md support

Add Exec Approval

											
										
										
											2026-02-12 10:05:08 -08:00
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								            # Save the full conversation to the transcript, including tool calls.
 								            # This preserves the complete agent loop (tool_calls, tool results,
 								            # intermediate reasoning) so sessions can be resumed with full context
 								            # and transcripts are useful for debugging and training data.
 								            ts = datetime.now().isoformat()
 								            # If this is a fresh session (no history), write the full tool
 								            # definitions as the first entry so the transcript is self-describing
 								            # -- the same list of dicts sent as tools=[...] in the API request.
 								            if not history:
-												Update tool definitions handling in GatewayRunner

- Modified the retrieval of tool definitions to use the agent result's "tools" key, ensuring accurate logging in the transcript.
- Enhanced the response structure to include tools in the final output, improving the clarity of tool usage in session interactions.

											
										
										
											2026-02-16 00:55:18 -08:00
+								                tool_defs = agent_result.get("tools", [])
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								                self.session_store.append_to_transcript(
 								                    session_entry.session_id,
 								                    {
 								                        "role": "session_meta",
 								                        "tools": tool_defs or [],
 								                        "model": os.getenv("HERMES_MODEL", ""),
 								                        "platform": source.platform.value if source.platform else "",
 								                        "timestamp": ts,
 								                    }
 								                )
-												fix(gateway): use filtered history length for transcript message extraction

The transcript extraction used len(history) to find new messages, but
history includes session_meta entries that are stripped before passing
to the agent. This mismatch caused 1 message to be lost from the
transcript on every turn after the first, because the slice offset
was too high. Use the filtered history length (history_offset) returned
by _run_agent instead.

Also changed the else branch from returning all agent_messages to
returning an empty list, so compressed/shorter agent output does not
duplicate the entire history into the transcript.

											
										
										
											2026-03-04 21:34:40 +03:00
+								            # Find only the NEW messages from this turn (skip history we loaded).
 								            # Use the filtered history length (history_offset) that was actually
 								            # passed to the agent, not len(history) which includes session_meta
 								            # entries that were stripped before the agent saw them.
 								            history_len = agent_result.get("history_offset", len(history))
 								            new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
 								            # If no new messages found (edge case), fall back to simple user/assistant
 								            if not new_messages:
 								                self.session_store.append_to_transcript(
 								                    session_entry.session_id,
 								                    {"role": "user", "content": message_text, "timestamp": ts}
 								                )
 								                if response:
 								                    self.session_store.append_to_transcript(
 								                        session_entry.session_id,
 								                        {"role": "assistant", "content": response, "timestamp": ts}
 								                    )
 								            else:
-												fix: eliminate 3x SQLite message duplication in gateway sessions (#860)

Three separate code paths all wrote to the same SQLite state.db with
no deduplication, inflating session transcripts by 3-4x:

1. _log_msg_to_db() — wrote each message individually after append
2. _flush_messages_to_session_db() — re-wrote ALL new messages at
   every _persist_session() call (~18 exit points), with no tracking
   of what was already written
3. gateway append_to_transcript() — wrote everything a third time
   after the agent returned

Since load_transcript() prefers SQLite over JSONL, the inflated data
was loaded on every session resume, causing proportional token waste.

Fix:
- Remove _log_msg_to_db() and all 16 call sites (redundant with flush)
- Add _last_flushed_db_idx tracking in _flush_messages_to_session_db()
  so repeated _persist_session() calls only write truly new messages
- Reset flush cursor on compression (new session ID)
- Add skip_db parameter to SessionStore.append_to_transcript() so the
  gateway skips SQLite writes when the agent already persisted them
- Gateway now passes skip_db=True for agent-managed messages, still
  writes to JSONL as backup

Verified: a 12-message CLI session with tool calls produces exactly
12 SQLite rows with zero duplicates (previously would be 36-48).

Tests: 9 new tests covering flush deduplication, skip_db behavior,
compression reset, and initialization. Full suite passes (2869 tests).

											
										
										
											2026-03-10 15:22:44 -07:00
+								                # The agent already persisted these messages to SQLite via
 								                # _flush_messages_to_session_db(), so skip the DB write here
 								                # to prevent the duplicate-write bug (#860).  We still write
 								                # to JSONL for backward compatibility and as a backup.
 								                agent_persisted = self._session_db is not None
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								                for msg in new_messages:
 								                    # Skip system messages (they're rebuilt each run)
 								                    if msg.get("role") == "system":
 								                        continue
 								                    # Add timestamp to each message for debugging
 								                    entry = {**msg, "timestamp": ts}
 								                    self.session_store.append_to_transcript(
-												fix: eliminate 3x SQLite message duplication in gateway sessions (#860)

Three separate code paths all wrote to the same SQLite state.db with
no deduplication, inflating session transcripts by 3-4x:

1. _log_msg_to_db() — wrote each message individually after append
2. _flush_messages_to_session_db() — re-wrote ALL new messages at
   every _persist_session() call (~18 exit points), with no tracking
   of what was already written
3. gateway append_to_transcript() — wrote everything a third time
   after the agent returned

Since load_transcript() prefers SQLite over JSONL, the inflated data
was loaded on every session resume, causing proportional token waste.

Fix:
- Remove _log_msg_to_db() and all 16 call sites (redundant with flush)
- Add _last_flushed_db_idx tracking in _flush_messages_to_session_db()
  so repeated _persist_session() calls only write truly new messages
- Reset flush cursor on compression (new session ID)
- Add skip_db parameter to SessionStore.append_to_transcript() so the
  gateway skips SQLite writes when the agent already persisted them
- Gateway now passes skip_db=True for agent-managed messages, still
  writes to JSONL as backup

Verified: a 12-message CLI session with tool calls produces exactly
12 SQLite rows with zero duplicates (previously would be 36-48).

Tests: 9 new tests covering flush deduplication, skip_db behavior,
compression reset, and initialization. Full suite passes (2869 tests).

											
										
										
											2026-03-10 15:22:44 -07:00
+								                        session_entry.session_id, entry,
 								                        skip_db=agent_persisted,
-												Enhance agent response handling and transcript logging

- Refactored the agent response processing to return a comprehensive result dictionary, including final responses and full message history.
- Improved transcript logging to capture the complete conversation, including tool calls and intermediate reasoning, facilitating session resumption and debugging.
- Added handling for fresh sessions to include tool definitions in the transcript for clarity.
- Implemented logic to filter and timestamp new messages, ensuring accurate logging of user and assistant interactions.

											
										
										
											2026-02-16 00:53:17 -08:00
+								                    )
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												fix: use actual API token counts for gateway compression pre-check

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates a fresh AIAgent and its token count is
  discarded afterwards, so the next message's pre-check falls back to a rough
  len(str(msg))//4 estimate, which overestimates by 30-50% on tool-heavy
  conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.

											
										
										
											2026-03-10 23:28:18 -07:00
+								            # Update session with actual prompt token count from the agent
 								            self.session_store.update_session(
 								                session_entry.session_key,
 								                last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
 								            )
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
 								            return response
 								        except Exception as e:
-												refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security

- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
  - Removes deprecated get_event_loop()/set_event_loop() calls
  - Makes all tool handlers self-protecting regardless of caller's event loop state
  - RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
  per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
  - Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
  tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
  xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs

											
										
										
											2026-02-21 18:28:49 -08:00
+								            logger.exception("Agent error in session %s", session_key)
 								            return (
 								                "Sorry, I encountered an unexpected error. "
 								                "The details have been logged for debugging. "
 								                "Try again or use /reset to start a fresh session."
 								            )
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        finally:
 								            # Clear session env
 								            self._clear_session_env()
 								    async def _handle_reset_command(self, event: MessageEvent) -> str:
 								        """Handle /new or /reset command."""
 								        source = event.source
 								        # Get existing session key
-												fix(gateway): persist transcript changes in /retry, /undo and fix /reset

/retry and /undo set session_entry.conversation_history which does not
exist on SessionEntry. The truncated history was never written to disk,
so the next message reload picked up the full unmodified transcript.

Added SessionStore.rewrite_transcript() that persists changes to both
the JSONL file and SQLite database, and updated both commands to use it.

/reset accessed self.session_store._sessions which does not exist on
SessionStore (the correct attribute is _entries). Also replaced the
hand-coded session key with _generate_session_key() to fix WhatsApp DM
sessions using the wrong key format.

Closes #210

											
										
										
											2026-03-01 01:12:58 +03:00
+								        session_key = self.session_store._generate_session_key(source)
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								        # Flush memories in the background (fire-and-forget) so the user
 								        # gets the "Session reset!" response immediately.
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
+								        try:
-												fix: /retry, /undo, /compress, and /reset gateway commands (#210)

- /retry, /undo, /compress were setting a non-existent conversation_history
  attribute on SessionEntry (a @dataclass with no such field). The dangling
  attribute was silently created but never read — transcript was reloaded
  from DB on next interaction, making all three commands no-ops.

- /reset accessed self.session_store._sessions (non-existent) instead of
  self.session_store._entries, causing AttributeError caught by a bare
  except, silently skipping the pre-reset memory flush.

Fix:
- Add SessionDB.clear_messages() to delete messages and reset counters
- Add SessionStore.rewrite_transcript() to atomically replace transcript
  in both SQLite and legacy JSONL storage
- Replace all dangling attr assignments with rewrite_transcript() calls
- Fix _sessions → _entries in /reset handler

Closes #210

											
										
										
											2026-03-02 00:14:49 -08:00
+								            old_entry = self.session_store._entries.get(session_key)
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
+								            if old_entry:
-												feat(gateway): proactive async memory flush on session expiry

Previously, when a session expired (idle/daily reset), the memory flush
ran synchronously inside get_or_create_session — blocking the user's
message for 10-60s while an LLM call saved memories.

Now a background watcher task (_session_expiry_watcher) runs every 5 min,
detects expired sessions, and flushes memories proactively in a thread
pool.  By the time the user sends their next message, memories are
already saved and the response is immediate.

Changes:
- Add _is_session_expired(entry) to SessionStore — works from entry
  alone without needing a SessionSource
- Add _pre_flushed_sessions set to track already-flushed sessions
- Remove sync _on_auto_reset callback from get_or_create_session
- Refactor flush into _flush_memories_for_session (sync worker) +
  _async_flush_memories (thread pool wrapper)
- Add _session_expiry_watcher background task, started in start()
- Simplify /reset command to use shared fire-and-forget flush
- Add 10 tests for expiry detection, callback removal, tracking

											
										
										
											2026-03-07 11:27:50 -08:00
+								                asyncio.create_task(self._async_flush_memories(old_entry.session_id))
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
+								        except Exception as e:
 								            logger.debug("Gateway memory flush on reset failed: %s", e)
-												fix(gateway): persist Honcho managers across session requests

											
										
										
											2026-03-10 02:06:17 -07:00
 								        self._shutdown_gateway_honcho(session_key)
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        # Reset the session
 								        new_entry = self.session_store.reset_session(session_key)
-												Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.

											
										
										
											2026-02-15 21:38:59 -08:00
+								        # Emit session:reset hook
 								        await self.hooks.emit("session:reset", {
 								            "platform": source.platform.value if source.platform else "",
 								            "user_id": source.user_id,
 								            "session_key": session_key,
 								        })
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        if new_entry:
 								            return "✨ Session reset! I've started fresh with no memory of our previous conversation."
 								        else:
 								            # No existing session, just create one
 								            self.session_store.get_or_create_session(source, force_new=True)
 								            return "✨ New session started!"
 								    async def _handle_status_command(self, event: MessageEvent) -> str:
 								        """Handle /status command."""
 								        source = event.source
 								        session_entry = self.session_store.get_or_create_session(source)
 								        connected_platforms = [p.value for p in self.adapters.keys()]
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        # Check if there's an active agent
 								        session_key = session_entry.session_key
 								        is_running = session_key in self._running_agents
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								        lines = [
 								            "📊 **Hermes Gateway Status**",
 								            "",
 								            f"**Session ID:** `{session_entry.session_id[:12]}...`",
 								            f"**Created:** {session_entry.created_at.strftime('%Y-%m-%d %H:%M')}",
 								            f"**Last Activity:** {session_entry.updated_at.strftime('%Y-%m-%d %H:%M')}",
 								            f"**Tokens:** {session_entry.total_tokens:,}",
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								            f"**Agent Running:** {'Yes ⚡' if is_running else 'No'}",
-												Enhance CLI with multi-platform messaging integration and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.

											
										
										
											2026-02-02 19:01:51 -08:00
+								            "",
 								            f"**Connected Platforms:** {', '.join(connected_platforms)}",
 								        ]
 								        return "\n".join(lines)
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								    async def _handle_stop_command(self, event: MessageEvent) -> str:
 								        """Handle /stop command - interrupt a running agent."""
 								        source = event.source
 								        session_entry = self.session_store.get_or_create_session(source)
 								        session_key = session_entry.session_key
 								        if session_key in self._running_agents:
 								            agent = self._running_agents[session_key]
 								            agent.interrupt()
 								            return "⚡ Stopping the current task... The agent will finish its current step and respond."
 								        else:
 								            return "No active task to stop."
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
+								    async def _handle_help_command(self, event: MessageEvent) -> str:
 								        """Handle /help command - list available commands."""
-												feat(skills): implement dynamic skill slash commands for CLI and gateway

											
										
										
											2026-02-28 11:18:50 -08:00
+								        lines = [
 								            "📖 **Hermes Commands**\n",
 								            "`/new` — Start a new conversation",
 								            "`/reset` — Reset conversation history",
 								            "`/status` — Show session info",
 								            "`/stop` — Interrupt the running agent",
-												fix: resolve 'auto' provider in /model display + update gateway handler

- normalize_provider('auto') now returns 'openrouter' (the default)
  so /model shows the curated model list instead of nothing
- CLI /model display uses normalize_provider before looking up labels
- Gateway /model handler now uses the same validation logic as CLI:
  live API probe, provider:model syntax, curated model list display

											
										
										
											2026-03-08 05:54:52 -07:00
+								            "`/model [provider:model]` — Show/change model (or switch provider)",
-												feat: /provider command + fix gateway bugs + harden parse_model_input

/provider command (CLI + gateway):
  Shows all providers with auth status (✓/✗), aliases, and active marker.
  Users can now discover what provider names work with provider:model syntax.

Gateway bugs fixed:
  - Config was saved even when validation.persist=False (told user 'session
    only' but actually persisted the unvalidated model)
  - HERMES_INFERENCE_PROVIDER env var not set on provider switch, causing
    the switch to be silently overridden if that env var was already set

parse_model_input hardened:
  - Colon only treated as provider delimiter if left side is a recognized
    provider name or alias. 'anthropic/claude-3.5-sonnet:beta' now passes
    through as a model name instead of trying provider='anthropic/claude-3.5-sonnet'.
  - HTTP URLs, random colons no longer misinterpreted.

56 tests passing across model validation, CLI commands, and integration.

											
										
										
											2026-03-08 06:09:36 -07:00
+								            "`/provider` — Show available providers and auth status",
-												feat(skills): implement dynamic skill slash commands for CLI and gateway

											
										
										
											2026-02-28 11:18:50 -08:00
+								            "`/personality [name]` — Set a personality",
 								            "`/retry` — Retry your last message",
 								            "`/undo` — Remove the last exchange",
 								            "`/sethome` — Set this chat as the home channel",
-												feat(gateway): add /compress and /usage commands for conversation management

Implemented the /compress command to allow users to manually compress conversation context, ensuring sufficient history is available before execution. The /usage command was also added to display token usage statistics for the current session, including prompt and completion tokens. Updated command documentation to reflect these new features.

											
										
										
											2026-03-01 00:25:44 -08:00
+								            "`/compress` — Compress conversation context",
-												fix: harden session title system + add /title to gateway

- Empty string titles normalized to None (prevents uncaught IntegrityError
  when two sessions both get empty-string titles via the unique index)
- Escape SQL LIKE wildcards (%, _) in resolve_session_by_title and
  get_next_title_in_lineage to prevent false matches on titles like
  'test_project' matching 'testXproject #2'
- Optimize list_sessions_rich from N+2 queries to a single query with
  correlated subqueries (preview + last_active computed in SQL)
- Add /title slash command to gateway (Telegram, Discord, Slack, WhatsApp)
  with set and show modes, uniqueness conflict handling
- Add /title to gateway /help text and _known_commands
- 12 new tests: empty string normalization, multi-empty-title safety,
  SQL wildcard edge cases, gateway /title set/show/conflict/cross-platform

											
										
										
											2026-03-08 15:48:09 -07:00
+								            "`/title [name]` — Set or show the session title",
-												feat: add /resume command to gateway for switching to named sessions

Messaging users can now switch back to previously-named sessions:
- /resume My Project  — resolves the title (with auto-lineage) and
  restores that session's conversation history
- /resume (no args)   — lists recent titled sessions to choose from

Adds SessionStore.switch_session() which ends the current session and
points the session entry at the target session ID so the old transcript
is loaded on the next message. Running agents are cleared on switch.

Completes the session naming feature from PR #720 for gateway users.

8 new tests covering: name resolution, lineage auto-latest, already-on-
session check, nonexistent names, agent cleanup, no-DB fallback, and
listing titled sessions.

											
										
										
											2026-03-08 17:09:00 -07:00
+								            "`/resume [name]` — Resume a previously-named session",
-												feat(gateway): add /compress and /usage commands for conversation management

Implemented the /compress command to allow users to manually compress conversation context, ensuring sufficient history is available before execution. The /usage command was also added to display token usage statistics for the current session, including prompt and completion tokens. Updated command documentation to reflect these new features.

											
										
										
											2026-03-01 00:25:44 -08:00
+								            "`/usage` — Show token usage for this session",
-												feat: add /insights command with usage analytics and cost estimation

Inspired by Claude Code's /insights, adapted for Hermes Agent's multi-platform
architecture. Analyzes session history from state.db to produce comprehensive
usage insights.

Features:
- Overview stats: sessions, messages, tokens, estimated cost, active time
- Model breakdown: per-model sessions, tokens, and cost estimation
- Platform breakdown: CLI vs Telegram vs Discord etc. (unique to Hermes)
- Tool usage ranking: most-used tools with percentages
- Activity patterns: day-of-week chart, peak hours, streaks
- Notable sessions: longest, most messages, most tokens, most tool calls
- Cost estimation: real pricing data for 25+ models (OpenAI, Anthropic,
  DeepSeek, Google, Meta) with fuzzy model name matching
- Configurable time window: --days flag (default 30)
- Source filtering: --source flag to filter by platform

Three entry points:
- /insights slash command in CLI (supports --days and --source flags)
- /insights slash command in gateway (compact markdown format)
- hermes insights CLI subcommand (standalone)

Includes 56 tests covering pricing helpers, format helpers, empty DB,
populated DB with multi-platform data, filtering, formatting, and edge cases.

											
										
										
											2026-03-06 14:04:59 -08:00
+								            "`/insights [days]` — Show usage insights and analytics",
-												fix: /reasoning command — add gateway support, fix display, persist settings (#1031)

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs
											
										
										
											2026-03-12 05:38:19 -07:00
+								            "`/reasoning [level|show|hide]` — Set reasoning effort or toggle display",
-												feat: filesystem checkpoints and /rollback command

Automatic filesystem snapshots before destructive file operations,
with user-facing rollback.  Inspired by PR #559 (by @alireza78a).

Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot

Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)

Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git

Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference

											
										
										
											2026-03-10 00:49:15 -07:00
+								            "`/rollback [number]` — List or restore filesystem checkpoints",
-												feat: add /background command to gateway and CLI commands registry

Add /background <prompt> to the gateway, allowing users on Telegram,
Discord, Slack, etc. to fire off a prompt in a separate agent session.
The result is delivered back to the same chat when done, without
modifying the active conversation history.

Implementation:
- _handle_background_command: validates input, spawns asyncio task
- _run_background_task: creates AIAgent in executor thread, delivers
  result (text, images, media files) back via the platform adapter
- Inherits model, toolsets, provider routing from gateway config
- Error handling with user-visible failure messages

Also adds /background to hermes_cli/commands.py registry so it
appears in /help and autocomplete.

Tests: 15 new tests covering usage, task creation, uniqueness,
multi-platform, error paths, and help/autocomplete integration.

											
										
										
											2026-03-11 02:41:36 -07:00
+								            "`/background <prompt>` — Run a prompt in a separate background session",
-												feat(mcp): banner integration, /reload-mcp command, resources & prompts

Banner integration:
- MCP Servers section in CLI startup banner between Tools and Skills
- Shows each server with transport type, tool count, connection status
- Failed servers shown in red; section hidden when no MCP configured
- Summary line includes MCP server count
- Removed raw print() calls from discovery (banner handles display)

/reload-mcp command:
- New slash command in both CLI and gateway
- Disconnects all MCP servers, re-reads config.yaml, reconnects
- Reports what changed (added/removed/reconnected servers)
- Allows adding/removing MCP servers without restarting

Resources & Prompts support:
- 4 utility tools registered per server: list_resources, read_resource,
  list_prompts, get_prompt
- Exposes MCP Resources (data sources) and Prompts (templates) as tools
- Proper parameter schemas (uri for read_resource, name for get_prompt)
- Handles text and binary resource content
- 23 new tests covering schemas, handlers, and registration

Test coverage: 74 MCP tests total, 1186 tests pass overall.

											
										
										
											2026-03-02 19:15:59 -08:00
+								            "`/reload-mcp` — Reload MCP servers from config",
-												feat: add /update slash command for gateway platforms

Adds a /update command to Telegram, Discord, and other gateway platforms
that runs `hermes update` to pull the latest code, update dependencies,
sync skills, and restart the gateway.

Implementation:
- Spawns `hermes update` in a separate systemd scope (systemd-run --user
  --scope) so the process survives the gateway restart that hermes update
  triggers at the end. Falls back to nohup if systemd-run is unavailable.
- Writes a marker file (.update_pending.json) with the originating
  platform and chat_id before spawning the update.
- On gateway startup, _send_update_notification() checks for the marker,
  reads the captured update output, sends the results back to the user,
  and cleans up.

Also:
- Registers /update as a Discord slash command
- Updates README.md, docs/messaging.md, docs/slash-commands.md
- Adds 18 tests covering handler, notification, and edge cases

											
										
										
											2026-03-05 01:20:58 -08:00
+								            "`/update` — Update Hermes Agent to the latest version",
-												feat(skills): implement dynamic skill slash commands for CLI and gateway

											
										
										
											2026-02-28 11:18:50 -08:00
+								            "`/help` — Show this message",
 								        ]
 								        try:
 								            from agent.skill_commands import get_skill_commands
 								            skill_cmds = get_skill_commands()
 								            if skill_cmds:
 								                lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):")
 								                for cmd in sorted(skill_cmds):
 								                    lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}")
 								        except Exception:
 								            pass
 								        return "\n".join(lines)
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
 								    async def _handle_model_command(self, event: MessageEvent) -> str:
 								        """Handle /model command - show or change the current model.

 								        With no arguments, reports the current model and provider and lists
 								        the curated models for that provider.  With an argument
 								        (``model-name`` or ``provider:model-name``) the requested model is
 								        validated, written to config.yaml when validation allows it to be
 								        persisted, and exported through ``HERMES_MODEL`` (plus
 								        ``HERMES_INFERENCE_PROVIDER`` on a provider switch) so the next
 								        agent run picks it up.

 								        Args:
 								            event: Incoming message event; only ``get_command_args()`` is used.

 								        Returns:
 								            The reply text to send back to the user.
 								        """
 								        import yaml
 								        from hermes_cli.models import (
 								            parse_model_input,
 								            validate_requested_model,
 								            curated_models_for_provider,
 								            normalize_provider,
 								            _PROVIDER_LABELS,
 								        )
 								        args = event.get_command_args().strip()
 								        config_path = _hermes_home / 'config.yaml'
 								        # Resolve current model and provider from config.  config.yaml is the
 								        # source of truth; HERMES_MODEL only carries the value for this
 								        # gateway process and is overridden by whatever the file says.
 								        current = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
 								        current_provider = "openrouter"
 								        try:
 								            if config_path.exists():
 								                # Explicit UTF-8: on Windows open() defaults to the locale encoding.
 								                with open(config_path, encoding="utf-8") as f:
 								                    cfg = yaml.safe_load(f) or {}
 								                model_cfg = cfg.get("model", {})
 								                if isinstance(model_cfg, str):
 								                    current = model_cfg
 								                elif isinstance(model_cfg, dict):
 								                    # `or` (not a .get default) so an explicit null/empty value
 								                    # in the YAML does not replace the fallbacks with None.
 								                    current = model_cfg.get("default") or current
 								                    current_provider = model_cfg.get("provider") or current_provider
 								        except Exception as exc:
 								            # Best effort: an unreadable or malformed config falls back to the
 								            # defaults above, but leave a trace for debugging.
 								            logging.getLogger(__name__).debug(
 								                "Could not read %s for /model: %s", config_path, exc
 								            )
 								        # Resolve "auto" to the actual provider using credential detection.
 								        # "auto" does not always mean openrouter, so openrouter is only the
 								        # fallback when resolution itself fails.
 								        current_provider = normalize_provider(current_provider)
 								        if current_provider == "auto":
 								            try:
 								                from hermes_cli.auth import resolve_provider as _resolve_provider
 								                current_provider = _resolve_provider(current_provider)
 								            except Exception:
 								                current_provider = "openrouter"
 								        # Detect custom endpoint: provider resolved to openrouter but a custom
 								        # base URL is configured — the user set up a custom endpoint.
 								        if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
 								            current_provider = "custom"
 								        if not args:
 								            # Display mode: current selection plus the curated list, if any.
 								            provider_label = _PROVIDER_LABELS.get(current_provider, current_provider)
 								            lines = [
 								                f"🤖 **Current model:** `{current}`",
 								                f"**Provider:** {provider_label}",
 								                "",
 								            ]
 								            curated = curated_models_for_provider(current_provider)
 								            if curated:
 								                lines.append(f"**Available models ({provider_label}):**")
 								                for mid, desc in curated:
 								                    marker = " ←" if mid == current else ""
 								                    label = f"  _{desc}_" if desc else ""
 								                    lines.append(f"• `{mid}`{label}{marker}")
 								                lines.append("")
 								            lines.append("To change: `/model model-name`")
 								            lines.append("Switch provider: `/model provider:model-name`")
 								            return "\n".join(lines)
 								        # Parse provider:model syntax
 								        target_provider, new_model = parse_model_input(args, current_provider)
 								        provider_changed = target_provider != current_provider
 								        # Resolve credentials for the target provider (for API probe).  The
 								        # env-based values below are the fallback when resolution fails for
 								        # the provider that is already active.
 								        api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") or ""
 								        base_url = "https://openrouter.ai/api/v1"
 								        try:
 								            from hermes_cli.runtime_provider import resolve_runtime_provider
 								            # target_provider equals current_provider when nothing changed,
 								            # so a single lookup serves both cases.
 								            runtime = resolve_runtime_provider(requested=target_provider)
 								            api_key = runtime.get("api_key", "")
 								            base_url = runtime.get("base_url", "")
 								        except Exception as e:
 								            # Switching to a provider we cannot authenticate is an error;
 								            # staying on the current provider just keeps the fallbacks.
 								            if provider_changed:
 								                provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
 								                return f"⚠️ Could not resolve credentials for provider '{provider_label}': {e}"
 								        # Validate the model against the live API
 								        try:
 								            validation = validate_requested_model(
 								                new_model,
 								                target_provider,
 								                api_key=api_key,
 								                base_url=base_url,
 								            )
 								        except Exception:
 								            # If the validator itself fails, accept the model unverified.
 								            validation = {"accepted": True, "persist": True, "recognized": False, "message": None}
 								        if not validation.get("accepted"):
 								            # `or` guards against a present-but-None message, which would
 								            # otherwise make the substring test below raise TypeError.
 								            msg = validation.get("message") or "Invalid model"
 								            tip = "\n\nUse `/model` to see available models, `/provider` to see providers" if "Did you mean" not in msg else ""
 								            return f"⚠️ {msg}{tip}"
 								        # Persist to config only if validation approves; otherwise the change
 								        # is session-only and the file must stay untouched.
 								        if validation.get("persist"):
 								            try:
 								                user_config = {}
 								                if config_path.exists():
 								                    with open(config_path, encoding="utf-8") as f:
 								                        user_config = yaml.safe_load(f) or {}
 								                if "model" not in user_config or not isinstance(user_config["model"], dict):
 								                    user_config["model"] = {}
 								                user_config["model"]["default"] = new_model
 								                if provider_changed:
 								                    user_config["model"]["provider"] = target_provider
 								                with open(config_path, 'w', encoding="utf-8") as f:
 								                    yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
 								            except Exception as e:
 								                return f"⚠️ Failed to save model change: {e}"
 								        # Set env vars so the next agent run picks up the change.  The
 								        # provider variable must be updated too, or an already-set value
 								        # would silently override the switch.
 								        os.environ["HERMES_MODEL"] = new_model
 								        if provider_changed:
 								            os.environ["HERMES_INFERENCE_PROVIDER"] = target_provider
 								        provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
 								        provider_note = f"\n**Provider:** {provider_label}" if provider_changed else ""
 								        warning = ""
 								        if validation.get("message"):
 								            warning = f"\n⚠️ {validation['message']}"
 								        if validation.get("persist"):
 								            persist_note = "saved to config"
 								        else:
 								            persist_note = "this session only — will revert on restart"
 								        return f"🤖 Model changed to `{new_model}` ({persist_note}){provider_note}{warning}\n_(takes effect on next message)_"
-												feat: /provider command + fix gateway bugs + harden parse_model_input

/provider command (CLI + gateway):
  Shows all providers with auth status (✓/✗), aliases, and active marker.
  Users can now discover what provider names work with provider:model syntax.

Gateway bugs fixed:
  - Config was saved even when validation.persist=False (told user 'session
    only' but actually persisted the unvalidated model)
  - HERMES_INFERENCE_PROVIDER env var not set on provider switch, causing
    the switch to be silently overridden if that env var was already set

parse_model_input hardened:
  - Colon only treated as provider delimiter if left side is a recognized
    provider name or alias. 'anthropic/claude-3.5-sonnet:beta' now passes
    through as a model name instead of trying provider='anthropic/claude-3.5-sonnet'.
  - HTTP URLs, random colons no longer misinterpreted.

56 tests passing across model validation, CLI commands, and integration.

											
										
										
											2026-03-08 06:09:36 -07:00
 								    async def _handle_provider_command(self, event: MessageEvent) -> str:
 								        """Handle /provider command - list providers and flag the active one.
 								        The active provider starts as ``openrouter`` and is overridden by
 								        ``model.provider`` from config.yaml when that file can be read.  The
 								        value is normalized, resolved when it is ``auto``, and reported as
 								        ``custom`` when it is still ``openrouter`` but OPENAI_BASE_URL is set.
 								        Returns the formatted reply text.
 								        """
 								        import yaml
 								        from hermes_cli.models import (
 								            list_available_providers,
 								            normalize_provider,
 								            _PROVIDER_LABELS,
 								        )
 								        active = "openrouter"
 								        cfg_file = _hermes_home / 'config.yaml'
 								        try:
 								            if cfg_file.exists():
 								                with open(cfg_file, encoding="utf-8") as handle:
 								                    loaded = yaml.safe_load(handle) or {}
 								                model_section = loaded.get("model", {})
 								                if isinstance(model_section, dict):
 								                    active = model_section.get("provider", active)
 								        except Exception:
 								            # Best effort: any read/parse failure keeps the value set so far.
 								            pass
 								        active = normalize_provider(active)
 								        if active == "auto":
 								            try:
 								                from hermes_cli.auth import resolve_provider as _resolve_provider
 								                active = _resolve_provider(active)
 								            except Exception:
 								                active = "openrouter"
 								        # Detect custom endpoint: OPENAI_BASE_URL set while still on the default.
 								        if active == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
 								            active = "custom"
 								        label = _PROVIDER_LABELS.get(active, active)
 								        def _row(info):
 								            # One list entry: auth mark, id, label, optional aliases, active marker.
 								            marker = " ← active" if info["id"] == active else ""
 								            auth = "✅" if info["authenticated"] else "❌"
 								            aliases = f"  _(also: {', '.join(info['aliases'])})_" if info["aliases"] else ""
 								            return f"{auth} `{info['id']}` — {info['label']}{aliases}{marker}"
 								        reply = [
 								            f"🔌 **Current provider:** {label} (`{active}`)",
 								            "",
 								            "**Available providers:**",
 								            *[_row(info) for info in list_available_providers()],
 								            "",
 								            "Switch: `/model provider:model-name`",
 								            "Setup: `hermes setup`",
 								        ]
 								        return "\n".join(reply)
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
 								    async def _handle_personality_command(self, event: MessageEvent) -> str:
 								        """Handle /personality command - list or set a personality.
 								        Personalities are read from ``agent.personalities`` in config.yaml; each
 								        value is either a plain prompt string or a dict with ``description``,
 								        ``system_prompt``, ``tone`` and ``style`` keys.
 								        - No argument: returns the list of configured personalities.
 								        - ``none`` / ``default`` / ``neutral``: clears the prompt overlay.
 								        - Any other argument: selects the personality with that name (matched
 								          case-insensitively, because the argument is lower-cased).
 								        A selection is written to ``agent.system_prompt`` in config.yaml and
 								        mirrored to ``self._ephemeral_system_prompt``.  Returns the reply text;
 								        a failed save is reported in the reply instead of being raised.
 								        """
 								        import yaml
 								        args = event.get_command_args().strip().lower()
 								        config_path = _hermes_home / 'config.yaml'
 								        try:
 								            if config_path.exists():
 								                with open(config_path, 'r', encoding="utf-8") as f:
 								                    config = yaml.safe_load(f) or {}
 								                personalities = config.get("agent", {}).get("personalities", {})
 								            else:
 								                config = {}
 								                personalities = {}
 								        except Exception:
 								            # Unreadable or malformed config is treated as "nothing configured".
 								            config = {}
 								            personalities = {}
 								        if not personalities:
 								            return "No personalities configured in `~/.hermes/config.yaml`"
 								        if not args:
 								            lines = ["🎭 **Available Personalities**\n"]
 								            lines.append("• `none` — (no personality overlay)")
 								            for name, prompt in personalities.items():
 								                if isinstance(prompt, dict):
 								                    preview = prompt.get("description") or prompt.get("system_prompt", "")[:50]
 								                else:
 								                    # str() mirrors _resolve_prompt so non-string values cannot crash the listing.
 								                    text = str(prompt)
 								                    preview = text[:50] + "..." if len(text) > 50 else text
 								                lines.append(f"• `{name}` — {preview}")
 								            lines.append(f"\nUsage: `/personality <name>`")
 								            return "\n".join(lines)
 								        def _resolve_prompt(value):
 								            # Flatten a dict-style personality into one prompt string.
 								            if isinstance(value, dict):
 								                parts = [value.get("system_prompt", "")]
 								                if value.get("tone"):
 								                    parts.append(f'Tone: {value["tone"]}')
 								                if value.get("style"):
 								                    parts.append(f'Style: {value["style"]}')
 								                return "\n".join(p for p in parts if p)
 								            return str(value)
 								        def _save_system_prompt(prompt):
 								            # Write agent.system_prompt back to config.yaml, same pattern as CLI save_config_value.
 								            if "agent" not in config or not isinstance(config.get("agent"), dict):
 								                config["agent"] = {}
 								            config["agent"]["system_prompt"] = prompt
 								            # Explicit UTF-8: the platform default encoding may not be UTF-8 (e.g. Windows).
 								            with open(config_path, 'w', encoding="utf-8") as f:
 								                yaml.dump(config, f, default_flow_style=False, sort_keys=False)
 								        if args in ("none", "default", "neutral"):
 								            try:
 								                _save_system_prompt("")
 								            except Exception as e:
 								                return f"⚠️ Failed to save personality change: {e}"
 								            self._ephemeral_system_prompt = ""
 								            return "🎭 Personality cleared — using base agent behavior.\n_(takes effect on next message)_"
 								        # Prefer an exact key match; otherwise fall back to a case-insensitive one,
 								        # since args was lower-cased and configured names may contain capitals.
 								        selected = args if args in personalities else next(
 								            (name for name in personalities if str(name).lower() == args), None
 								        )
 								        if selected is not None:
 								            new_prompt = _resolve_prompt(personalities[selected])
 								            try:
 								                _save_system_prompt(new_prompt)
 								            except Exception as e:
 								                return f"⚠️ Failed to save personality change: {e}"
 								            # Update in-memory so it takes effect on the very next message.
 								            self._ephemeral_system_prompt = new_prompt
 								            return f"🎭 Personality set to **{selected}**\n_(takes effect on next message)_"
 								        available = "`none`, " + ", ".join(f"`{n}`" for n in personalities.keys())
 								        return f"Unknown personality: `{args}`\n\nAvailable: {available}"
 								    async def _handle_retry_command(self, event: MessageEvent) -> str:
 								        """Handle /retry command - re-send the last user message.
 								        Removes the most recent user message and everything after it from the
 								        stored transcript, zeroes the session's stored prompt-token count, then
 								        passes the same text through the normal message handler and returns
 								        that handler's result.  Returns a notice when there is nothing to retry.
 								        """
 								        source = event.source
 								        session_entry = self.session_store.get_or_create_session(source)
 								        history = self.session_store.load_transcript(session_entry.session_id)
 								        # Scan from the newest entry backwards for the latest user turn.
 								        user_positions = (
 								            pos for pos in reversed(range(len(history)))
 								            if history[pos].get("role") == "user"
 								        )
 								        last_idx = next(user_positions, None)
 								        text = history[last_idx].get("content", "") if last_idx is not None else None
 								        if not text:
 								            return "No previous message to retry."
 								        # Persist the transcript without the retried turn and what followed it.
 								        self.session_store.rewrite_transcript(session_entry.session_id, history[:last_idx])
 								        # Reset stored token count — transcript was truncated
 								        session_entry.last_prompt_tokens = 0
 								        # Build a synthetic text event carrying the old message.
 								        retry_event = MessageEvent(
 								            text=text,
 								            message_type=MessageType.TEXT,
 								            source=source,
 								            raw_message=event.raw_message,
 								        )
 								        return await self._handle_message(retry_event)
-												feat: add new conversation command and enhance command handling

- Introduced the `/new` command to start a new conversation, resetting the history.
- Updated command handling in the CLI and various platform adapters (Discord, Slack, Telegram) to support the new command.
- Added help command functionality to list available commands, improving user guidance.
- Enhanced command mapping for better integration across platforms, ensuring consistent command behavior.

											
										
										
											2026-02-19 14:31:53 -08:00
 								    async def _handle_undo_command(self, event: MessageEvent) -> str:
 								        """Handle /undo command - remove the last user/assistant exchange.
 								        Cuts the stored transcript at the most recent user message, zeroes the
 								        session's stored prompt-token count, and returns a summary with the
 								        number of removed messages and a preview of the removed user text.
 								        """
 								        source = event.source
 								        session_entry = self.session_store.get_or_create_session(source)
 								        transcript = self.session_store.load_transcript(session_entry.session_id)
 								        # Position of the latest user turn; everything from there on is dropped.
 								        cut = next(
 								            (
 								                pos for pos in reversed(range(len(transcript)))
 								                if transcript[pos].get("role") == "user"
 								            ),
 								            None,
 								        )
 								        if cut is None:
 								            return "Nothing to undo."
 								        removed_msg = transcript[cut].get("content", "")
 								        removed_count = len(transcript) - cut
 								        self.session_store.rewrite_transcript(session_entry.session_id, transcript[:cut])
 								        # Reset stored token count — transcript was truncated
 								        session_entry.last_prompt_tokens = 0
 								        if len(removed_msg) > 40:
 								            preview = removed_msg[:40] + "..."
 								        else:
 								            preview = removed_msg
 								        return f"↩️ Undid {removed_count} message(s).\nRemoved: \"{preview}\""
 								    async def _handle_set_home_command(self, event: MessageEvent) -> str:
 								        """Handle /sethome command -- set the current chat as the platform's home channel.
 								        Stores the chat ID under the top-level key ``<PLATFORM>_HOME_CHANNEL`` in
 								        config.yaml and in ``os.environ`` for the running process.
 								        Returns a confirmation message, or a failure message when reading or
 								        writing the config raised.
 								        """
 								        source = event.source
 								        platform_name = source.platform.value if source.platform else "unknown"
 								        chat_id = source.chat_id
 								        chat_name = source.chat_name or chat_id
 								        env_key = f"{platform_name.upper()}_HOME_CHANNEL"
 								        # Save to config.yaml
 								        try:
 								            import yaml
 								            config_path = _hermes_home / 'config.yaml'
 								            user_config = {}
 								            if config_path.exists():
 								                with open(config_path, encoding="utf-8") as f:
 								                    user_config = yaml.safe_load(f) or {}
 								            user_config[env_key] = chat_id
 								            with open(config_path, 'w', encoding="utf-8") as f:
 								                # sort_keys=False keeps the existing key order instead of
 								                # alphabetizing the whole file (matches the /personality writer).
 								                yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
 								            # Also set in the current environment so it takes effect immediately
 								            os.environ[env_key] = str(chat_id)
 								        except Exception as e:
 								            return f"Failed to save home channel: {e}"
 								        return (
 								            f"✅ Home channel set to **{chat_name}** (ID: {chat_id}).\n"
 								            f"Cron jobs and cross-platform messages will be delivered here."
 								        )
 								    async def _handle_rollback_command(self, event: MessageEvent) -> str:
 								        """Handle /rollback command — list or restore filesystem checkpoints.
 								        Reads the ``checkpoints`` section of config.yaml (a bare boolean is
 								        treated as ``{"enabled": <bool>}``) and refuses to run unless it is
 								        enabled.  The working directory comes from MESSAGING_CWD, defaulting to
 								        the user's home directory.
 								        - No argument: returns the formatted checkpoint list.
 								        - Integer argument: treated as a 1-based index into that list.
 								        - Anything else: passed to the manager as a checkpoint hash.
 								        Returns the reply text describing the listing, the restore result, or
 								        the reason nothing was restored.
 								        """
 								        from tools.checkpoint_manager import CheckpointManager, format_checkpoint_list
 								        # Read checkpoint config from config.yaml
 								        cp_cfg = {}
 								        try:
 								            import yaml as _y
 								            _cfg_path = _hermes_home / "config.yaml"
 								            if _cfg_path.exists():
 								                with open(_cfg_path, encoding="utf-8") as _f:
 								                    _data = _y.safe_load(_f) or {}
 								                _raw = _data.get("checkpoints", {})
 								                if isinstance(_raw, bool):
 								                    cp_cfg = {"enabled": _raw}
 								                elif isinstance(_raw, dict):
 								                    cp_cfg = _raw
 								                # Any other shape (null, string, list) keeps the empty default so
 								                # the .get() calls below cannot raise.
 								        except Exception:
 								            # Best effort: an unreadable config means checkpoints stay disabled.
 								            pass
 								        if not cp_cfg.get("enabled", False):
 								            return (
 								                "Checkpoints are not enabled.\n"
 								                "Enable in config.yaml:\n```\ncheckpoints:\n  enabled: true\n```"
 								            )
 								        mgr = CheckpointManager(
 								            enabled=True,
 								            max_snapshots=cp_cfg.get("max_snapshots", 50),
 								        )
 								        cwd = os.getenv("MESSAGING_CWD", str(Path.home()))
 								        arg = event.get_command_args().strip()
 								        checkpoints = mgr.list_checkpoints(cwd)
 								        if not arg:
 								            return format_checkpoint_list(checkpoints, cwd)
 								        # Restore by number or hash
 								        if not checkpoints:
 								            return f"No checkpoints found for {cwd}"
 								        try:
 								            idx = int(arg) - 1
 								        except ValueError:
 								            # Not a number, so treat the argument as a checkpoint hash.
 								            target_hash = arg
 								        else:
 								            if not 0 <= idx < len(checkpoints):
 								                return f"Invalid checkpoint number. Use 1-{len(checkpoints)}."
 								            target_hash = checkpoints[idx]["hash"]
 								        result = mgr.restore(cwd, target_hash)
 								        if result["success"]:
 								            return (
 								                f"✅ Restored to checkpoint {result['restored_to']}: {result['reason']}\n"
 								                f"A pre-rollback snapshot was saved automatically."
 								            )
 								        return f"❌ {result['error']}"
 								    async def _handle_background_command(self, event: MessageEvent) -> str:
 								        """Handle /background <prompt> — run a prompt in a separate background session.
 								        Schedules ``_run_background_task`` on the event loop and returns
 								        immediately with an acknowledgement; the task itself reports the
 								        result back to the same chat when it finishes.
 								        Args:
 								            event: Incoming command event; its command args are the prompt.
 								        Returns:
 								            Usage help when no prompt was given, otherwise a confirmation
 								            containing a prompt preview and the generated task ID.
 								        """
 								        prompt = event.get_command_args().strip()
 								        if not prompt:
 								            return (
 								                "Usage: /background <prompt>\n"
 								                "Example: /background Summarize the top HN stories today\n\n"
 								                "Runs the prompt in a separate session. "
 								                "You can keep chatting — the result will appear here when done."
 								            )
 								        source = event.source
 								        task_id = f"bg_{datetime.now().strftime('%H%M%S')}_{os.urandom(3).hex()}"
 								        # The event loop only keeps weak references to tasks, so an
 								        # unreferenced task may be garbage-collected before it completes.
 								        # Hold a strong reference until the task is done.
 								        pending_tasks = getattr(self, "_background_tasks", None)
 								        if pending_tasks is None:
 								            pending_tasks = self._background_tasks = set()
 								        task = asyncio.create_task(
 								            self._run_background_task(prompt, source, task_id)
 								        )
 								        pending_tasks.add(task)
 								        task.add_done_callback(pending_tasks.discard)
 								        preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
 								        return f'🔄 Background task started: "{preview}"\nTask ID: {task_id}\nYou can keep chatting — results will appear when done.'
 								    async def _run_background_task(
 								        self, prompt: str, source: "SessionSource", task_id: str
 								    ) -> None:
 								        """Execute a background agent task and deliver the result to the chat.
 								        Builds a fresh AIAgent whose session id is ``task_id``, runs the prompt
 								        through ``loop.run_in_executor`` so the event loop is not blocked, then
 								        sends the text, images and media files of the answer through the
 								        platform adapter.  Any failure is logged and reported to the chat.
 								        Args:
 								            prompt: User prompt to run.
 								            source: Origin of the request; supplies platform, chat and thread.
 								            task_id: Identifier used as both the session id and the task id.
 								        """
 								        from run_agent import AIAgent
 								        adapter = self.adapters.get(source.platform)
 								        if not adapter:
 								            logger.warning("No adapter for platform %s in background task %s", source.platform, task_id)
 								            return
 								        _thread_metadata = {"thread_id": source.thread_id} if source.thread_id else None
 								        try:
 								            runtime_kwargs = _resolve_runtime_agent_kwargs()
 								            if not runtime_kwargs.get("api_key"):
 								                await adapter.send(
 								                    source.chat_id,
 								                    f"❌ Background task {task_id} failed: no provider credentials configured.",
 								                    metadata=_thread_metadata,
 								                )
 								                return
 								            # Read model from config via shared helper
 								            model = _resolve_gateway_model()
 								            # Determine toolset (same logic as _run_agent)
 								            default_toolset_map = {
 								                Platform.LOCAL: "hermes-cli",
 								                Platform.TELEGRAM: "hermes-telegram",
 								                Platform.DISCORD: "hermes-discord",
 								                Platform.WHATSAPP: "hermes-whatsapp",
 								                Platform.SLACK: "hermes-slack",
 								                Platform.SIGNAL: "hermes-signal",
 								                Platform.HOMEASSISTANT: "hermes-homeassistant",
 								                Platform.EMAIL: "hermes-email",
 								            }
 								            platform_toolsets_config = {}
 								            try:
 								                config_path = _hermes_home / 'config.yaml'
 								                if config_path.exists():
 								                    import yaml
 								                    with open(config_path, 'r', encoding="utf-8") as f:
 								                        user_config = yaml.safe_load(f) or {}
 								                    platform_toolsets_config = user_config.get("platform_toolsets", {})
 								            except Exception as e:
 								                # Fall back to the per-platform default toolset below.
 								                logger.debug("Could not read platform_toolsets from config: %s", e)
 								            # An empty ``platform_toolsets:`` key loads as None; without this
 								            # guard the ``.get`` below would fail the whole background task.
 								            if not isinstance(platform_toolsets_config, dict):
 								                platform_toolsets_config = {}
 								            platform_config_key = {
 								                Platform.LOCAL: "cli",
 								                Platform.TELEGRAM: "telegram",
 								                Platform.DISCORD: "discord",
 								                Platform.WHATSAPP: "whatsapp",
 								                Platform.SLACK: "slack",
 								                Platform.SIGNAL: "signal",
 								                Platform.HOMEASSISTANT: "homeassistant",
 								                Platform.EMAIL: "email",
 								            }.get(source.platform, "telegram")
 								            config_toolsets = platform_toolsets_config.get(platform_config_key)
 								            if config_toolsets and isinstance(config_toolsets, list):
 								                enabled_toolsets = config_toolsets
 								            else:
 								                default_toolset = default_toolset_map.get(source.platform, "hermes-telegram")
 								                enabled_toolsets = [default_toolset]
 								            platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value
 								            # Tolerate an unset provider-routing config.
 								            pr = self._provider_routing or {}
 								            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
 								            def run_sync():
 								                """Build the agent and run the conversation (blocking)."""
 								                agent = AIAgent(
 								                    model=model,
 								                    **runtime_kwargs,
 								                    max_iterations=max_iterations,
 								                    quiet_mode=True,
 								                    verbose_logging=False,
 								                    enabled_toolsets=enabled_toolsets,
 								                    reasoning_config=self._reasoning_config,
 								                    providers_allowed=pr.get("only"),
 								                    providers_ignored=pr.get("ignore"),
 								                    providers_order=pr.get("order"),
 								                    provider_sort=pr.get("sort"),
 								                    provider_require_parameters=pr.get("require_parameters", False),
 								                    provider_data_collection=pr.get("data_collection"),
 								                    session_id=task_id,
 								                    platform=platform_key,
 								                    session_db=self._session_db,
 								                    fallback_model=self._fallback_model,
 								                )
 								                return agent.run_conversation(
 								                    user_message=prompt,
 								                    task_id=task_id,
 								                )
 								            loop = asyncio.get_event_loop()
 								            result = await loop.run_in_executor(None, run_sync)
 								            response = result.get("final_response", "") if result else ""
 								            if not response and result and result.get("error"):
 								                response = f"Error: {result['error']}"
 								            preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
 								            header = f'✅ Background task complete\nPrompt: "{preview}"\n\n'
 								            media_files, images, text_content = [], [], ""
 								            # Extract media files from the response
 								            if response:
 								                media_files, response = adapter.extract_media(response)
 								                images, text_content = adapter.extract_images(response)
 								            if text_content:
 								                await adapter.send(
 								                    chat_id=source.chat_id,
 								                    content=header + text_content,
 								                    metadata=_thread_metadata,
 								                )
 								            elif not images and not media_files:
 								                # Nothing at all to deliver: still tell the user the task ended.
 								                await adapter.send(
 								                    chat_id=source.chat_id,
 								                    content=header + "(No response generated)",
 								                    metadata=_thread_metadata,
 								                )
 								            # Send extracted images (best-effort: one failure must not
 								            # prevent the remaining attachments from being delivered)
 								            for image_url, alt_text in (images or []):
 								                try:
 								                    await adapter.send_image(
 								                        chat_id=source.chat_id,
 								                        image_url=image_url,
 								                        caption=alt_text,
 								                    )
 								                except Exception as e:
 								                    logger.warning("Background task %s: failed to send image: %s", task_id, e)
 								            # Send media files (best-effort, as above)
 								            for media_path in (media_files or []):
 								                try:
 								                    await adapter.send_file(
 								                        chat_id=source.chat_id,
 								                        file_path=media_path,
 								                    )
 								                except Exception as e:
 								                    logger.warning("Background task %s: failed to send file: %s", task_id, e)
 								        except Exception as e:
 								            logger.exception("Background task %s failed", task_id)
 								            try:
 								                await adapter.send(
 								                    chat_id=source.chat_id,
 								                    content=f"❌ Background task {task_id} failed: {e}",
 								                    metadata=_thread_metadata,
 								                )
 								            except Exception as send_err:
 								                # The original failure is already logged above.
 								                logger.debug("Background task %s: could not report failure: %s", task_id, send_err)
 								    async def _handle_reasoning_command(self, event: MessageEvent) -> str:
 								        """Handle /reasoning command — inspect or change reasoning settings.
 								        Usage:
 								            /reasoning              Report the current effort level and display state
 								            /reasoning <level>      Set effort (none, minimal, low, medium, high, xhigh)
 								            /reasoning show|on      Include model reasoning in responses
 								            /reasoning hide|off     Omit model reasoning from responses
 								        Changes are applied to this runner object first and then written to
 								        config.yaml; a failed write is logged and does not undo the change.
 								        """
 								        import yaml
 								        args = event.get_command_args().strip().lower()
 								        config_path = _hermes_home / "config.yaml"
 								        def _save_config_key(key_path: str, value):
 								            """Write ``value`` at a dot-separated path in config.yaml; return success."""
 								            try:
 								                if config_path.exists():
 								                    with open(config_path, encoding="utf-8") as fh:
 								                        document = yaml.safe_load(fh) or {}
 								                else:
 								                    document = {}
 								                *parents, leaf = key_path.split(".")
 								                node = document
 								                for part in parents:
 								                    # Missing or non-mapping intermediates are replaced by a fresh dict.
 								                    if part not in node or not isinstance(node[part], dict):
 								                        node[part] = {}
 								                    node = node[part]
 								                node[leaf] = value
 								                with open(config_path, "w", encoding="utf-8") as fh:
 								                    yaml.dump(document, fh, default_flow_style=False, sort_keys=False)
 								            except Exception as e:
 								                logger.error("Failed to save config key %s: %s", key_path, e)
 								                return False
 								            return True
 								        if not args:
 								            # No argument: report the current settings.
 								            current = self._reasoning_config
 								            if current is None:
 								                level = "medium (default)"
 								            elif current.get("enabled") is False:
 								                level = "none (disabled)"
 								            else:
 								                level = current.get("effort", "medium")
 								            display_state = "off" if not self._show_reasoning else "on ✓"
 								            return (
 								                "🧠 **Reasoning Settings**\n\n"
 								                f"**Effort:** `{level}`\n"
 								                f"**Display:** {display_state}\n\n"
 								                "_Usage:_ `/reasoning <none|low|medium|high|xhigh|show|hide>`"
 								            )
 								        # Display toggle: map each keyword to the visibility it requests.
 								        display_toggles = {"show": True, "on": True, "hide": False, "off": False}
 								        if args in display_toggles:
 								            visible = display_toggles[args]
 								            self._show_reasoning = visible
 								            _save_config_key("display.show_reasoning", visible)
 								            if visible:
 								                return "🧠 ✓ Reasoning display: **ON**\nModel thinking will be shown before each response."
 								            return "🧠 ✓ Reasoning display: **OFF**"
 								        # Anything else must be an effort level.
 								        effort = args
 								        known_levels = ("xhigh", "high", "medium", "low", "minimal")
 								        if effort != "none" and effort not in known_levels:
 								            return (
 								                f"⚠️ Unknown argument: `{effort}`\n\n"
 								                "**Valid levels:** none, low, minimal, medium, high, xhigh\n"
 								                "**Display:** show, hide"
 								            )
 								        if effort == "none":
 								            self._reasoning_config = {"enabled": False}
 								        else:
 								            self._reasoning_config = {"enabled": True, "effort": effort}
 								        persisted = _save_config_key("agent.reasoning_effort", effort)
 								        if not persisted:
 								            return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"
 								        return f"🧠 ✓ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_"
 								    async def _handle_compress_command(self, event: MessageEvent) -> str:
 								        """Handle /compress command -- manually compress conversation context.
 								        Loads the current session transcript, compresses its user/assistant
 								        messages with a temporary AIAgent, rewrites the stored transcript
 								        with the result and resets the stored prompt-token count.
 								        Returns:
 								            A before/after summary, or an explanation of why nothing was done.
 								        """
 								        entry = self.session_store.get_or_create_session(event.source)
 								        transcript = self.session_store.load_transcript(entry.session_id)
 								        if not transcript or len(transcript) < 4:
 								            return "Not enough conversation to compress (need at least 4 messages)."
 								        try:
 								            from run_agent import AIAgent
 								            from agent.model_metadata import estimate_messages_tokens_rough
 								            agent_kwargs = _resolve_runtime_agent_kwargs()
 								            if not agent_kwargs.get("api_key"):
 								                return "No provider configured -- cannot compress."
 								            # Model comes from the gateway config helper.
 								            model = _resolve_gateway_model()
 								            # Keep only user/assistant turns that actually carry content.
 								            msgs = []
 								            for item in transcript:
 								                if item.get("role") in ("user", "assistant") and item.get("content"):
 								                    msgs.append({"role": item.get("role"), "content": item.get("content")})
 								            original_count = len(msgs)
 								            approx_tokens = estimate_messages_tokens_rough(msgs)
 								            tmp_agent = AIAgent(
 								                **agent_kwargs,
 								                model=model,
 								                max_iterations=4,
 								                quiet_mode=True,
 								                enabled_toolsets=["memory"],
 								                session_id=entry.session_id,
 								            )
 								            def _compress():
 								                """Run the blocking compression call (executed off the event loop)."""
 								                return tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens)
 								            loop = asyncio.get_event_loop()
 								            compressed, _ = await loop.run_in_executor(None, _compress)
 								            self.session_store.rewrite_transcript(entry.session_id, compressed)
 								            # The transcript changed, so the stored token count is stale.
 								            self.session_store.update_session(
 								                entry.session_key, last_prompt_tokens=0,
 								            )
 								            new_count = len(compressed)
 								            new_tokens = estimate_messages_tokens_rough(compressed)
 								            summary = (
 								                f"🗜️ Compressed: {original_count} → {new_count} messages\n"
 								                f"~{approx_tokens:,} → ~{new_tokens:,} tokens"
 								            )
 								            return summary
 								        except Exception as e:
 								            logger.warning("Manual compress failed: %s", e)
 								            return f"Compression failed: {e}"
 								    async def _handle_title_command(self, event: MessageEvent) -> str:
 								        """Handle /title command — set or show the current session's title.
 								        With an argument the title is sanitized and stored; without one the
 								        stored title (if any) is reported.
 								        Returns:
 								            A user-facing confirmation, the current title, or a warning.
 								        """
 								        entry = self.session_store.get_or_create_session(event.source)
 								        session_id = entry.session_id
 								        db = self._session_db
 								        if not db:
 								            return "Session database not available."
 								        requested = event.get_command_args().strip()
 								        if not requested:
 								            # No argument: just report what is stored.
 								            existing = db.get_session_title(session_id)
 								            if not existing:
 								                return "No title set. Usage: `/title My Session Name`"
 								            return f"📌 Session title: **{existing}**"
 								        # Clean the requested title before storing it.
 								        try:
 								            sanitized = db.sanitize_title(requested)
 								        except ValueError as e:
 								            return f"⚠️ {e}"
 								        if not sanitized:
 								            return "⚠️ Title is empty after cleanup. Please use printable characters."
 								        try:
 								            stored = db.set_session_title(session_id, sanitized)
 								        except ValueError as e:
 								            return f"⚠️ {e}"
 								        if not stored:
 								            return "Session not found in database."
 								        return f"✏️ Session title set: **{sanitized}**"
 								    async def _handle_resume_command(self, event: MessageEvent) -> str:
 								        """Handle /resume command — switch to a previously-named session.
 								        Without an argument, lists recent titled sessions for the platform.
 								        With a name, resolves it to a session id, schedules a memory flush for
 								        the session being left, drops any running agent registered for the
 								        session key and points the session entry at the resolved session.
 								        Args:
 								            event: Incoming command event; its command args are the session name.
 								        Returns:
 								            A user-facing listing, confirmation or error message.
 								        """
 								        if not self._session_db:
 								            return "Session database not available."
 								        source = event.source
 								        session_key = build_session_key(source)
 								        name = event.get_command_args().strip()
 								        if not name:
 								            # List recent titled sessions for this user/platform
 								            try:
 								                user_source = source.platform.value if source.platform else None
 								                sessions = self._session_db.list_sessions_rich(
 								                    source=user_source, limit=10
 								                )
 								                titled = [s for s in sessions if s.get("title")]
 								                if not titled:
 								                    return (
 								                        "No named sessions found.\n"
 								                        "Use `/title My Session` to name your current session, "
 								                        "then `/resume My Session` to return to it later."
 								                    )
 								                lines = ["📋 **Named Sessions**\n"]
 								                for s in titled[:10]:
 								                    title = s["title"]
 								                    # ``preview`` may be present but None; slicing None
 								                    # would abort the whole listing.
 								                    preview = (s.get("preview") or "")[:40]
 								                    preview_part = f" — _{preview}_" if preview else ""
 								                    lines.append(f"• **{title}**{preview_part}")
 								                lines.append("\nUsage: `/resume <session name>`")
 								                return "\n".join(lines)
 								            except Exception as e:
 								                logger.debug("Failed to list titled sessions: %s", e)
 								                return f"Could not list sessions: {e}"
 								        # Resolve the name to a session ID
 								        target_id = self._session_db.resolve_session_by_title(name)
 								        if not target_id:
 								            return (
 								                f"No session found matching '**{name}**'.\n"
 								                "Use `/resume` with no arguments to see available sessions."
 								            )
 								        # Check if already on that session
 								        current_entry = self.session_store.get_or_create_session(source)
 								        if current_entry.session_id == target_id:
 								            return f"📌 Already on session **{name}**."
 								        # Flush memories for current session before switching.  The event
 								        # loop only keeps weak references to tasks, so hold a strong one
 								        # until the flush finishes to keep it from being garbage-collected.
 								        try:
 								            flush_task = asyncio.create_task(
 								                self._async_flush_memories(current_entry.session_id)
 								            )
 								            pending_tasks = getattr(self, "_background_tasks", None)
 								            if pending_tasks is None:
 								                pending_tasks = self._background_tasks = set()
 								            pending_tasks.add(flush_task)
 								            flush_task.add_done_callback(pending_tasks.discard)
 								        except Exception as e:
 								            # Only scheduling errors land here; the flush itself runs later.
 								            logger.debug("Could not schedule memory flush on resume: %s", e)
 								        self._shutdown_gateway_honcho(session_key)
 								        # Clear any running agent for this session key
 								        self._running_agents.pop(session_key, None)
 								        # Switch the session entry to point at the old session
 								        new_entry = self.session_store.switch_session(session_key, target_id)
 								        if not new_entry:
 								            return "Failed to switch session."
 								        # Get the title for confirmation
 								        title = self._session_db.get_session_title(target_id) or name
 								        # Count user messages for context
 								        history = self.session_store.load_transcript(target_id)
 								        msg_count = sum(1 for m in history if m.get("role") == "user") if history else 0
 								        msg_part = f" ({msg_count} message{'s' if msg_count != 1 else ''})" if msg_count else ""
 								        return f"↻ Resumed session **{title}**{msg_part}. Conversation restored."
 								    async def _handle_usage_command(self, event: MessageEvent) -> str:
 								        """Handle /usage command -- report token usage for the session.
 								        When an agent registered for the session key exposes token counters
 								        and has made at least one API call, its exact counters are reported.
 								        Otherwise a rough estimate is derived from the stored transcript.
 								        """
 								        source = event.source
 								        live_agent = self._running_agents.get(build_session_key(source))
 								        if live_agent and hasattr(live_agent, "session_total_tokens") and live_agent.session_api_calls > 0:
 								            report = []
 								            report.append("📊 **Session Token Usage**")
 								            report.append(f"Prompt (input): {live_agent.session_prompt_tokens:,}")
 								            report.append(f"Completion (output): {live_agent.session_completion_tokens:,}")
 								            report.append(f"Total: {live_agent.session_total_tokens:,}")
 								            report.append(f"API calls: {live_agent.session_api_calls}")
 								            compressor = live_agent.context_compressor
 								            if compressor.last_prompt_tokens:
 								                # Guard the percentage against a zero/unknown context length.
 								                if compressor.context_length:
 								                    pct = compressor.last_prompt_tokens / compressor.context_length * 100
 								                else:
 								                    pct = 0
 								                report.append(f"Context: {compressor.last_prompt_tokens:,} / {compressor.context_length:,} ({pct:.0f}%)")
 								            if compressor.compression_count:
 								                report.append(f"Compressions: {compressor.compression_count}")
 								            return "\n".join(report)
 								        # No usable agent counters: estimate from the stored transcript.
 								        entry = self.session_store.get_or_create_session(source)
 								        transcript = self.session_store.load_transcript(entry.session_id)
 								        if not transcript:
 								            return "No usage data available for this session."
 								        from agent.model_metadata import estimate_messages_tokens_rough
 								        msgs = []
 								        for item in transcript:
 								            if item.get("role") in ("user", "assistant") and item.get("content"):
 								                msgs.append(item)
 								        approx = estimate_messages_tokens_rough(msgs)
 								        return (
 								            f"📊 **Session Info**\n"
 								            f"Messages: {len(msgs)}\n"
 								            f"Estimated context: ~{approx:,} tokens\n"
 								            f"_(Detailed usage available during active conversations)_"
 								        )
 								    async def _handle_insights_command(self, event: MessageEvent) -> str:
 								        """Handle /insights command -- show usage insights and analytics.
 								        Accepted arguments: a bare number of days (``/insights 7``),
 								        ``--days N`` and ``--source NAME``.  Unrecognised tokens are ignored.
 								        The report is generated through ``loop.run_in_executor`` so the
 								        database work does not block the event loop.
 								        Args:
 								            event: Incoming command event carrying the optional arguments.
 								        Returns:
 								            The formatted report, or an error message.
 								        """
 								        args = event.get_command_args().strip()
 								        days = 30
 								        source = None
 								        # Parse simple args: /insights 7  or  /insights --days 7
 								        if args:
 								            parts = args.split()
 								            i = 0
 								            while i < len(parts):
 								                if parts[i] == "--days" and i + 1 < len(parts):
 								                    try:
 								                        days = int(parts[i + 1])
 								                    except ValueError:
 								                        return f"Invalid --days value: {parts[i + 1]}"
 								                    i += 2
 								                elif parts[i] == "--source" and i + 1 < len(parts):
 								                    source = parts[i + 1]
 								                    i += 2
 								                elif parts[i].isdigit():
 								                    days = int(parts[i])
 								                    i += 1
 								                else:
 								                    i += 1
 								        try:
 								            from hermes_state import SessionDB
 								            from agent.insights import InsightsEngine
 								            loop = asyncio.get_event_loop()
 								            def _run_insights():
 								                """Generate and format the report (blocking)."""
 								                db = SessionDB()
 								                try:
 								                    engine = InsightsEngine(db)
 								                    report = engine.generate(days=days, source=source)
 								                    return engine.format_gateway(report)
 								                finally:
 								                    # Close the handle even when report generation raises.
 								                    db.close()
 								            return await loop.run_in_executor(None, _run_insights)
 								        except Exception as e:
 								            logger.error("Insights command error: %s", e, exc_info=True)
 								            return f"Error generating insights: {e}"
 								    async def _handle_reload_mcp_command(self, event: MessageEvent) -> str:
 								        """Handle /reload-mcp command -- disconnect and reconnect all MCP servers.
 								        Snapshots the registered server names, shuts the servers down,
 								        rediscovers tools, and reports which servers were reconnected, added
 								        or removed.  A note describing the change is appended to the session
 								        transcript on a best-effort basis.
 								        """
 								        loop = asyncio.get_event_loop()
 								        try:
 								            from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _load_mcp_config, _servers, _lock
 								            def _names(group):
 								                """Render a set of server names as a sorted, comma-separated string."""
 								                return ", ".join(sorted(group))
 								            # Snapshot the server names that exist before the shutdown.
 								            with _lock:
 								                before = set(_servers.keys())
 								            # Load the new config ahead of the shutdown (result is not used further).
 								            fresh_config = _load_mcp_config()
 								            configured_names = set(fresh_config.keys())
 								            # Tear down, then rediscover; both run off the event loop.
 								            await loop.run_in_executor(None, shutdown_mcp_servers)
 								            new_tools = await loop.run_in_executor(None, discover_mcp_tools)
 								            with _lock:
 								                after = set(_servers.keys())
 								            added = after - before
 								            removed = before - after
 								            reconnected = after & before
 								            # User-facing summary.
 								            summary = ["🔄 **MCP Servers Reloaded**\n"]
 								            if reconnected:
 								                summary.append(f"♻️ Reconnected: {_names(reconnected)}")
 								            if added:
 								                summary.append(f"➕ Added: {_names(added)}")
 								            if removed:
 								                summary.append(f"➖ Removed: {_names(removed)}")
 								            if after:
 								                summary.append(f"\n🔧 {len(new_tools)} tool(s) available from {len(after)} server(s)")
 								            else:
 								                summary.append("No MCP servers connected.")
 								            # Note for the model, appended after all existing messages so
 								            # the earlier part of the history is left untouched.
 								            changes = []
 								            if added:
 								                changes.append(f"Added servers: {_names(added)}")
 								            if removed:
 								                changes.append(f"Removed servers: {_names(removed)}")
 								            if reconnected:
 								                changes.append(f"Reconnected servers: {_names(reconnected)}")
 								            if new_tools:
 								                tool_summary = f"{len(new_tools)} MCP tool(s) now available"
 								            else:
 								                tool_summary = "No MCP tools available"
 								            if changes:
 								                change_detail = ". ".join(changes) + ". "
 								            else:
 								                change_detail = ""
 								            reload_msg = {
 								                "role": "user",
 								                "content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
 								            }
 								            try:
 								                entry = self.session_store.get_or_create_session(event.source)
 								                self.session_store.append_to_transcript(entry.session_id, reload_msg)
 								            except Exception:
 								                pass  # Best-effort; a transcript write must not fail the reload
 								            return "\n".join(summary)
 								        except Exception as e:
 								            logger.warning("MCP reload failed: %s", e)
 								            return f"❌ MCP reload failed: {e}"
 								    async def _handle_update_command(self, event: MessageEvent) -> str:
 								        """Handle /update command — update Hermes Agent to the latest version.
 								        Spawns ``hermes update`` in a separate systemd scope so it survives the
 								        gateway restart that ``hermes update`` triggers at the end.  A marker
 								        file is written so the *new* gateway process can notify the user of the
 								        result on startup.
 								        Args:
 								            event: Incoming message event; ``event.source`` supplies the
 								                platform, chat id and user id stored in the marker file.
 								        Returns:
 								            A status line for the user: a confirmation once the update process
 								            has been spawned, or a ``✗`` message explaining why it was not.
 								        """
 								        import json
 								        import shlex
 								        import shutil
 								        import subprocess
 								        from datetime import datetime
 								        project_root = Path(__file__).parent.parent.resolve()
 								        git_dir = project_root / '.git'
 								        if not git_dir.exists():
 								            return "✗ Not a git repository — cannot update."
 								        hermes_bin = shutil.which("hermes")
 								        if not hermes_bin:
 								            return "✗ `hermes` command not found on PATH."
 								        # Write marker so the restarted gateway can notify this chat
 								        pending_path = _hermes_home / ".update_pending.json"
 								        output_path = _hermes_home / ".update_output.txt"
 								        pending = {
 								            "platform": event.source.platform.value,
 								            "chat_id": event.source.chat_id,
 								            "user_id": event.source.user_id,
 								            "timestamp": datetime.now().isoformat(),
 								        }
 								        try:
 								            pending_path.write_text(json.dumps(pending))
 								        except OSError as e:
 								            # Report the failure in the chat instead of raising out of the
 								            # command handler; nothing has been spawned yet at this point.
 								            return f"✗ Failed to start update: {e}"
 								        # Spawn `hermes update` in a separate cgroup so it survives gateway
 								        # restart.  systemd-run --user --scope creates a transient scope unit.
 								        # The command line is interpreted by bash, so both paths are quoted:
 								        # HERMES_HOME or the install location may contain spaces or other
 								        # shell metacharacters.
 								        update_cmd = (
 								            f"{shlex.quote(hermes_bin)} update "
 								            f"> {shlex.quote(str(output_path))} 2>&1"
 								        )
 								        try:
 								            systemd_run = shutil.which("systemd-run")
 								            if systemd_run:
 								                subprocess.Popen(
 								                    [systemd_run, "--user", "--scope",
 								                     "--unit=hermes-update", "--",
 								                     "bash", "-c", update_cmd],
 								                    stdout=subprocess.DEVNULL,
 								                    stderr=subprocess.DEVNULL,
 								                    start_new_session=True,
 								                )
 								            else:
 								                # Fallback: best-effort detach with start_new_session
 								                subprocess.Popen(
 								                    ["bash", "-c", f"nohup {update_cmd} &"],
 								                    stdout=subprocess.DEVNULL,
 								                    stderr=subprocess.DEVNULL,
 								                    start_new_session=True,
 								                )
 								        except Exception as e:
 								            # The update never started, so remove the marker; otherwise the
 								            # next gateway start would announce an update that did not run.
 								            pending_path.unlink(missing_ok=True)
 								            return f"✗ Failed to start update: {e}"
 								        return "⚕ Starting Hermes update… I'll notify you when it's done."
 								    async def _send_update_notification(self) -> None:
 								        """If the gateway is starting after a ``/update``, notify the user.
 								        Looks for the ``.update_pending.json`` marker under the Hermes home
 								        directory.  When present, the chat recorded in it is sent a completion
 								        message that includes the captured ``.update_output.txt`` text (ANSI
 								        colour codes removed, only the last 3500 characters kept).  Both files
 								        are deleted afterwards whether or not the message could be delivered,
 								        so each marker is acted on at most once.
 								        """
 								        import json
 								        import re as _re
 								        pending_path = _hermes_home / ".update_pending.json"
 								        output_path = _hermes_home / ".update_output.txt"
 								        if not pending_path.exists():
 								            return
 								        try:
 								            pending = json.loads(pending_path.read_text())
 								            platform_str = pending.get("platform")
 								            chat_id = pending.get("chat_id")
 								            # Read the captured update output.  Undecodable bytes are
 								            # replaced instead of raising, so unusual output cannot
 								            # suppress the notification altogether.
 								            output = ""
 								            if output_path.exists():
 								                output = output_path.read_text(errors="replace")
 								            # Resolve adapter
 								            platform = Platform(platform_str)
 								            adapter = self.adapters.get(platform)
 								            if adapter and chat_id:
 								                # Strip ANSI escape codes for clean display
 								                output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
 								                if output:
 								                    # Truncate if too long for a single message
 								                    if len(output) > 3500:
 								                        output = "…" + output[-3500:]
 								                    msg = f"✅ Hermes update finished — gateway restarted.\n\n```\n{output}\n```"
 								                else:
 								                    msg = "✅ Hermes update finished — gateway restarted successfully."
 								                await adapter.send(chat_id, msg)
 								                logger.info("Sent post-update notification to %s:%s", platform_str, chat_id)
 								            else:
 								                # Leave a trace when the notification is dropped rather
 								                # than discarding the marker silently.
 								                logger.warning(
 								                    "Post-update notification skipped: no adapter or chat for %s:%s",
 								                    platform_str, chat_id,
 								                )
 								        except Exception as e:
 								            logger.warning("Post-update notification failed: %s", e)
 								        finally:
 								            pending_path.unlink(missing_ok=True)
 								            output_path.unlink(missing_ok=True)
 								    def _set_session_env(self, context: SessionContext) -> None:
 								        """Set environment variables for the current session.
 								        Writes ``HERMES_SESSION_PLATFORM`` and ``HERMES_SESSION_CHAT_ID`` from
 								        ``context.source``.  ``HERMES_SESSION_CHAT_NAME`` is set when the source
 								        has a chat name and removed otherwise, so a name left behind by an
 								        earlier session is never attributed to this one.
 								        Args:
 								            context: Session context whose ``source`` describes the chat.
 								        """
 								        source = context.source
 								        os.environ["HERMES_SESSION_PLATFORM"] = source.platform.value
 								        os.environ["HERMES_SESSION_CHAT_ID"] = source.chat_id
 								        if source.chat_name:
 								            os.environ["HERMES_SESSION_CHAT_NAME"] = source.chat_name
 								        else:
 								            # Drop any stale value from a previous session.
 								            os.environ.pop("HERMES_SESSION_CHAT_NAME", None)
 								    def _clear_session_env(self) -> None:
 								        """Remove the per-session ``HERMES_SESSION_*`` variables from the environment.
 								        Variables that are not currently set are skipped silently.
 								        """
 								        session_vars = (
 								            "HERMES_SESSION_PLATFORM",
 								            "HERMES_SESSION_CHAT_ID",
 								            "HERMES_SESSION_CHAT_NAME",
 								        )
 								        for name in session_vars:
 								            os.environ.pop(name, None)
 								    async def _enrich_message_with_vision(
 								        self,
 								        user_text: str,
 								        image_paths: List[str],
 								    ) -> str:
 								        """
 								        Auto-analyze user-attached images with the vision tool and prepend
 								        the descriptions to the message text.
 								        Every image is analyzed with the same general-purpose prompt.  Both the
 								        resulting description and the local cache path are injected so the
 								        model can:
 								          1. Immediately understand what the user sent (no extra tool call).
 								          2. Re-examine the image with vision_analyze if it needs more detail.
 								        When the analysis reports failure or raises, a placeholder note that
 								        points at the cached path is added for that image instead.
 								        Args:
 								            user_text:   The user's original caption / message text.
 								            image_paths: List of local file paths to cached images.
 								        Returns:
 								            The enriched message string with vision descriptions prepended,
 								            or ``user_text`` unchanged when there are no images.
 								        """
 								        from tools.vision_tools import vision_analyze_tool
 								        import json as _json
 								        analysis_prompt = (
 								            "Describe everything visible in this image in thorough detail. "
 								            "Include any text, code, data, objects, people, layout, colors, "
 								            "and any other notable visual information."
 								        )
 								        notes = []
 								        # Images are processed one after another, in the order received.
 								        for image_path in image_paths:
 								            try:
 								                logger.debug("Auto-analyzing user image: %s", image_path)
 								                raw_reply = await vision_analyze_tool(
 								                    image_url=image_path,
 								                    user_prompt=analysis_prompt,
 								                )
 								                parsed = _json.loads(raw_reply)
 								                if not parsed.get("success"):
 								                    note = (
 								                        "[The user sent an image but I couldn't quite see it "
 								                        "this time (>_<) You can try looking at it yourself "
 								                        f"with vision_analyze using image_url: {image_path}]"
 								                    )
 								                else:
 								                    description = parsed.get("analysis", "")
 								                    note = (
 								                        f"[The user sent an image~ Here's what I can see:\n{description}]\n"
 								                        f"[If you need a closer look, use vision_analyze with "
 								                        f"image_url: {image_path} ~]"
 								                    )
 								            except Exception as exc:
 								                logger.error("Vision auto-analysis error: %s", exc)
 								                note = (
 								                    f"[The user sent an image but something went wrong when I "
 								                    f"tried to look at it~ You can try examining it yourself "
 								                    f"with vision_analyze using image_url: {image_path}]"
 								                )
 								            notes.append(note)
 								        # Combine: vision descriptions first, then the user's original text
 								        if not notes:
 								            return user_text
 								        prefix = "\n\n".join(notes)
 								        return f"{prefix}\n\n{user_text}" if user_text else prefix
 								    async def _enrich_message_with_transcription(
 								        self,
 								        user_text: str,
 								        audio_paths: List[str],
 								    ) -> str:
 								        """
 								        Transcribe user voice/audio attachments and prepend the transcripts
 								        to the message text.
 								        ``transcribe_audio`` is run through ``asyncio.to_thread`` for each
 								        file in turn.  A failed or crashing transcription yields a bracketed
 								        note describing the problem instead of a transcript.
 								        Args:
 								            user_text:   The user's original caption / message text.
 								            audio_paths: List of local file paths to cached audio files.
 								        Returns:
 								            The enriched message string with transcriptions prepended, or
 								            ``user_text`` unchanged when there are no audio files.
 								        """
 								        from tools.transcription_tools import transcribe_audio
 								        import asyncio
 								        # Error texts mentioning either of these mean the API key is missing.
 								        key_markers = ("OPENAI_API_KEY", "VOICE_TOOLS_OPENAI_KEY")
 								        notes = []
 								        for audio_path in audio_paths:
 								            try:
 								                logger.debug("Transcribing user voice: %s", audio_path)
 								                outcome = await asyncio.to_thread(transcribe_audio, audio_path)
 								                if outcome["success"]:
 								                    transcript = outcome["transcript"]
 								                    note = (
 								                        f'[The user sent a voice message~ '
 								                        f'Here\'s what they said: "{transcript}"]'
 								                    )
 								                else:
 								                    error = outcome.get("error", "unknown error")
 								                    if any(marker in error for marker in key_markers):
 								                        note = (
 								                            "[The user sent a voice message but I can't listen "
 								                            "to it right now~ VOICE_TOOLS_OPENAI_KEY isn't set up yet "
 								                            "(';w;') Let them know!]"
 								                        )
 								                    else:
 								                        note = (
 								                            "[The user sent a voice message but I had trouble "
 								                            f"transcribing it~ ({error})]"
 								                        )
 								            except Exception as exc:
 								                logger.error("Transcription error: %s", exc)
 								                note = (
 								                    "[The user sent a voice message but something went wrong "
 								                    "when I tried to listen to it~ Let them know!]"
 								                )
 								            notes.append(note)
 								        if not notes:
 								            return user_text
 								        prefix = "\n\n".join(notes)
 								        return f"{prefix}\n\n{user_text}" if user_text else prefix
 								    async def _run_process_watcher(self, watcher: dict) -> None:
 								        """
 								        Poll one background process and relay its progress to the chat.
 								        Intended to run as an asyncio task.  Every ``check_interval`` seconds
 								        the process is looked up in the process registry; the loop ends once
 								        the entry has disappeared or the process has exited.  Nothing is sent
 								        when there is nothing new to report.
 								        Notification mode (from ``display.background_process_notifications``):
 								          - ``all``    — running-output updates + final message
 								          - ``result`` — final completion message only
 								          - ``error``  — final message only when exit code != 0
 								          - ``off``    — no messages at all
 								        Args:
 								            watcher: Dict with required ``session_id`` and ``check_interval``
 								                keys and optional ``platform`` and ``chat_id`` keys that
 								                select where updates are delivered.
 								        """
 								        from tools.process_registry import process_registry
 								        session_id = watcher["session_id"]
 								        interval = watcher["check_interval"]
 								        platform_name = watcher.get("platform", "")
 								        chat_id = watcher.get("chat_id", "")
 								        notify_mode = self._load_background_notifications_mode()
 								        logger.debug("Process watcher started: %s (every %ss, notify=%s)",
 								                      session_id, interval, notify_mode)
 								        async def _deliver(text: str) -> None:
 								            # Send to the adapter whose platform value matches the watcher;
 								            # delivery failures are logged, never raised.
 								            target = next(
 								                (a for p, a in self.adapters.items() if p.value == platform_name),
 								                None,
 								            )
 								            if target and chat_id:
 								                try:
 								                    await target.send(chat_id, text)
 								                except Exception as e:
 								                    logger.error("Watcher delivery error: %s", e)
 								        if notify_mode == "off":
 								            # Still wait for the process to exit so we can log it, but don't
 								            # push any messages to the user.
 								            finished = False
 								            while not finished:
 								                await asyncio.sleep(interval)
 								                proc = process_registry.get(session_id)
 								                finished = proc is None or proc.exited
 								            logger.debug("Process watcher ended (silent): %s", session_id)
 								            return
 								        seen_len = 0
 								        while True:
 								            await asyncio.sleep(interval)
 								            proc = process_registry.get(session_id)
 								            if proc is None:
 								                break
 								            buffer_len = len(proc.output_buffer)
 								            grew = buffer_len > seen_len
 								            seen_len = buffer_len
 								            if proc.exited:
 								                # Decide whether to notify based on mode
 								                if notify_mode in ("all", "result"):
 								                    wants_final = True
 								                else:
 								                    wants_final = (
 								                        notify_mode == "error"
 								                        and proc.exit_code not in (0, None)
 								                    )
 								                if wants_final:
 								                    tail = proc.output_buffer[-1000:] if proc.output_buffer else ""
 								                    await _deliver(
 								                        f"[Background process {session_id} finished with exit code {proc.exit_code}~ "
 								                        f"Here's the final output:\n{tail}]"
 								                    )
 								                break
 								            if grew and notify_mode == "all":
 								                # New output available -- deliver status update (only in "all" mode)
 								                tail = proc.output_buffer[-500:] if proc.output_buffer else ""
 								                await _deliver(
 								                    f"[Background process {session_id} is still running~ "
 								                    f"New output:\n{tail}]"
 								                )
 								        logger.debug("Process watcher ended: %s", session_id)
 								    async def _run_agent(
 								        self,
 								        message: str,
 								        context_prompt: str,
 								        history: List[Dict[str, Any]],
 								        source: SessionSource,
 								        session_id: str,
 								        session_key: str = None
 								    ) -> Dict[str, Any]:
 								        """
 								        Run the agent with the given message and context.
 								        Returns the full result dict from run_conversation, including:
 								          - "final_response": str (the text to send back)
 								          - "messages": list (full conversation including tool calls)
 								          - "api_calls": int
 								          - "completed": bool
 								        This is run in a thread pool to not block the event loop.
 								        Supports interruption via new messages.
 								        """
 								        from run_agent import AIAgent
 								        import queue
 								        # Determine toolset based on platform.
 								        # Check config.yaml for per-platform overrides, fallback to hardcoded defaults.
 								        default_toolset_map = {
 								            Platform.LOCAL: "hermes-cli",
 								            Platform.TELEGRAM: "hermes-telegram",
 								            Platform.DISCORD: "hermes-discord",
 								            Platform.WHATSAPP: "hermes-whatsapp",
 								            Platform.SLACK: "hermes-slack",
 								            Platform.SIGNAL: "hermes-signal",
 								            Platform.HOMEASSISTANT: "hermes-homeassistant",
 								            Platform.EMAIL: "hermes-email",
 								        }
 								        # Try to load platform_toolsets from config
 								        platform_toolsets_config = {}
 								        try:
 								            config_path = _hermes_home / 'config.yaml'
 								            if config_path.exists():
 								                import yaml
 								                with open(config_path, 'r', encoding="utf-8") as f:
 								                    user_config = yaml.safe_load(f) or {}
 								                platform_toolsets_config = user_config.get("platform_toolsets", {})
 								        except Exception as e:
 								            logger.debug("Could not load platform_toolsets config: %s", e)
 								        # Map platform enum to config key
 								        platform_config_key = {
 								            Platform.LOCAL: "cli",
 								            Platform.TELEGRAM: "telegram",
 								            Platform.DISCORD: "discord",
 								            Platform.WHATSAPP: "whatsapp",
 								            Platform.SLACK: "slack",
 								            Platform.SIGNAL: "signal",
 								            Platform.HOMEASSISTANT: "homeassistant",
 								            Platform.EMAIL: "email",
 								        }.get(source.platform, "telegram")
 								        # Use config override if present (list of toolsets), otherwise hardcoded default
 								        config_toolsets = platform_toolsets_config.get(platform_config_key)
 								        if config_toolsets and isinstance(config_toolsets, list):
 								            enabled_toolsets = config_toolsets
 								        else:
 								            default_toolset = default_toolset_map.get(source.platform, "hermes-telegram")
 								            enabled_toolsets = [default_toolset]
 								        # Tool progress mode from config.yaml: "all", "new", "verbose", "off"
 								        # Falls back to env vars for backward compatibility
 								        _progress_cfg = {}
 								        try:
 								            _tp_cfg_path = _hermes_home / "config.yaml"
 								            if _tp_cfg_path.exists():
 								                import yaml as _tp_yaml
 								                with open(_tp_cfg_path, encoding="utf-8") as _tp_f:
 								                    _tp_data = _tp_yaml.safe_load(_tp_f) or {}
 								                _progress_cfg = _tp_data.get("display", {})
 								        except Exception:
 								            pass
 								        progress_mode = (
 								            _progress_cfg.get("tool_progress")
 								            or os.getenv("HERMES_TOOL_PROGRESS_MODE")
 								            or "all"
 								        )
 								        tool_progress_enabled = progress_mode != "off"
 								        # Queue for progress messages (thread-safe)
 								        progress_queue = queue.Queue() if tool_progress_enabled else None
 								        last_tool = [None]  # Mutable container for tracking in closure
 								        last_progress_msg = [None]  # Track last message for dedup
 								        repeat_count = [0]  # How many times the same message repeated
 								        def progress_callback(tool_name: str, preview: str = None, args: dict = None):
 								            """Callback invoked by agent when a tool is called."""
 								            if not progress_queue:
 								                return
 								            # "new" mode: only report when tool changes
 								            if progress_mode == "new" and tool_name == last_tool[0]:
 								                return
 								            last_tool[0] = tool_name
 								            # Build progress message with primary argument preview
 								            tool_emojis = {
 								                "terminal": "💻",
 								                "process": "⚙️",
 								                "web_search": "🔍",
 								                "web_extract": "📄",
 								                "read_file": "📖",
 								                "write_file": "✍️",
 								                "patch": "🔧",
 								                "search": "🔎",
 								                "search_files": "🔎",
 								                "list_directory": "📂",
 								                "image_generate": "🎨",
 								                "text_to_speech": "🔊",
 								                "browser_navigate": "🌐",
 								                "browser_click": "👆",
 								                "browser_type": "⌨️",
 								                "browser_snapshot": "📸",
 								                "browser_scroll": "📜",
 								                "browser_back": "◀️",
 								                "browser_press": "⌨️",
 								                "browser_close": "🚪",
 								                "browser_get_images": "🖼️",
 								                "browser_vision": "👁️",
 								                "moa_query": "🧠",
 								                "mixture_of_agents": "🧠",
 								                "vision_analyze": "👁️",
 								                "skill_view": "📚",
 								                "skills_list": "📋",
 								                "todo": "📋",
 								                "memory": "🧠",
 								                "session_search": "🔍",
 								                "send_message": "📨",
 								                "schedule_cronjob": "⏰",
 								                "list_cronjobs": "⏰",
 								                "remove_cronjob": "⏰",
 								                "execute_code": "🐍",
 								                "delegate_task": "🔀",
 								                "clarify": "❓",
 								                "skill_manage": "📝",
 								            }
 								            emoji = tool_emojis.get(tool_name, "⚙️")
 								            # Verbose mode: show detailed arguments
 								            if progress_mode == "verbose" and args:
 								                import json as _json
 								                args_str = _json.dumps(args, ensure_ascii=False, default=str)
 								                if len(args_str) > 200:
 								                    args_str = args_str[:197] + "..."
 								                msg = f"{emoji} {tool_name}({list(args.keys())})\n{args_str}"
 								                progress_queue.put(msg)
 								                return
 								            if preview:
 								                # Truncate preview to keep messages clean
 								                if len(preview) > 80:
 								                    preview = preview[:77] + "..."
 								                msg = f"{emoji} {tool_name}: \"{preview}\""
 								            else:
 								                msg = f"{emoji} {tool_name}..."
 								            # Dedup: collapse consecutive identical progress messages.
 								            # Common with execute_code where models iterate with the same
 								            # code (same boilerplate imports → identical previews).
 								            if msg == last_progress_msg[0]:
 								                repeat_count[0] += 1
 								                # Update the last line in progress_lines with a counter
 								                # via a special "dedup" queue message.
 								                progress_queue.put(("__dedup__", msg, repeat_count[0]))
 								                return
 								            last_progress_msg[0] = msg
 								            repeat_count[0] = 0
 								            progress_queue.put(msg)
 								        # Background task to send progress messages
 								        # Accumulates tool lines into a single message that gets edited
 								        _progress_metadata = {"thread_id": source.thread_id} if source.thread_id else None
 								        async def send_progress_messages():
 								            if not progress_queue:
 								                return
 								            adapter = self.adapters.get(source.platform)
 								            if not adapter:
 								                return
 								            progress_lines = []      # Accumulated tool lines
 								            progress_msg_id = None   # ID of the progress message to edit
 								            can_edit = True          # False once an edit fails (platform doesn't support it)
 								            while True:
 								                try:
 								                    raw = progress_queue.get_nowait()
 								                    # Handle dedup messages: update last line with repeat counter
 								                    if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
 								                        _, base_msg, count = raw
 								                        if progress_lines:
 								                            progress_lines[-1] = f"{base_msg} (×{count + 1})"
 								                        msg = progress_lines[-1] if progress_lines else base_msg
 								                    else:
 								                        msg = raw
 								                        progress_lines.append(msg)
 								                    if can_edit and progress_msg_id is not None:
 								                        # Try to edit the existing progress message
 								                        full_text = "\n".join(progress_lines)
 								                        result = await adapter.edit_message(
 								                            chat_id=source.chat_id,
 								                            message_id=progress_msg_id,
 								                            content=full_text,
 								                        )
 								                        if not result.success:
 								                            # Platform doesn't support editing — stop trying,
 								                            # send just this new line as a separate message
 								                            can_edit = False
 								                            await adapter.send(chat_id=source.chat_id, content=msg, metadata=_progress_metadata)
 								                    else:
 								                        if can_edit:
 								                            # First tool: send all accumulated text as new message
 								                            full_text = "\n".join(progress_lines)
 								                            result = await adapter.send(chat_id=source.chat_id, content=full_text, metadata=_progress_metadata)
 								                        else:
 								                            # Editing unsupported: send just this line
 								                            result = await adapter.send(chat_id=source.chat_id, content=msg, metadata=_progress_metadata)
 								                        if result.success and result.message_id:
 								                            progress_msg_id = result.message_id
 								                    # Restore typing indicator
 								                    await asyncio.sleep(0.3)
 								                    await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
 								                except queue.Empty:
 								                    await asyncio.sleep(0.3)
 								                except asyncio.CancelledError:
 								                    # Drain remaining queued messages
 								                    while not progress_queue.empty():
 								                        try:
 								                            raw = progress_queue.get_nowait()
 								                            if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
 								                                _, base_msg, count = raw
 								                                if progress_lines:
 								                                    progress_lines[-1] = f"{base_msg} (×{count + 1})"
 								                            else:
 								                                progress_lines.append(raw)
 								                        except Exception:
 								                            break
 								                    # Final edit with all remaining tools (only if editing works)
 								                    if can_edit and progress_lines and progress_msg_id:
 								                        full_text = "\n".join(progress_lines)
 								                        try:
 								                            await adapter.edit_message(
 								                                chat_id=source.chat_id,
 								                                message_id=progress_msg_id,
 								                                content=full_text,
 								                            )
 								                        except Exception:
 								                            pass
 								                    return
 								                except Exception as e:
 								                    logger.error("Progress message error: %s", e)
 								                    await asyncio.sleep(1)
 								        # We need to share the agent instance for interrupt support
 								        agent_holder = [None]  # Mutable container for the agent instance
 								        result_holder = [None]  # Mutable container for the result
 								        tools_holder = [None]   # Mutable container for the tool definitions
 								        # Bridge sync step_callback → async hooks.emit for agent:step events
 								        _loop_for_step = asyncio.get_event_loop()
 								        _hooks_ref = self.hooks
 								        def _step_callback_sync(iteration: int, tool_names: list) -> None:
 								            try:
 								                asyncio.run_coroutine_threadsafe(
 								                    _hooks_ref.emit("agent:step", {
 								                        "platform": source.platform.value if source.platform else "",
 								                        "user_id": source.user_id,
 								                        "session_id": session_id,
 								                        "iteration": iteration,
 								                        "tool_names": tool_names,
 								                    }),
 								                    _loop_for_step,
 								                )
 								            except Exception as _e:
 								                logger.debug("agent:step hook error: %s", _e)
 								        def run_sync():
 								            """Build an agent for this turn, run the conversation, and package the result.

 								            Blocking; the enclosing coroutine runs it via ``loop.run_in_executor``.

 								            Returns:
 								                dict with ``final_response``, ``messages``, ``api_calls`` and
 								                ``tools``.  Once the agent has run, ``history_offset`` and
 								                ``last_prompt_tokens`` are included as well, and the success
 								                path adds ``last_reasoning`` and ``session_id``.
 								            """
 								            def _iter_media_paths(text):
 								                """Yield the path of every ``MEDIA:<path>`` tag found in *text*.

 								                Content that is not a string (e.g. ``None``) yields nothing
 								                instead of raising.
 								                """
 								                if not isinstance(text, str) or "MEDIA:" not in text:
 								                    return
 								                for _m in re.finditer(r'MEDIA:(\S+)', text):
 								                    _path = _m.group(1).strip().rstrip('",}')
 								                    if _path:
 								                        yield _path
 								            # Pass session_key to process registry via env var so background
 								            # processes can be mapped back to this gateway session
 								            os.environ["HERMES_SESSION_KEY"] = session_key or ""
 								            # Read from env var or use default (same as CLI)
 								            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
 								            # Map platform enum to the platform hint key the agent understands.
 								            # Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
 								            platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value
 								            # Combine platform context with user-configured ephemeral system prompt
 								            combined_ephemeral = context_prompt or ""
 								            if self._ephemeral_system_prompt:
 								                combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
 								            # Re-read .env and config for fresh credentials (gateway is long-lived,
 								            # keys may change without restart).
 								            try:
 								                load_dotenv(_env_path, override=True, encoding="utf-8")
 								            except UnicodeDecodeError:
 								                load_dotenv(_env_path, override=True, encoding="latin-1")
 								            except Exception:
 								                pass
 								            model = _resolve_gateway_model()
 								            try:
 								                runtime_kwargs = _resolve_runtime_agent_kwargs()
 								            except Exception as exc:
 								                # No agent is created on this path; report the failure as the reply.
 								                return {
 								                    "final_response": f"⚠️ Provider authentication failed: {exc}",
 								                    "messages": [],
 								                    "api_calls": 0,
 								                    "tools": [],
 								                }
 								            pr = self._provider_routing
 								            honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key)
 								            agent = AIAgent(
 								                model=model,
 								                **runtime_kwargs,
 								                max_iterations=max_iterations,
 								                quiet_mode=True,
 								                verbose_logging=False,
 								                enabled_toolsets=enabled_toolsets,
 								                ephemeral_system_prompt=combined_ephemeral or None,
 								                prefill_messages=self._prefill_messages or None,
 								                reasoning_config=self._reasoning_config,
 								                providers_allowed=pr.get("only"),
 								                providers_ignored=pr.get("ignore"),
 								                providers_order=pr.get("order"),
 								                provider_sort=pr.get("sort"),
 								                provider_require_parameters=pr.get("require_parameters", False),
 								                provider_data_collection=pr.get("data_collection"),
 								                session_id=session_id,
 								                tool_progress_callback=progress_callback if tool_progress_enabled else None,
 								                step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None,
 								                platform=platform_key,
 								                honcho_session_key=session_key,
 								                honcho_manager=honcho_manager,
 								                honcho_config=honcho_config,
 								                session_db=self._session_db,
 								                fallback_model=self._fallback_model,
 								            )
 								            # Store agent reference for interrupt support
 								            agent_holder[0] = agent
 								            # Capture the full tool definitions for transcript logging
 								            tools_holder[0] = agent.tools if hasattr(agent, 'tools') else None
 								            # Convert history to agent format.
 								            # Two cases:
 								            #   1. Normal path (from transcript): simple {role, content, timestamp} dicts
 								            #      - Strip timestamps, keep role+content
 								            #   2. Interrupt path (from agent result["messages"]): full agent messages
 								            #      that may include tool_calls, tool_call_id, reasoning, etc.
 								            #      - These must be passed through intact so the API sees valid
 								            #        assistant→tool sequences (dropping tool_calls causes 500 errors)
 								            agent_history = []
 								            for msg in history:
 								                role = msg.get("role")
 								                if not role:
 								                    continue
 								                # Skip metadata entries (tool definitions, session info)
 								                # -- these are for transcript logging, not for the LLM
 								                if role in ("session_meta",):
 								                    continue
 								                # Skip system messages -- the agent rebuilds its own system prompt
 								                if role == "system":
 								                    continue
 								                # Rich agent messages (tool_calls, tool results) must be passed
 								                # through intact so the API sees valid assistant→tool sequences
 								                has_tool_calls = "tool_calls" in msg
 								                has_tool_call_id = "tool_call_id" in msg
 								                is_tool_message = role == "tool"
 								                if has_tool_calls or has_tool_call_id or is_tool_message:
 								                    clean_msg = {k: v for k, v in msg.items() if k != "timestamp"}
 								                    agent_history.append(clean_msg)
 								                else:
 								                    # Simple text message - just need role and content
 								                    content = msg.get("content")
 								                    if content:
 								                        # Tag cross-platform mirror messages so the agent knows their origin
 								                        if msg.get("mirror"):
 								                            mirror_src = msg.get("mirror_source", "another session")
 								                            content = f"[Delivered from {mirror_src}] {content}"
 								                        agent_history.append({"role": role, "content": content})
 								            # Collect MEDIA paths already in history so we can exclude them
 								            # from the current turn's extraction. This is compression-safe:
 								            # even if the message list shrinks, we know which paths are old.
 								            _history_media_paths: set = set()
 								            for _hm in agent_history:
 								                if _hm.get("role") in ("tool", "function"):
 								                    # content may be None or non-text; the helper skips those
 								                    _history_media_paths.update(_iter_media_paths(_hm.get("content", "")))
 								            result = agent.run_conversation(message, conversation_history=agent_history, task_id=session_id)
 								            result_holder[0] = result
 								            # Return final response, or a message if something went wrong
 								            final_response = result.get("final_response")
 								            # Extract last actual prompt token count from the agent's compressor
 								            _last_prompt_toks = 0
 								            _agent = agent_holder[0]
 								            if _agent and hasattr(_agent, "context_compressor"):
 								                _last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0)
 								            if not final_response:
 								                error_msg = f"⚠️ {result['error']}" if result.get("error") else "(No response generated)"
 								                return {
 								                    "final_response": error_msg,
 								                    "messages": result.get("messages", []),
 								                    "api_calls": result.get("api_calls", 0),
 								                    "tools": tools_holder[0] or [],
 								                    "history_offset": len(agent_history),
 								                    "last_prompt_tokens": _last_prompt_toks,
 								                }
 								            # Scan tool results for MEDIA:<path> tags that need to be delivered
 								            # as native audio/file attachments.  The TTS tool embeds MEDIA: tags
 								            # in its JSON response, but the model's final text reply usually
 								            # doesn't include them.  We collect unique tags from tool results and
 								            # append any that aren't already present in the final response, so the
 								            # adapter's extract_media() can find and deliver the files exactly once.
 								            #
 								            # Uses path-based deduplication against _history_media_paths (collected
 								            # before run_conversation) instead of index slicing. This is safe even
 								            # when context compression shrinks the message list. (Fixes #160)
 								            if "MEDIA:" not in final_response:
 								                media_tags = []
 								                has_voice_directive = False
 								                for msg in result.get("messages", []):
 								                    if msg.get("role") not in ("tool", "function"):
 								                        continue
 								                    content = msg.get("content", "")
 								                    # Guard against None / non-text content before substring checks
 								                    if not isinstance(content, str) or "MEDIA:" not in content:
 								                        continue
 								                    for path in _iter_media_paths(content):
 								                        if path not in _history_media_paths:
 								                            media_tags.append(f"MEDIA:{path}")
 								                    if "[[audio_as_voice]]" in content:
 								                        has_voice_directive = True
 								                if media_tags:
 								                    # Order-preserving de-duplication
 								                    unique_tags = list(dict.fromkeys(media_tags))
 								                    if has_voice_directive:
 								                        unique_tags.insert(0, "[[audio_as_voice]]")
 								                    final_response = final_response + "\n" + "\n".join(unique_tags)
 								            # Sync session_id: the agent may have created a new session during
 								            # mid-run context compression (_compress_context splits sessions).
 								            # If so, update the session store entry so the NEXT message loads
 								            # the compressed transcript, not the stale pre-compression one.
 								            agent = agent_holder[0]
 								            if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id:
 								                logger.info(
 								                    "Session split detected: %s → %s (compression)",
 								                    session_id, agent.session_id,
 								                )
 								                entry = self.session_store._entries.get(session_key)
 								                if entry:
 								                    entry.session_id = agent.session_id
 								                    self.session_store._save()
 								            effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id
 								            return {
 								                "final_response": final_response,
 								                "last_reasoning": result.get("last_reasoning"),
 								                "messages": result_holder[0].get("messages", []) if result_holder[0] else [],
 								                "api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
 								                "tools": tools_holder[0] or [],
 								                "history_offset": len(agent_history),
 								                "last_prompt_tokens": _last_prompt_toks,
 								                "session_id": effective_session_id,
 								            }
 								        # Start progress message sender if enabled
 								        progress_task = None
 								        if tool_progress_enabled:
 								            progress_task = asyncio.create_task(send_progress_messages())
 								        # Track this agent as running for this session (for interrupt support)
 								        # We do this in a callback after the agent is created
 								        async def track_agent():
 								            """Register the agent under ``session_key`` once it has been created.

 								            Polls ``agent_holder`` every 50 ms until it holds an agent, then
 								            records that agent in ``self._running_agents`` when a session key
 								            is set.
 								            """
 								            while True:
 								                created_agent = agent_holder[0]
 								                if created_agent is not None:
 								                    break
 								                await asyncio.sleep(0.05)
 								            if not session_key:
 								                return
 								            self._running_agents[session_key] = created_agent
 								        tracking_task = asyncio.create_task(track_agent())
 								        # Monitor for interrupts from the adapter (new messages arriving)
 								        async def monitor_for_interrupt():
 								            """Poll the platform adapter and interrupt the agent on a pending message.

 								            Returns immediately when there is no adapter for the source platform
 								            or no session key.  Otherwise checks every 200 ms and, on the first
 								            pending interrupt seen while an agent exists, passes the pending
 								            message text (or ``None``) to ``agent.interrupt`` and stops.
 								            """
 								            platform_adapter = self.adapters.get(source.platform)
 								            if not (platform_adapter and session_key):
 								                return
 								            while True:
 								                await asyncio.sleep(0.2)  # poll period: 200ms
 								                # The lookup key must be session_key (build_session_key output),
 								                # NOT source.chat_id: the adapter files interrupt events under
 								                # the full session key.
 								                if not hasattr(platform_adapter, 'has_pending_interrupt'):
 								                    continue
 								                if not platform_adapter.has_pending_interrupt(session_key):
 								                    continue
 								                running_agent = agent_holder[0]
 								                if not running_agent:
 								                    # Agent not created yet; look again on the next poll.
 								                    continue
 								                queued_event = platform_adapter.get_pending_message(session_key)
 								                if queued_event:
 								                    queued_text = queued_event.text
 								                else:
 								                    queued_text = None
 								                logger.debug("Interrupt detected from adapter, signaling agent...")
 								                running_agent.interrupt(queued_text)
 								                return
 								        interrupt_monitor = asyncio.create_task(monitor_for_interrupt())
 								        try:
 								            # Run in thread pool to not block
 								            loop = asyncio.get_event_loop()
 								            response = await loop.run_in_executor(None, run_sync)
 								            # Check if we were interrupted and have a pending message
 								            result = result_holder[0]
 								            adapter = self.adapters.get(source.platform)
 								            # Get pending message from adapter if interrupted.
 								            # Use session_key (not source.chat_id) to match adapter's storage keys.
 								            pending = None
 								            if result and result.get("interrupted") and adapter:
 								                pending_event = adapter.get_pending_message(session_key) if session_key else None
 								                if pending_event:
 								                    pending = pending_event.text
 								                elif result.get("interrupt_message"):
 								                    pending = result.get("interrupt_message")
 								            if pending:
 								                logger.debug("Processing interrupted message: '%s...'", pending[:40])
 								                # Clear the adapter's interrupt event so the next _run_agent call
 								                # doesn't immediately re-trigger the interrupt before the new agent
 								                # even makes its first API call (this was causing an infinite loop).
 								                if adapter and hasattr(adapter, '_active_sessions') and session_key and session_key in adapter._active_sessions:
 								                    adapter._active_sessions[session_key].clear()
 								                # Don't send the interrupted response to the user — it's just noise
 								                # like "Operation interrupted." They already know they sent a new
 								                # message, so go straight to processing it.
 								                # Now process the pending message with updated history
 								                updated_history = result.get("messages", history)
 								                return await self._run_agent(
 								                    message=pending,
 								                    context_prompt=context_prompt,
 								                    history=updated_history,
 								                    source=source,
 								                    session_id=session_id,
 								                    session_key=session_key
 								                )
 								        finally:
 								            # Stop progress sender and interrupt monitor
 								            if progress_task:
 								                progress_task.cancel()
 								            interrupt_monitor.cancel()
 								            # Clean up tracking
 								            tracking_task.cancel()
 								            if session_key and session_key in self._running_agents:
 								                del self._running_agents[session_key]
 								            # Wait for cancelled tasks
 								            for task in [progress_task, interrupt_monitor, tracking_task]:
 								                if task:
 								                    try:
 								                        await task
 								                    except asyncio.CancelledError:
 								                        pass
 								        return response
 								def _start_cron_ticker(stop_event: threading.Event, adapters=None, interval: int = 60):
 								    """
 								    Background thread that ticks the cron scheduler at a regular interval.

 								    Runs inside the gateway process so cronjobs fire automatically without
 								    needing a separate `hermes cron daemon` or system cron entry.
 								    Also refreshes the channel directory roughly every 5 minutes and prunes
 								    the image and document caches roughly once per hour.  Both cadences are
 								    derived from ``interval`` so they keep their wall-clock meaning when a
 								    non-default interval is used.

 								    Args:
 								        stop_event: Set by the caller to make the loop exit; also used for
 								            the interruptible sleep between ticks.
 								        adapters: Passed to ``build_channel_directory``; the directory
 								            refresh is skipped when this is falsy.
 								        interval: Seconds to wait between ticks.
 								    """
 								    from cron.scheduler import tick as cron_tick
 								    from gateway.platforms.base import cleanup_image_cache, cleanup_document_cache
 								    CHANNEL_DIR_PERIOD = 5 * 60    # seconds between channel directory refreshes
 								    CACHE_PRUNE_PERIOD = 60 * 60   # seconds between cache cleanups

 								    def _ticks_per(period_seconds: int) -> int:
 								        # Convert a wall-clock period into a whole number of ticks (>= 1).
 								        # A non-positive interval cannot be divided by, so fall back to
 								        # the tick counts that apply at the default 60s interval.
 								        if interval <= 0:
 								            return max(1, period_seconds // 60)
 								        return max(1, round(period_seconds / interval))

 								    CHANNEL_DIR_EVERY = _ticks_per(CHANNEL_DIR_PERIOD)   # 5 ticks at interval=60
 								    IMAGE_CACHE_EVERY = _ticks_per(CACHE_PRUNE_PERIOD)   # 60 ticks at interval=60
 								    logger.info("Cron ticker started (interval=%ds)", interval)
 								    tick_count = 0
 								    while not stop_event.is_set():
 								        # Every step below is best-effort: a failure is logged and the
 								        # ticker keeps running.
 								        try:
 								            cron_tick(verbose=False)
 								        except Exception as e:
 								            logger.debug("Cron tick error: %s", e)
 								        tick_count += 1
 								        if tick_count % CHANNEL_DIR_EVERY == 0 and adapters:
 								            try:
 								                from gateway.channel_directory import build_channel_directory
 								                build_channel_directory(adapters)
 								            except Exception as e:
 								                logger.debug("Channel directory refresh error: %s", e)
 								        if tick_count % IMAGE_CACHE_EVERY == 0:
 								            try:
 								                removed = cleanup_image_cache(max_age_hours=24)
 								                if removed:
 								                    logger.info("Image cache cleanup: removed %d stale file(s)", removed)
 								            except Exception as e:
 								                logger.debug("Image cache cleanup error: %s", e)
 								            try:
 								                removed = cleanup_document_cache(max_age_hours=24)
 								                if removed:
 								                    logger.info("Document cache cleanup: removed %d stale file(s)", removed)
 								            except Exception as e:
 								                logger.debug("Document cache cleanup error: %s", e)
 								        # Event.wait doubles as the sleep so stop requests end it early.
 								        stop_event.wait(timeout=interval)
 								    logger.info("Cron ticker stopped")
 								async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False) -> bool:
 								    """
 								    Start the gateway and run until interrupted.

 								    This is the main entry point for running the gateway.  It refuses to
 								    start (or, with ``replace``, terminates the other process) when another
 								    gateway PID is reported, syncs bundled skills, attaches rotating log
 								    files, installs SIGINT/SIGTERM handlers, starts the runner and the cron
 								    ticker thread, and then blocks until the runner shuts down.

 								    Args:
 								        config: Optional gateway configuration override.
 								        replace: If True, kill any existing gateway instance before starting.
 								                 Useful for systemd services to avoid restart-loop deadlocks
 								                 when the previous process hasn't fully exited yet.

 								    Returns:
 								        True if the gateway ran successfully, False if it failed to start.
 								        A False return causes a non-zero exit code so systemd can auto-restart.
 								    """
 								    # ── Duplicate-instance guard ──────────────────────────────────────
 								    # Prevent two gateways from running under the same HERMES_HOME.
 								    # The PID file is scoped to HERMES_HOME, so future multi-profile
 								    # setups (each profile using a distinct HERMES_HOME) will naturally
 								    # allow concurrent instances without tripping this guard.
 								    import time as _time
 								    from gateway.status import get_running_pid, remove_pid_file
 								    existing_pid = get_running_pid()
 								    if existing_pid is not None and existing_pid != os.getpid():
 								        if replace:
 								            logger.info(
 								                "Replacing existing gateway instance (PID %d) with --replace.",
 								                existing_pid,
 								            )
 								            try:
 								                os.kill(existing_pid, signal.SIGTERM)
 								            except ProcessLookupError:
 								                pass  # Already gone
 								            except PermissionError:
 								                logger.error(
 								                    "Permission denied killing PID %d. Cannot replace.",
 								                    existing_pid,
 								                )
 								                return False
 								            # Wait up to 10 seconds for the old process to exit
 								            # (signal 0 only probes whether the PID still exists).
 								            for _ in range(20):
 								                try:
 								                    os.kill(existing_pid, 0)
 								                    _time.sleep(0.5)
 								                except (ProcessLookupError, PermissionError):
 								                    break  # Process is gone
 								            else:
 								                # Still alive after 10s — force kill
 								                logger.warning(
 								                    "Old gateway (PID %d) did not exit after SIGTERM, sending SIGKILL.",
 								                    existing_pid,
 								                )
 								                try:
 								                    os.kill(existing_pid, signal.SIGKILL)
 								                    _time.sleep(0.5)
 								                except (ProcessLookupError, PermissionError):
 								                    pass
 								            remove_pid_file()
 								        else:
 								            hermes_home = os.getenv("HERMES_HOME", "~/.hermes")
 								            logger.error(
 								                "Another gateway instance is already running (PID %d, HERMES_HOME=%s). "
 								                "Use 'hermes gateway restart' to replace it, or 'hermes gateway stop' first.",
 								                existing_pid, hermes_home,
 								            )
 								            print(
 								                f"\n❌ Gateway already running (PID {existing_pid}).\n"
 								                f"   Use 'hermes gateway restart' to replace it,\n"
 								                f"   or 'hermes gateway stop' to kill it first.\n"
 								                f"   Or use 'hermes gateway run --replace' to auto-replace.\n"
 								            )
 								            return False
 								    # Sync bundled skills on gateway start (fast -- skips unchanged).
 								    # Best-effort: a failure must not prevent the gateway from starting.
 								    try:
 								        from tools.skills_sync import sync_skills
 								        sync_skills(quiet=True)
 								    except Exception as exc:
 								        logger.debug("Skill sync skipped: %s", exc)
 								    # Configure rotating file log so gateway output is persisted for debugging.
 								    # UTF-8 is pinned because log messages contain non-ASCII characters and
 								    # the platform default encoding may not be able to represent them.
 								    log_dir = _hermes_home / 'logs'
 								    log_dir.mkdir(parents=True, exist_ok=True)
 								    file_handler = RotatingFileHandler(
 								        log_dir / 'gateway.log',
 								        maxBytes=5 * 1024 * 1024,
 								        backupCount=3,
 								        encoding="utf-8",
 								    )
 								    from agent.redact import RedactingFormatter
 								    file_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
 								    logging.getLogger().addHandler(file_handler)
 								    logging.getLogger().setLevel(logging.INFO)
 								    # Separate errors-only log for easy debugging
 								    error_handler = RotatingFileHandler(
 								        log_dir / 'errors.log',
 								        maxBytes=2 * 1024 * 1024,
 								        backupCount=2,
 								        encoding="utf-8",
 								    )
 								    error_handler.setLevel(logging.WARNING)
 								    error_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
 								    logging.getLogger().addHandler(error_handler)
 								    runner = GatewayRunner(config)
 								    # Set up signal handlers.  The event loop keeps only weak references
 								    # to tasks, so hold the shutdown task here until it has finished.
 								    _shutdown_tasks = set()
 								    def signal_handler():
 								        stop_task = asyncio.create_task(runner.stop())
 								        _shutdown_tasks.add(stop_task)
 								        stop_task.add_done_callback(_shutdown_tasks.discard)
 								    loop = asyncio.get_running_loop()
 								    for sig in (signal.SIGINT, signal.SIGTERM):
 								        try:
 								            loop.add_signal_handler(sig, signal_handler)
 								        except NotImplementedError:
 								            # This event loop does not support signal handlers.
 								            pass
 								    # Start the gateway
 								    success = await runner.start()
 								    if not success:
 								        return False
 								    # Write PID file so CLI can detect gateway is running
 								    import atexit
 								    from gateway.status import write_pid_file
 								    write_pid_file()
 								    atexit.register(remove_pid_file)
 								    # Start background cron ticker so scheduled jobs fire automatically
 								    cron_stop = threading.Event()
 								    cron_thread = threading.Thread(
 								        target=_start_cron_ticker,
 								        args=(cron_stop,),
 								        kwargs={"adapters": runner.adapters},
 								        daemon=True,
 								        name="cron-ticker",
 								    )
 								    cron_thread.start()
 								    try:
 								        # Wait for shutdown
 								        await runner.wait_for_shutdown()
 								    finally:
 								        # Runs even if the wait raises, so the ticker thread and MCP
 								        # connections are always released.
 								        # Stop cron ticker cleanly
 								        cron_stop.set()
 								        cron_thread.join(timeout=5)
 								        # Close MCP server connections (best-effort)
 								        try:
 								            from tools.mcp_tool import shutdown_mcp_servers
 								            shutdown_mcp_servers()
 								        except Exception as exc:
 								            logger.debug("MCP shutdown skipped: %s", exc)
 								    return True
 								def main():
 								    """Command-line entry point: parse arguments, load config, run the gateway.

 								    ``--config``/``-c`` names a JSON file that is converted with
 								    ``GatewayConfig.from_dict``; without it ``start_gateway`` receives
 								    ``None``.  ``--verbose``/``-v`` is accepted, but its value is not read
 								    anywhere in this function.

 								    Exits the process with status 1 when ``start_gateway`` reports failure.
 								    """
 								    import argparse
 								    arg_parser = argparse.ArgumentParser(description="Hermes Gateway - Multi-platform messaging")
 								    arg_parser.add_argument("--config", "-c", help="Path to gateway config file")
 								    arg_parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
 								    cli_args = arg_parser.parse_args()
 								    gateway_config = None
 								    if cli_args.config:
 								        import json
 								        with open(cli_args.config, encoding="utf-8") as config_file:
 								            raw_config = json.load(config_file)
 								        gateway_config = GatewayConfig.from_dict(raw_config)
 								    # A failed start (e.g. no platforms connected) must surface as exit
 								    # code 1 so systemd Restart=on-failure retries on transient errors
 								    # such as DNS failures.
 								    started_ok = asyncio.run(start_gateway(gateway_config))
 								    if started_ok:
 								        return
 								    sys.exit(1)
 								# Script entry point: run the gateway CLI only when this module is executed
 								# directly (e.g. `python -m gateway.run`), not when it is imported.
 								if __name__ == "__main__":
 								    main()